pax_global_header00006660000000000000000000000064121757411460014522gustar00rootroot0000000000000052 comment=3b76d7cf92318e518f58329f69dd77338888d69b LucenePlusPlus-rel_3.0.4/000077500000000000000000000000001217574114600153135ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/.gitignore000066400000000000000000000031561217574114600173100ustar00rootroot00000000000000*~ *.o *.aps *.tar.gz *.rar *.cmd *.suo *.ncb *.idb *.obj *.opt *.pch *.pyc *.log *.exe *.exp *.lib *.idb *.pdb *.ilk *.manifest *.user *.bak *.lock* *.waf* .DS_Store bin/* src/msvc/Debug DLL src/msvc/Debug Static src/msvc/Release DLL src/msvc/Release Static src/core/msvc/Debug DLL src/core/msvc/Debug Static src/core/msvc/Release DLL src/core/msvc/Release Static src/contrib/msvc/Debug DLL src/contrib/msvc/Debug Static src/contrib/msvc/Release DLL src/contrib/msvc/Release Static src/test/msvc/Debug DLL src/test/msvc/Debug Static src/test/msvc/Release DLL src/test/msvc/Release Static src/test/testfiles/temp src/demo/deletefiles/msvc/Release DLL src/demo/deletefiles/msvc/Release Static src/demo/deletefiles/msvc/Debug DLL src/demo/deletefiles/msvc/Debug Static src/demo/indexfiles/msvc/Release DLL src/demo/indexfiles/msvc/Release Static src/demo/indexfiles/msvc/Debug DLL src/demo/indexfiles/msvc/Debug Static src/demo/searchfiles/msvc/Release DLL src/demo/searchfiles/msvc/Release Static src/demo/searchfiles/msvc/Debug DLL src/demo/searchfiles/msvc/Debug Static CMakeCache.txt CMakeFiles/ CTestTestfile.cmake Makefile cmake_install.cmake cmake_uninstall.cmake include/Config.h install_manifest.txt liblucene++-contrib.pc liblucene++.pc src/contrib/CMakeFiles/ src/contrib/CTestTestfile.cmake src/contrib/Makefile src/contrib/cmake_install.cmake src/core/CMakeFiles/ src/core/CTestTestfile.cmake src/core/Makefile src/core/cmake_install.cmake src/demo/CMakeFiles/ src/demo/CTestTestfile.cmake src/demo/Makefile src/demo/cmake_install.cmake src/test/CMakeFiles/ src/test/CTestTestfile.cmake src/test/Makefile 
src/test/cmake_install.cmake LucenePlusPlus-rel_3.0.4/APACHE.license000066400000000000000000000261361217574114600176100ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
LucenePlusPlus-rel_3.0.4/AUTHORS000066400000000000000000000000571217574114600163650ustar00rootroot00000000000000Alan Wright Ben van Klinken Jamie Kirkpatrick LucenePlusPlus-rel_3.0.4/CMakeLists.txt000066400000000000000000000152071217574114600200600ustar00rootroot00000000000000project(lucene++-base) #################################### # VERSION information #These versions match the Lucene version SET(LUCENE++_VERSION_MAJOR "3") SET(LUCENE++_VERSION_MINOR "0") SET(LUCENE++_VERSION_REVISION "3") SET(LUCENE++_VERSION_PATCH "4") # SOVERSION information #Must be incremented for releases if the api is not backwards compatible SET(LUCENE++_SOVERSION "0") #derived versions MATH(EXPR LUCENE++_INT_VERSION "(${LUCENE++_VERSION_MAJOR} * 1000000) + (${LUCENE++_VERSION_MINOR} * 10000) + (${LUCENE++_VERSION_REVISION} * 100) + (${LUCENE++_VERSION_PATCH} * 1)" ) SET(LUCENE++_VERSION "${LUCENE++_VERSION_MAJOR}.${LUCENE++_VERSION_MINOR}.${LUCENE++_VERSION_REVISION}.${LUCENE++_VERSION_PATCH}") MESSAGE(${LUCENE++_INT_VERSION}) MESSAGE(${LUCENE++_VERSION}) #################################### #################################### # Build system options and includes #################################### CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR) #build policies if(COMMAND cmake_policy) cmake_policy(SET CMP0003 NEW) endif(COMMAND cmake_policy) # include specific modules set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") #if setup using the Toolchain-llvm.cmake file, then use llvm... IF ( ENABLE_LLVM ) INCLUDE (Toolchain-llvm) ENDIF ( ENABLE_LLVM ) #define options... INCLUDE (Lucene++Docs) INCLUDE (FindThreads) INCLUDE (TestCXXAcceptsFlag) ENABLE_TESTING() #Single output directory for building all executables and libraries. 
SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin CACHE PATH "Executable Output Directory" FORCE) SET(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin CACHE PATH "Library Output Directory" FORCE) #################################### #################################### #user specified build options #################################### IF(NOT CMAKE_BUILD_TYPE) SET(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) ELSE(NOT CMAKE_BUILD_TYPE) MESSAGE( "Compiling as ${CMAKE_BUILD_TYPE}" ) ENDIF(NOT CMAKE_BUILD_TYPE) OPTION(ENABLE_PACKAGING "create build scripts for creating lucene++ packages" OFF) OPTION(ENABLE_NEDMALLOC "use nedmalloc for memory allocations" OFF) OPTION(LUCENE_USE_STATIC_BOOST_LIBS "use static boost libraries " OFF) OPTION(ENABLE_CYCLIC_CHECK "enable cyclic checking " OFF) #install path options SET(LIB_DESTINATION "lib" CACHE STRING "Define lib output directory name") IF ( ENABLE_NEDMALLOC ) SET(DEFINE_USE_NEDMALLOC "define") ELSE ( ENABLE_NEDMALLOC ) SET(DEFINE_USE_NEDMALLOC "undef") ENDIF ( ENABLE_NEDMALLOC ) IF ( ENABLE_STANDARD_ALLOCATOR ) SET(DEFINE_USE_ALLOCATOR "undef") ELSE ( ENABLE_STANDARD_ALLOCATOR ) SET(DEFINE_USE_ALLOCATOR "define") ENDIF ( ENABLE_STANDARD_ALLOCATOR ) IF ( ENABLE_CYCLIC_CHECK ) SET(DEFINE_USE_CYCLIC_CHECK "define") ELSE ( ENABLE_CYCLIC_CHECK ) SET(DEFINE_USE_CYCLIC_CHECK "undef") ENDIF ( ENABLE_CYCLIC_CHECK ) #################################### #################################### # PLATFORM specific options #################################### #add a debug build postfix if(WIN32 OR WIN64) set(CMAKE_DEBUG_POSTFIX "d") endif(WIN32 OR WIN64) if(NOT MSVC AND NOT CMAKE_SYSTEM MATCHES "SunOS-5*.") add_definitions(-fPIC) endif(NOT MSVC AND NOT CMAKE_SYSTEM MATCHES "SunOS-5*.") INCLUDE(MacroCheckGccVisibility) MACRO_CHECK_GCC_VISIBILITY(LPP_HAVE_GXXCLASSVISIBILITY) if ( LPP_HAVE_GXXCLASSVISIBILITY ) ADD_DEFINITIONS(-DLPP_HAVE_GXXCLASSVISIBILITY) 
endif() IF(CYGWIN) ADD_DEFINITIONS(-D__LARGE64_FILES) ENDIF(CYGWIN) #set ansi mode SET(ENABLE_ANSI_MODE OFF) IF(CMAKE_COMPILER_IS_GNUCXX) SET(ENABLE_ANSI_MODE ON) #exceptions: IF(MINGW OR CYGWIN) SET(ENABLE_ANSI_MODE OFF) ENDIF(MINGW OR CYGWIN) ENDIF(CMAKE_COMPILER_IS_GNUCXX) IF ( CMAKE_COMPILER_IS_GNUCC ) IF( ENABLE_ANSI_MODE ) SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ansi") ENDIF ( ENABLE_ANSI_MODE ) ENDIF(CMAKE_COMPILER_IS_GNUCC) #################################### #find boost #################################### SET(Boost_USE_STATIC_LIBS ${LUCENE_USE_STATIC_BOOST_LIBS}) SET(Boost_USE_MULTITHREADED ON) #Boost 1.38 required for bug fixes in basic_streambuf. #The following line fails in earlier builds, so if altered, may allow older versions of boost: #boost::gregorian::date date = parser.parse_date(paddedDate.c_str(), dateFormat->c_str(), svp); find_package( Boost 1.38.0 COMPONENTS date_time filesystem iostreams regex system thread unit_test_framework REQUIRED) IF (Boost_FOUND) MESSAGE( STATUS "boost found: includes in ${Boost_INCLUDE_DIRS}, library in ${Boost_LIBRARY_DIRS}") SET(LUCENE_BOOST_LIBS ${Boost_FILESYSTEM_LIBRARY_RELEASE} ${Boost_IOSTREAMS_LIBRARY_RELEASE} ${Boost_REGEX_LIBRARY_RELEASE} ${Boost_SYSTEM_LIBRARY_RELEASE} ${Boost_THREAD_LIBRARY_RELEASE}) ENDIF (Boost_FOUND) #################################### # Pre-Compiled headers #################################### INCLUDE(PCHSupport) #todo: make this optional and make it possible to add more headers - like boost threads ################################# # generate Config.h ################################# configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/Config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/include/Config.h @ONLY) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/include) #################################### # The subdirs #################################### #include sub-projects ADD_SUBDIRECTORY (src/core) ADD_SUBDIRECTORY (src/contrib) ADD_SUBDIRECTORY (src/demo EXCLUDE_FROM_ALL) 
ADD_SUBDIRECTORY (src/test) ################################# # install pkg-config file ################################# IF(NOT WIN32) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/liblucene++.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc @ONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/liblucene++-contrib.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc ${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc DESTINATION ${LIB_DESTINATION}/pkgconfig ) ENDIF(NOT WIN32) #################################### # Custom targets #################################### #add uninstall command CONFIGURE_FILE( "${CMAKE_MODULE_PATH}/cmake_uninstall.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" IMMEDIATE @ONLY) ADD_CUSTOM_TARGET(uninstall "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake") #################################### # Finalise build script #################################### #this must go last... IF (ENABLE_PACKAGING) INCLUDE(CreateLucene++Packages) ENDIF ( ENABLE_PACKAGING) LucenePlusPlus-rel_3.0.4/COPYING000066400000000000000000000010521217574114600163440ustar00rootroot00000000000000This source code is dual-licensed. ================================== LGPL: This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. See the file LGPL.licence Apache 2.0: Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. See the file APACHE.licence LucenePlusPlus-rel_3.0.4/GPL.license000066400000000000000000001045131217574114600173050ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. 
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. 
For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. 
To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. 
The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. 
You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. 
d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. 
A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 
Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . LucenePlusPlus-rel_3.0.4/LGPL.license000066400000000000000000000167411217574114600174260ustar00rootroot00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. "The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. 
A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. 
You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. 
A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. 
If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. LucenePlusPlus-rel_3.0.4/README.PACKAGE000066400000000000000000000001621217574114600172240ustar00rootroot00000000000000An up to date C++ port of the popular Java Lucene library, a high-performance, full-featured text search engine. LucenePlusPlus-rel_3.0.4/README.rst000066400000000000000000000076401217574114600170110ustar00rootroot00000000000000Lucene++ ========== Welcome to lucene++ version **3.0.4**. Lucene++ is an up to date C++ port of the popular Java `Lucene `_ library, a high-performance, full-featured text search engine. Components ---------------- - liblucene++ library - liblucene_contrib library - lucene_tester (unit tester) - deletefiles (demo) - indexfiles (demo) - searchfiles (demo) Useful Resources ---------------- Official `Java Lucene `_ - useful links and documentation relevant to Lucene and lucene++. `Lucene in Action `_ by Otis Gospodnetic and Erik Hatcher. Build Instructions using CMake ------------------------------ You'll need the `Boost `_ libraries installed somewhere. 
On Debian systems, the following packages are required: - libboost-date-time-dev - libboost-filesystem-dev - libboost-regex-dev - libboost-thread-dev - libboost-iostreams-dev - libboost-test-dev .. note:: At present, you must use Boost 1.49 or older. There is an incompatibility to Boost 1.50 and newer that causes ``make`` to fail. See `issue #30`__. __ https://github.com/luceneplusplus/LucenePlusPlus/issues/30 To build the library the following commands should be issued:: $ cmake . $ make $ make install To build the demo programs, execute the following after having first built the library:: $ make indexfiles searchfiles deletefiles Build Instructions using Waf ------------------------------ **After running CMake** you can use `Waf `_ to drive the build. Waf requires that you have a recent version of `Python `_ installed on your system. To build the library the following commands should be issued:: $ ./waf configure $ ./waf --static build Additionally static builds of the following libraries are required for a successful build: - boost::date_time - boost::filesystem - boost::regex - boost::thread - boost::system - boost::iostreams - boost::unit_test_framework The libraries and headers should be made available at a standard prefix (``/usr/local`` for example). Build Instructions for Windows systems -------------------------------------- Open solution lucene++.sln located in the *msvc* folder into Visual Studio 2008 and build. **Note: "BOOST_ROOT" environment variable must be defined to point to the Boost library directory (eg. c:\\boost_1_44_0)** You'll need Boost installed. `BoostPro `_ has some precompiled Windows packages. You'll need the following extras installed:: - boost::system - boost::thread - boost::filesystem - boost::regex - boost::date_time - boost::iostreams - boost::unit_test_framework Building Performance -------------------- Use of ccache will speed up build times a lot. 
I found it easiest to add the ``/usr/lib/ccache`` directory to the beginning of your paths. This works for most common compilers:: PATH=/usr/lib/ccache:$PATH To run unit test suite ---------------------- lucene_tester is built using the `Boost Unit Test Framework `_ and is launched by the following command:: $ bin/lucene_tester --show_progress=yes Other `command options `_ can be supplied. Acknowledgements ---------------- - Ben van Klinken and contributors to the CLucene project for inspiring this project. - Jamie Kirkpatrick for cross-platform and waf build support. - `nedmalloc `_ Copyright 2005-2006 Niall Douglas - md5 Copyright (C) 1999, 2000, 2002 Aladdin Enterprises - `Unicode character properties (guniprop) `_ Copyright (C) 1999 Tom Tromey, Copyright (C) 2000 Red Hat, Inc. LucenePlusPlus-rel_3.0.4/REQUESTS000066400000000000000000000000751217574114600165130ustar00rootroot00000000000000See https://github.com/luceneplusplus/LucenePlusPlus/issues LucenePlusPlus-rel_3.0.4/build/000077500000000000000000000000001217574114600164125ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/build/clang.py000066400000000000000000000034451217574114600200560ustar00rootroot00000000000000############################################################################# ## Copyright (c) 2009-2011 Alan Wright. All rights reserved. ## Distributable under the terms of either the Apache License (Version 2.0) ## or the GNU Lesser General Public License. ############################################################################# from TaskGen import feature import Options import sys @feature('cc') def apply_clang(self): ''' Replaced the default compiler with clang if required. 
''' if not getattr(self, 'clang', False) or Options.options.disable_clang: return self.env['CC'] = self.env['CLANG'] or self.env['CC'] if sys.platform == "darwin": # workaround problems with non-static inline functions # http://clang.llvm.org/compatibility.html self.env['CCFLAGS'] += ['-std=gnu89'] @feature('cc') def apply_clang_cpp(self): ''' Replaced the default compiler with clang if required. ''' if not getattr(self, 'clang', False) or Options.options.disable_clang: return self.env['CPP'] = self.env['CLANGPP'] or self.env['CXX'] self.env['CXX'] = self.env['CLANGPP'] or self.env['CXX'] if sys.platform == "darwin": self.env['shlib_CXXFLAGS'] = ['-fPIC'] def options(opt): """ Add options specific the codehash tool """ opt.add_option('--noclang', dest = 'disable_clang', action = 'store_true', default = False, help = 'disable the clang compiler if it is available') def configure(conf): search_paths = ['/Xcode4/usr/bin/'] if sys.platform == "darwin" else [] if not getattr(conf, 'clang', False) or Options.options.disable_clang: return conf.find_program('clang', var='CLANG') conf.find_program('clang++', var='CLANGPP', path_list = search_paths) LucenePlusPlus-rel_3.0.4/build/gch.py000066400000000000000000000026211217574114600175260ustar00rootroot00000000000000############################################################################# ## Copyright (c) 2009-2011 Alan Wright. All rights reserved. ## Distributable under the terms of either the Apache License (Version 2.0) ## or the GNU Lesser General Public License. ############################################################################# #! 
/usr/bin/env python # encoding: utf-8 # Thomas Nagy, 2006 (ita) """ for some obscure reason, the precompiled header will not be taken if all.h is in the same directory as main.cpp we recommend to add the header to compile in a separate directory without any sources Note: the #warning will come once when the .h is compiled, it will not come when the .cpp is compiled Note: do not forget to set the include paths (include=...) """ from waflib.TaskGen import feature, after from waflib.Task import Task from waflib.Tools import c_preproc #@feature('cxx') <- python >= 2.4 #@after('apply_link') def process_pch(self): if getattr(self, 'pch', ''): nodes = self.to_nodes(self.pch) for x in nodes: self.create_task('gchx', x, x.change_ext('.gch')) feature('cxx')(process_pch) after('apply_link')(process_pch) class gchx(Task): run_str = '${CXX} ${CXXFLAGS} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${CXX_SRC_F}${SRC} ${CXX_TGT_F}${TGT}' scan = c_preproc.scan ext_out = ['.h'] color = 'BLUE' LucenePlusPlus-rel_3.0.4/cmake/000077500000000000000000000000001217574114600163735ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/cmake/CreateLucene++Packages.cmake000066400000000000000000000071061217574114600235250ustar00rootroot00000000000000#Creates all the relevant packages SET(CPACK_PACKAGE_VERSION_MAJOR ${LUCENE++_VERSION_MAJOR}) SET(CPACK_PACKAGE_VERSION_MINOR ${LUCENE++_VERSION_MINOR}) SET(CPACK_PACKAGE_VERSION_REVISION ${LUCENE++_VERSION_REVISION}) SET(CPACK_PACKAGE_VERSION_PATCH ${LUCENE++_VERSION_MAJOR}) SET(CPACK_PACKAGE_VERSION ${LUCENE++_VERSION}) SET(CPACK_PACKAGE_SOVERSION ${LUCENE++_SOVERSION}) SET(CPACK_PACKAGE_VENDOR "Alan Wright") SET(CPACK_PACKAGE_CONTACT "alanwright.home@googlemail.com") SET(CPACK_PACKAGE_NAME "liblucene++") SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Lucene++ is an up to date C++ port of the popular Java Lucene library, a high-performance, 
full-featured text search engine") SET(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/COPYING") #SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") #so, what are we going to install? SET(CPACK_INSTALL_CMAKE_PROJECTS "${CMAKE_BINARY_DIR};lucene++;ALL;/") SET(CPACK_COMPONENTS_ALL development runtime) SET(CPACK_GENERATOR "TGZ") SET(CPACK_PACKAGE_FILE_NAME "lucene++-${CPACK_PACKAGE_VERSION}-${CMAKE_SYSTEM_NAME}") IF( (WIN32 OR WIN64) AND NOT UNIX) SET(CPACK_SOURCE_GENERATOR "ZIP") ELSE( (WIN32 OR WIN64) AND NOT UNIX) SET(CPACK_SOURCE_GENERATOR "TBZ2;TGZ") ENDIF( (WIN32 OR WIN64) AND NOT UNIX) SET(CPACK_SOURCE_PACKAGE_FILE_NAME "lucene++-${CPACK_PACKAGE_VERSION}-Source") #specific packaging requirements:, SET(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.4), libgcc1 (>= 1:4.1.1-21), libstdc++6 (>= 4.1.1-21), libboost-date-time1.42.0, libboost-filesystem1.42.0, libboost-regex1.42.0, libboost-thread1.42.0, libboost-iostreams1.42.0") SET(CPACK_DEBIAN_PACKAGE_SECTION "libs") SET(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0") SET(CPACK_RPM_PACKAGE_GROUP "libs") SET(CPACK_RPM_PACKAGE_REQUIRES "libboost-date-time1.42.0, libboost-filesystem1.42.0, libboost-regex1.42.0, libboost-thread1.42.0, libboost-iostreams1.42.0") #don't include the current binary dir. get_filename_component(lucene++-base_BINARY_DIR_name ${lucene++-base_BINARY_DIR} NAME) SET(CPACK_SOURCE_IGNORE_FILES "/\\\\.svn/" "/\\\\.git/" "/\\\\.waf*/" "\\\\.swp$" "\\\\.#;/#" ".*~" ".*\\\\.tmp" ".*\\\\.save" "/${lucene++-base_BINARY_DIR_name}/" ) IF( (WIN32 OR WIN64) AND NOT UNIX) # There is a bug in NSI that does not handle full unix paths properly. Make # sure there is at least one set of four (4) backlasshes. 
SET(CPACK_GENERATOR "${CPACK_GENERATOR};NSIS") #SET(CPACK_PACKAGE_ICON "${CMake_SOURCE_DIR}/Utilities/Release\\\\InstallIcon.bmp") #SET(CPACK_NSIS_INSTALLED_ICON_NAME "bin\\\\MyExecutable.exe") SET(CPACK_NSIS_DISPLAY_NAME "${CPACK_PACKAGE_INSTALL_DIRECTORY} Lucene++ Library") SET(CPACK_NSIS_HELP_LINK "http:\\\\\\\\lucene++.sourceforge.net") SET(CPACK_NSIS_URL_INFO_ABOUT "http:\\\\\\\\lucene++.sourceforge.net") SET(CPACK_NSIS_CONTACT "lucene++-developers@lists.sourceforge.net") #SET(CPACK_NSIS_MODIFY_PATH ON) ELSE( (WIN32 OR WIN64) AND NOT UNIX) # SET(CPACK_STRIP_FILES "bin/xxx") SET(CPACK_SOURCE_STRIP_FILES "") ENDIF( (WIN32 OR WIN64) AND NOT UNIX) #SET(CPACK_PACKAGE_EXECUTABLES "MyExecutable" "My Executable") ADD_CUSTOM_TARGET(dist-package COMMAND rsync -avP -e ssh ${CPACK_PACKAGE_FILE_NAME}.* ustramooner@frs.sourceforge.net:uploads/ # DEPENDS package ) ADD_CUSTOM_TARGET(dist-package_source COMMAND rsync -avP -e ssh ${CPACK_SOURCE_PACKAGE_FILE_NAME}.* ustramooner@frs.sourceforge.net:uploads/ # DEPENDS package_source ) #this must be last INCLUDE(CPack) LucenePlusPlus-rel_3.0.4/cmake/Lucene++Docs.cmake000066400000000000000000000127211217574114600215520ustar00rootroot00000000000000# - Lucene++Docs.cmake # This file provides support for building the Lucene++ Documentation. # To build the documention, you will have to enable it # and then do the equivalent of "make doc". OPTION(ENABLE_DOCS "Build the Lucene++ documentation." OFF) MACRO(SET_YESNO) FOREACH(param ${ARGV}) IF ( ${param} ) SET(${param} "YES") ELSE ( ${param} ) SET(${param} "NO") ENDIF ( ${param} ) ENDFOREACH(param) ENDMACRO(SET_YESNO) MACRO(SET_BLANK) FOREACH(param ${ARGV}) IF ( NOT ${param} ) SET(${param} "") ENDIF ( NOT ${param} ) ENDFOREACH(param) ENDMACRO(SET_BLANK) IF (ENABLE_DOCS) OPTION(DOCS_HTML_HELP "Doxygen should compile HTML into a Help file (CHM)." NO) OPTION(DOCS_HTML "Doxygen should build HTML documentation." YES) OPTION(DOCS_XML "Doxygen should build XML documentation." 
NO) OPTION(DOCS_RTF "Doxygen should build RTF documentation." NO) OPTION(DOCS_MAN "Doxygen should build man documentation." NO) OPTION(DOCS_TAGFILE "Doxygen should build a tagfile." NO) OPTION(DOCS_LATEX "Doxygen should build Latex documentation." NO ) MARK_AS_ADVANCED( DOCS_HTML_HELP DOCS_LATEX DOCS_XML DOCS_HTML DOCS_RTF DOCS_MAN DOCS_TAGFILE ) # # Check for the tools # FIND_PACKAGE(Doxygen) IF ( DOXYGEN_FOUND ) # This creates a new target to build documentation. # It runs ${DOXYGEN_EXECUTABLE} which is the full path and executable to # Doxygen on your system, set by the FindDoxygen.cmake module # (called by FindDocumentation.cmake). # It runs the final generated Doxyfile against it. # The DOT_PATH is substituted into the Doxyfile. ADD_CUSTOM_TARGET(doc ${DOXYGEN_EXECUTABLE} ${PROJECT_BINARY_DIR}/doc/doxyfile ) IF ( DOCS_HTML_HELP ) IF ( NOT DOCS_HTML ) MESSAGE ( FATAL_ERROR "DOCS_HTML is required to buidl DOCS_HTML_HELP" ) ENDIF ( NOT DOCS_HTML ) FIND_PACKAGE(HTMLHelp) IF ( NOT HTML_HELP_COMPILER ) MESSAGE(FATAL_ERROR "HTML Help compiler not found, turn DOCS_HTML_HELP off to proceed") ENDIF ( NOT HTML_HELP_COMPILER ) #make cygwin work with hhc... 
IF ( CYGWIN ) EXECUTE_PROCESS ( COMMAND cygpath "${HTML_HELP_COMPILER}" OUTPUT_VARIABLE HTML_HELP_COMPILER_EX ) STRING ( REPLACE "\n" "" HTML_HELP_COMPILER_EX ${HTML_HELP_COMPILER_EX} ) STRING ( REPLACE "\r" "" HTML_HELP_COMPILER_EX ${HTML_HELP_COMPILER_EX} ) SET ( HTML_HELP_COMPILER_EX "\"${HTML_HELP_COMPILER_EX}\"" ) ELSE ( CYGWIN ) SET ( HTML_HELP_COMPILER_EX ${HTML_HELP_COMPILER} ) ENDIF ( CYGWIN ) ENDIF ( DOCS_HTML_HELP ) IF ( DOCS_LATEX ) FIND_PACKAGE(LATEX) IF ( NOT LATEX_COMPILER ) MESSAGE(FATAL_ERROR "Latex compiler not found, turn DOCS_LATEX off to proceed") ENDIF ( NOT LATEX_COMPILER ) ENDIF ( DOCS_LATEX ) FIND_PACKAGE(Perl) IF ( DOXYGEN_DOT_EXECUTABLE ) SET ( HAVE_DOT "YES" ) ELSE ( DOXYGEN_DOT_EXECUTABLE ) SET ( HAVE_DOT "NO" ) ENDIF ( DOXYGEN_DOT_EXECUTABLE ) #doxygen expects YES/NO parameters SET_YESNO( DOCS_HTML_HELP DOCS_LATEX DOCS_XML DOCS_HTML DOCS_RTF DOCS_MAN ) #empty out paths if not found SET_BLANK( PERL_EXECUTABLE DOXYGEN_DOT_EXECUTABLE HTML_HELP_COMPILER LATEX_COMPILER ) IF ( DOCS_TAGFILE ) SET ( DOCS_TAGFILE_LOCATION "${PROJECT_BINARY_DIR}/doc/tag/lucene++.tag" ) ENDIF ( DOCS_TAGFILE ) # This processes our Doxyfile.cmake and substitutes paths to generate a final Doxyfile CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/doc/Doxyfile.cmake ${PROJECT_BINARY_DIR}/doc/doxyfile ) CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/doc/helpheader.htm.cmake ${PROJECT_BINARY_DIR}/doc/helpheader.htm ) CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/doc/helpfooter.htm.cmake ${PROJECT_BINARY_DIR}/doc/helpfooter.htm ) CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/doc/doxygen.css.cmake ${PROJECT_BINARY_DIR}/doc/html/doxygen.css ) #create a target for tar.gz html help FIND_PACKAGE(UnixCommands) IF ( TAR AND GZIP ) ADD_CUSTOM_TARGET(doc-tarz COMMAND "${TAR}" "-czf" "${PROJECT_BINARY_DIR}/doc/lucene++-doc.tar.gz" ./ WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/doc/html/" #DEPENDS doc ) ENDIF ( TAR AND GZIP ) #install HTML pages if they were built IF ( DOCS_HTML AND NOT WIN32 ) INSTALL(DIRECTORY 
${PROJECT_BINARY_DIR}/doc/html/ DESTINATION share/doc/lucene++-${LUCENE++_VERSION}) ENDIF ( DOCS_HTML AND NOT WIN32 ) #install man pages if they were built IF ( DOCS_MAN ) INSTALL(DIRECTORY ${PROJECT_BINARY_DIR}/doc/man/ DESTINATION man) ENDIF ( DOCS_MAN ) ELSE ( DOXYGEN_FOUND ) MESSAGE(FATAL_ERROR "Doxygen not found, turn ENABLE_DOCS off to proceed") ENDIF ( DOXYGEN_FOUND ) ENDIF (ENABLE_DOCS) LucenePlusPlus-rel_3.0.4/cmake/MacroCheckGccVisibility.cmake000066400000000000000000000047311217574114600240660ustar00rootroot00000000000000# # Copyright (c) 2006, Alexander Neundorf # Copyright (c) 2006, Laurent Montel, # # Redistribution and use is allowed according to the terms of the BSD license. # For details see the accompanying COPYING-CMAKE-SCRIPTS file. macro(MACRO_CHECK_GCC_VISIBILITY GccVisibility) if (CMAKE_COMPILER_IS_GNUCXX) include(CheckCXXCompilerFlag) include(MacroEnsureVersion) # visibility support check_cxx_compiler_flag(-fvisibility=hidden ${GccVisibility}) # get the gcc version exec_program(${CMAKE_C_COMPILER} ARGS --version OUTPUT_VARIABLE _gcc_version_info) string (REGEX MATCH "[345]\\.[0-9]\\.[0-9]" _gcc_version "${_gcc_version_info}") if (NOT _gcc_version) # clang reports: clang version 1.1 (trunk 95754) string (REGEX MATCH "clang version ([123]\\.[0-9])" _gcc_version "${_gcc_version_info}") if ( _gcc_version ) string(REGEX REPLACE "clang version (.*)" "\\1.0" _gcc_version "${_gcc_version}" ) endif ( _gcc_version ) # gcc on mac just reports: "gcc (GCC) 3.3 20030304 ..." 
without the patch level, handle this here: if (NOT _gcc_version) string (REGEX REPLACE ".*\\(GCC\\).* ([34]\\.[0-9]) .*" "\\1.0" _gcc_version "${_gcc_version_info}") endif (NOT _gcc_version) endif (NOT _gcc_version) macro_ensure_version("4.1.0" "${_gcc_version}" GCC_IS_NEWER_THAN_4_1) macro_ensure_version("4.2.0" "${_gcc_version}" GCC_IS_NEWER_THAN_4_2) set(_GCC_COMPILED_WITH_BAD_ALLOCATOR FALSE) if (GCC_IS_NEWER_THAN_4_1) exec_program(${CMAKE_C_COMPILER} ARGS -v OUTPUT_VARIABLE _gcc_alloc_info) string(REGEX MATCH "(--enable-libstdcxx-allocator=mt)" _GCC_COMPILED_WITH_BAD_ALLOCATOR "${_gcc_alloc_info}") endif (GCC_IS_NEWER_THAN_4_1) if (${GccVisibility} AND GCC_IS_NEWER_THAN_4_1 AND NOT _GCC_COMPILED_WITH_BAD_ALLOCATOR) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden") if (GCC_IS_NEWER_THAN_4_2) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") endif (GCC_IS_NEWER_THAN_4_2) else (${GccVisibility} AND GCC_IS_NEWER_THAN_4_1 AND NOT _GCC_COMPILED_WITH_BAD_ALLOCATOR) set (${GccVisibility} 0) endif (${GccVisibility} AND GCC_IS_NEWER_THAN_4_1 AND NOT _GCC_COMPILED_WITH_BAD_ALLOCATOR) else (CMAKE_COMPILER_IS_GNUCXX) set(${GccVisibility} FALSE) endif (CMAKE_COMPILER_IS_GNUCXX) endmacro(MACRO_CHECK_GCC_VISIBILITY) LucenePlusPlus-rel_3.0.4/cmake/MacroEnsureVersion.cmake000066400000000000000000000066561217574114600232030ustar00rootroot00000000000000# This macro compares version numbers of the form "x.y.z" # MACRO_ENSURE_VERSION( FOO_MIN_VERSION FOO_VERSION_FOUND FOO_VERSION_OK) # will set FOO_VERSIN_OK to true if FOO_VERSION_FOUND >= FOO_MIN_VERSION # where both have to be in a 3-part-version format, leading and trailing # text is ok, e.g. 
# MACRO_ENSURE_VERSION( "2.5.31" "flex 2.5.4a" VERSION_OK) # which means 2.5.31 is required and "flex 2.5.4a" is what was found on the system # Copyright (c) 2006, David Faure, # # Redistribution and use is allowed according to the terms of the BSD license. # For details see the accompanying COPYING-CMAKE-SCRIPTS file. MACRO(MACRO_ENSURE_VERSION requested_version found_version var_too_old) # parse the parts of the version string STRING(REGEX REPLACE "([0-9]+)\\.[0-9]+\\.[0-9]+" "\\1" req_major_vers "${requested_version}") STRING(REGEX REPLACE "[0-9]+\\.([0-9]+)\\.[0-9]+" "\\1" req_minor_vers "${requested_version}") STRING(REGEX REPLACE "[0-9]+\\.[0-9]+\\.([0-9]+)" "\\1" req_patch_vers "${requested_version}") STRING(REGEX REPLACE "[^0-9]*([0-9]+)\\.[0-9]+\\.[0-9]+.*" "\\1" found_major_vers "${found_version}") STRING(REGEX REPLACE "[^0-9]*[0-9]+\\.([0-9]+)\\.[0-9]+.*" "\\1" found_minor_vers "${found_version}") STRING(REGEX REPLACE "[^0-9]*[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" found_patch_vers "${found_version}") # compute an overall version number which can be compared at once MATH(EXPR req_vers_num "${req_major_vers}*10000 + ${req_minor_vers}*100 + ${req_patch_vers}") MATH(EXPR found_vers_num "${found_major_vers}*10000 + ${found_minor_vers}*100 + ${found_patch_vers}") if (found_vers_num LESS req_vers_num) set( ${var_too_old} FALSE ) else (found_vers_num LESS req_vers_num) set( ${var_too_old} TRUE ) endif (found_vers_num LESS req_vers_num) ENDMACRO(MACRO_ENSURE_VERSION) # This macro compares version numbers of the form "x.y" # MACRO_ENSURE_VERSION( FOO_MIN_VERSION FOO_VERSION_FOUND FOO_VERSION_OK) # will set FOO_VERSIN_OK to true if FOO_VERSION_FOUND >= FOO_MIN_VERSION # where both have to be in a 2-part-version format, leading and trailing # text is ok, e.g. 
# MACRO_ENSURE_VERSION( "0.5" "foo 0.6" VERSION_OK) # which means 0.5 is required and "foo 0.6" is what was found on the system # Copyright (c) 2006, David Faure, # Copyright (c) 2007, Pino Toscano, # # Redistribution and use is allowed according to the terms of the BSD license. # For details see the accompanying COPYING-CMAKE-SCRIPTS file. MACRO(MACRO_ENSURE_VERSION2 requested_version found_version var_too_old) # parse the parts of the version string STRING(REGEX REPLACE "([0-9]+)\\.[0-9]+" "\\1" req_major_vers "${requested_version}") STRING(REGEX REPLACE "[0-9]+\\.([0-9]+)" "\\1" req_minor_vers "${requested_version}") STRING(REGEX REPLACE "[^0-9]*([0-9]+)\\.[0-9]+.*" "\\1" found_major_vers "${found_version}") STRING(REGEX REPLACE "[^0-9]*[0-9]+\\.([0-9]+).*" "\\1" found_minor_vers "${found_version}") # compute an overall version number which can be compared at once MATH(EXPR req_vers_num "${req_major_vers}*100 + ${req_minor_vers}") MATH(EXPR found_vers_num "${found_major_vers}*100 + ${found_minor_vers}") if (found_vers_num LESS req_vers_num) set( ${var_too_old} FALSE ) else (found_vers_num LESS req_vers_num) set( ${var_too_old} TRUE ) endif (found_vers_num LESS req_vers_num) ENDMACRO(MACRO_ENSURE_VERSION2) LucenePlusPlus-rel_3.0.4/cmake/PCHSupport.cmake000066400000000000000000000216111217574114600214050ustar00rootroot00000000000000# - Try to find precompiled headers support for GCC 3.4 and 4.x # Once done this will define: # # Variable: # PCHSupport_FOUND # PCHSupport_ENABLED # # Macro: # ADD_PRECOMPILED_HEADER _targetName _input _dowarn # ADD_PRECOMPILED_HEADER_TO_TARGET _targetName _input _pch_output_to_use _dowarn # # Since this macro overides COMPILER_FLAGS on a target, you must use the following # variables instead. 
# set PCH_ADDITIONAL_COMPILER_FLAGS to add extra COMPILER_FLAGS to targets # set PCH_ADDITIONAL_COMPILER_FLAGS_${targetName} to add extra COMPILER_FLAGS to a specific target # IF(CMAKE_COMPILER_IS_GNUCXX) EXEC_PROGRAM( ${CMAKE_CXX_COMPILER} ARGS ${CMAKE_CXX_COMPILER_ARG1} -dumpversion OUTPUT_VARIABLE gcc_compiler_version) #MESSAGE("GCC Version: ${gcc_compiler_version}") IF(gcc_compiler_version MATCHES "4\\.[0-9]\\.[0-9]") SET(PCHSupport_FOUND TRUE) ELSE(gcc_compiler_version MATCHES "4\\.[0-9]\\.[0-9]") IF(gcc_compiler_version MATCHES "3\\.4\\.[0-9]") SET(PCHSupport_FOUND TRUE) ENDIF(gcc_compiler_version MATCHES "3\\.4\\.[0-9]") ENDIF(gcc_compiler_version MATCHES "4\\.[0-9]\\.[0-9]") SET(_PCH_include_prefix "-I") ELSE(CMAKE_COMPILER_IS_GNUCXX) IF( (WIN32 OR WIN64) ) #SET(PCHSupport_FOUND TRUE) # for experimental msvc support #SET(_PCH_include_prefix "/I") SET(PCHSupport_FOUND FALSE) ELSE( (WIN32 OR WIN64) ) SET(PCHSupport_FOUND FALSE) ENDIF( (WIN32 OR WIN64) ) ENDIF(CMAKE_COMPILER_IS_GNUCXX) IF ( DEFINED PCHSupport_ENABLED AND NOT PCHSupport_ENABLED ) SET(PCHSupport_FOUND FALSE) ENDIF ( DEFINED PCHSupport_ENABLED AND NOT PCHSupport_ENABLED) MACRO(_PCH_GET_COMPILE_FLAGS _out_compile_flags) STRING(TOUPPER "CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}" _flags_var_name) SET(${_out_compile_flags} ${${_flags_var_name}} ) IF(CMAKE_COMPILER_IS_GNUCXX) GET_TARGET_PROPERTY(_targetType ${_PCH_current_target} TYPE) IF(${_targetType} STREQUAL SHARED_LIBRARY) LIST(APPEND ${_out_compile_flags} "${${_out_compile_flags}} -fPIC") ENDIF(${_targetType} STREQUAL SHARED_LIBRARY) ELSE(CMAKE_COMPILER_IS_GNUCXX) ## TODO ... ? 
or does it work out of the box ENDIF(CMAKE_COMPILER_IS_GNUCXX) GET_DIRECTORY_PROPERTY(DIRINC INCLUDE_DIRECTORIES ) FOREACH(item ${DIRINC}) LIST(APPEND ${_out_compile_flags} "${_PCH_include_prefix}${item}") ENDFOREACH(item) GET_DIRECTORY_PROPERTY(_directory_flags DEFINITIONS) #MESSAGE("_directory_flags ${_directory_flags}" ) LIST(APPEND ${_out_compile_flags} ${_directory_flags}) LIST(APPEND ${_out_compile_flags} ${CMAKE_CXX_FLAGS} ) SEPARATE_ARGUMENTS(${_out_compile_flags}) ENDMACRO(_PCH_GET_COMPILE_FLAGS) MACRO(_PCH_WRITE_PCHDEP_CXX _targetName _include_file _dephelp) SET(${_dephelp} ${CMAKE_CURRENT_BINARY_DIR}/${_targetName}_pch_dephelp.cxx) FILE(WRITE ${${_dephelp}} "#include \"${_include_file}\" int testfunction() { return 0; } " ) ENDMACRO(_PCH_WRITE_PCHDEP_CXX ) MACRO(_PCH_GET_COMPILE_COMMAND out_command _input _output) FILE(TO_NATIVE_PATH ${_input} _native_input) FILE(TO_NATIVE_PATH ${_output} _native_output) IF(CMAKE_COMPILER_IS_GNUCXX) IF(CMAKE_CXX_COMPILER_ARG1) # remove leading space in compiler argument STRING(REGEX REPLACE "^ +" "" pchsupport_compiler_cxx_arg1 ${CMAKE_CXX_COMPILER_ARG1}) SET(${out_command} ${CMAKE_CXX_COMPILER} ${pchsupport_compiler_cxx_arg1} ${_compile_FLAGS} -x c++-header -o ${_output} ${_input} ) ELSE(CMAKE_CXX_COMPILER_ARG1) SET(${out_command} ${CMAKE_CXX_COMPILER} ${_compile_FLAGS} -x c++-header -o ${_output} ${_input} ) ENDIF(CMAKE_CXX_COMPILER_ARG1) ELSE(CMAKE_COMPILER_IS_GNUCXX) SET(_dummy_str "#include <${_input}>") FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/pch_dummy.cpp ${_dummy_str}) SET(${out_command} ${CMAKE_CXX_COMPILER} ${_compile_FLAGS} /c /Fp${_native_output} /Yc${_native_input} pch_dummy.cpp ) #/out:${_output} ENDIF(CMAKE_COMPILER_IS_GNUCXX) ENDMACRO(_PCH_GET_COMPILE_COMMAND ) MACRO(_PCH_GET_TARGET_COMPILE_FLAGS _targetName _cflags _header_name _pch_path _dowarn ) FILE(TO_NATIVE_PATH ${_pch_path} _native_pch_path) #message(${_native_pch_path}) IF(CMAKE_COMPILER_IS_GNUCXX) # for use with distcc and gcc >4.0.1 if 
preprocessed files are accessible # on all remote machines set # PCH_ADDITIONAL_COMPILER_FLAGS to -fpch-preprocess # if you want warnings for invalid header files (which is very inconvenient # if you have different versions of the headers for different build types # you may set _pch_dowarn IF (_dowarn) SET(${_cflags} "${PCH_ADDITIONAL_COMPILER_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS_${_targetName}} -include ${CMAKE_CURRENT_BINARY_DIR}/${_header_name} -Winvalid-pch " ) ELSE (_dowarn) SET(${_cflags} "${PCH_ADDITIONAL_COMPILER_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS_${_targetName}} -include ${CMAKE_CURRENT_BINARY_DIR}/${_header_name} " ) ENDIF (_dowarn) ELSE(CMAKE_COMPILER_IS_GNUCXX) set(${_cflags} "/Fp${_native_pch_path} /Yu${_header_name}" ) ENDIF(CMAKE_COMPILER_IS_GNUCXX) ENDMACRO(_PCH_GET_TARGET_COMPILE_FLAGS ) MACRO(GET_PRECOMPILED_HEADER_OUTPUT _targetName _input _output) GET_FILENAME_COMPONENT(_name ${_input} NAME) GET_FILENAME_COMPONENT(_path ${_input} PATH) SET(_output "${CMAKE_CURRENT_BINARY_DIR}/${_name}.gch/${_targetName}_${CMAKE_BUILD_TYPE}.h++") ENDMACRO(GET_PRECOMPILED_HEADER_OUTPUT _targetName _input) MACRO(ADD_PRECOMPILED_HEADER_TO_TARGET _targetName _input _pch_output_to_use ) if ( PCHSupport_FOUND ) # to do: test whether compiler flags match between target _targetName # and _pch_output_to_use GET_FILENAME_COMPONENT(_name ${_input} NAME) IF( "${ARGN}" STREQUAL "0") SET(_dowarn 0) ELSE( "${ARGN}" STREQUAL "0") SET(_dowarn 1) ENDIF("${ARGN}" STREQUAL "0") _PCH_GET_TARGET_COMPILE_FLAGS(${_targetName} _target_cflags ${_name} ${_pch_output_to_use} ${_dowarn}) #MESSAGE("Add flags ${_target_cflags} to ${_targetName} " ) SET_TARGET_PROPERTIES(${_targetName} PROPERTIES COMPILE_FLAGS ${_target_cflags} ) ADD_CUSTOM_TARGET(pch_Generate_${_targetName} DEPENDS ${_pch_output_to_use} ) ADD_DEPENDENCIES(${_targetName} pch_Generate_${_targetName} ) else ( PCHSupport_FOUND ) SET_TARGET_PROPERTIES(${_targetName} PROPERTIES COMPILE_FLAGS ${PCH_ADDITIONAL_COMPILER_FLAGS} 
${PCH_ADDITIONAL_COMPILER_FLAGS_${_targetName}} ) endif ( PCHSupport_FOUND ) ENDMACRO(ADD_PRECOMPILED_HEADER_TO_TARGET) MACRO(ADD_PRECOMPILED_HEADER _targetName _input) if ( PCHSupport_FOUND ) SET(_PCH_current_target ${_targetName}) IF(NOT CMAKE_BUILD_TYPE) MESSAGE(FATAL_ERROR "This is the ADD_PRECOMPILED_HEADER macro. " "You must set CMAKE_BUILD_TYPE!" ) ENDIF(NOT CMAKE_BUILD_TYPE) IF( "${ARGN}" STREQUAL "0") SET(_dowarn 0) ELSE( "${ARGN}" STREQUAL "0") SET(_dowarn 1) ENDIF("${ARGN}" STREQUAL "0") GET_FILENAME_COMPONENT(_name ${_input} NAME) GET_FILENAME_COMPONENT(_path ${_input} PATH) GET_PRECOMPILED_HEADER_OUTPUT( ${_targetName} ${_input} _output) GET_FILENAME_COMPONENT(_outdir ${_output} PATH ) GET_TARGET_PROPERTY(_targetType ${_PCH_current_target} TYPE) _PCH_WRITE_PCHDEP_CXX(${_targetName} ${_input} _pch_dephelp_cxx) #MESSAGE(${_pch_dephelp_cxx}) IF(${_targetType} STREQUAL SHARED_LIBRARY) ADD_LIBRARY(${_targetName}_pch_dephelp SHARED ${_pch_dephelp_cxx} ) ELSE(${_targetType} STREQUAL SHARED_LIBRARY) ADD_LIBRARY(${_targetName}_pch_dephelp STATIC ${_pch_dephelp_cxx}) ENDIF(${_targetType} STREQUAL SHARED_LIBRARY) FILE(MAKE_DIRECTORY ${_outdir}) _PCH_GET_COMPILE_FLAGS(_compile_FLAGS) SET(_compile_FLAGS ${_compile_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS_${_targetName}}) #MESSAGE("_compile_FLAGS: ${_compile_FLAGS}") #message("COMMAND ${CMAKE_CXX_COMPILER} ${_compile_FLAGS} -x c++-header -o ${_output} ${_input}") SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_BINARY_DIR}/${_name} PROPERTIES GENERATED 1) ADD_CUSTOM_COMMAND( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${_name} COMMAND ${CMAKE_COMMAND} -E copy ${_input} ${CMAKE_CURRENT_BINARY_DIR}/${_name} # ensure same directory! 
Required by gcc DEPENDS ${_input} ) #message("_command ${_input} ${_output}") _PCH_GET_COMPILE_COMMAND(_command ${CMAKE_CURRENT_BINARY_DIR}/${_name} ${_output} ) #message(${_input} ) #message("_output ${_output}") ADD_CUSTOM_COMMAND( OUTPUT ${_output} COMMAND ${_command} DEPENDS ${_input} ${CMAKE_CURRENT_BINARY_DIR}/${_name} ${_targetName}_pch_dephelp ) ADD_PRECOMPILED_HEADER_TO_TARGET(${_targetName} ${_input} ${_output} ${_dowarn}) endif ( PCHSupport_FOUND ) ENDMACRO(ADD_PRECOMPILED_HEADER) LucenePlusPlus-rel_3.0.4/cmake/Toolchain-g++32.cmake000066400000000000000000000012661217574114600220410ustar00rootroot00000000000000# Cross compiling from linux using g++-multilib to create 32 bit output # On ubuntu, you'll need to install the packages: g++-multilib gcc-multilib # # Use of this file: # cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain-g++32.cmake .. SET(CMAKE_CXX_FLAGS "-m32") SET(CMAKE_C_FLAGS "-m32") SET(CMAKE_EXE_LINKER_FLAGS "-m32") SET(CMAKE_MODULE_LINKER_FLAGS "-m32") # here is the target environment located SET(CMAKE_FIND_ROOT_PATH /usr/lib32 ) # adjust the default behaviour of the FIND_XXX() commands: # search headers and libraries in the target environment, search # programs in the host environment set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) LucenePlusPlus-rel_3.0.4/cmake/Toolchain-llvm.cmake000066400000000000000000000007771217574114600223000ustar00rootroot00000000000000# Use of this file: # cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain-llvm.cmake .. # which compilers to use for C and C++ SET(CMAKE_C_COMPILER clang) SET(CMAKE_CXX_COMPILER clang++) SET(ENABLE_LLVM CACHE BOOL TRUE) SET(ENABLE_LLVM_BC CACHE BOOL FALSE) SET(PCHSupport_ENABLED FALSE) IF ( ENABLE_LLVM_BC ) #TODO: make this work... 
#this only crates the llvm objects, it can't link them together currently SET(CMAKE_C_FLAGS "-emit-llvm") SET(CMAKE_CXX_FLAGS "-emit-llvm") ENDIF ( ENABLE_LLVM_BC ) LucenePlusPlus-rel_3.0.4/cmake/Toolchain-mingw32.cmake000066400000000000000000000023031217574114600225770ustar00rootroot00000000000000# Cross compiling from linux using mingw32 tools # On ubuntu, you'll need to install the packages: mingw32, mingw32-binutils, mingw32-runtime # # Use of this file: # cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain-mingw32.cmake -C ../cmake/Toolchain-mingw32.cmake .. # the name of the target operating system SET(CMAKE_SYSTEM_NAME Windows) # which compilers to use for C and C++ SET(CMAKE_C_COMPILER i586-mingw32msvc-gcc) SET(CMAKE_CXX_COMPILER i586-mingw32msvc-g++) # here is the target environment located SET(CMAKE_FIND_ROOT_PATH /usr/i586-mingw32msvc /home/alex/mingw-install ) INCLUDE_DIRECTORIES(/usr/lib/gcc/i586-mingw32msvc/4.2.1-sjlj/include/c++) # adjust the default behaviour of the FIND_XXX() commands: # search headers and libraries in the target environment, search # programs in the host environment set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) SET(_CL_HAVE_GCCVISIBILITYPATCH 0) SET(_CL_HAVE_NAMESPACES_EXITCODE 0) SET(_CL_HAVE_NO_SNPRINTF_BUG_EXITCODE 0) SET(_CL_HAVE_NO_SNWPRINTF_BUG_EXITCODE 0) SET(LUCENE_STATIC_CONSTANT_SYNTAX_EXITCODE 1) SET(_CL_HAVE_TRY_BLOCKS_EXITCODE 0) SET(ENABLE_ANSI_MODE OFF) LucenePlusPlus-rel_3.0.4/cmake/cmake_uninstall.cmake.in000066400000000000000000000015631217574114600231600ustar00rootroot00000000000000IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"") ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) STRING(REGEX REPLACE "\n" ";" files "${files}") 
FOREACH(file ${files}) MESSAGE(STATUS "Uninstalling \"${file}\"") IF(EXISTS "${file}") EXEC_PROGRAM( "@CMAKE_COMMAND@" ARGS "-E remove \"${file}\"" OUTPUT_VARIABLE rm_out RETURN_VALUE rm_retval ) IF("${rm_retval}" STREQUAL 0) ELSE("${rm_retval}" STREQUAL 0) MESSAGE(FATAL_ERROR "Problem when removing \"${file}\"") ENDIF("${rm_retval}" STREQUAL 0) #ELSE(EXISTS "${file}") # MESSAGE(STATUS "File \"${file}\" does not exist.") ENDIF(EXISTS "${file}") ENDFOREACH(file) LucenePlusPlus-rel_3.0.4/doc/000077500000000000000000000000001217574114600160605ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/doc/Doxyfile.cmake000066400000000000000000000207051217574114600206510ustar00rootroot00000000000000# Doxyfile 1.2.18 #--------------------------------------------------------------------------- # General configuration options #--------------------------------------------------------------------------- PROJECT_NAME = Lucene++ PROJECT_NUMBER = @LUCENE++_SOVERSION@ OUTPUT_DIRECTORY = @PROJECT_BINARY_DIR@/doc OUTPUT_LANGUAGE = English EXTRACT_ALL = YES EXTRACT_PRIVATE = NO EXTRACT_STATIC = YES EXTRACT_LOCAL_CLASSES = YES EXTRACT_LOCAL_METHODS = YES HIDE_UNDOC_MEMBERS = NO HIDE_UNDOC_CLASSES = NO HIDE_FRIEND_COMPOUNDS = YES HIDE_IN_BODY_DOCS = NO BRIEF_MEMBER_DESC = YES REPEAT_BRIEF = YES ALWAYS_DETAILED_SEC = NO INLINE_INHERITED_MEMB = NO FULL_PATH_NAMES = NO STRIP_FROM_PATH = INTERNAL_DOCS = NO STRIP_CODE_COMMENTS = YES CASE_SENSE_NAMES = YES SHORT_NAMES = NO HIDE_SCOPE_NAMES = NO VERBATIM_HEADERS = YES SHOW_INCLUDE_FILES = YES JAVADOC_AUTOBRIEF = YES MULTILINE_CPP_IS_BRIEF = YES DETAILS_AT_TOP = NO INHERIT_DOCS = YES INLINE_INFO = YES SORT_MEMBER_DOCS = YES DISTRIBUTE_GROUP_DOC = NO TAB_SIZE = 8 GENERATE_TODOLIST = YES GENERATE_TESTLIST = YES GENERATE_BUGLIST = YES GENERATE_DEPRECATEDLIST= YES ALIASES = "memory=\par Memory management:\n" ENABLED_SECTIONS = MAX_INITIALIZER_LINES = 30 OPTIMIZE_OUTPUT_FOR_C = YES OPTIMIZE_OUTPUT_JAVA = NO SHOW_USED_FILES = YES DIRECTORY_GRAPH = YES 
DOCSET_BUNDLE_ID = org.doxygen.Project DOCSET_FEEDNAME = "Doxygen generated docs" DOXYFILE_ENCODING = UTF-8 FORMULA_FONTSIZE = 10 TYPEDEF_HIDES_STRUCT = YES ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- QUIET = NO WARNINGS = YES WARN_IF_UNDOCUMENTED = YES WARN_IF_DOC_ERROR = YES WARN_NO_PARAMDOC = NO WARN_FORMAT = "$file:$line: $text" WARN_LOGFILE = @PROJECT_BINARY_DIR@/doc/doxygen.warnings.log #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- INPUT = @PROJECT_SOURCE_DIR@/include FILE_PATTERNS = *.h RECURSIVE = YES EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = "**/.svn/**" \ "**/.git/**" \ "**/Lucene.h" "*/test/*" \ "*/md5/*" \ "*/nedmalloc/*" \ "*/utf8/*" EXAMPLE_PATH = EXAMPLE_PATTERNS = EXAMPLE_RECURSIVE = NO IMAGE_PATH = INPUT_FILTER = FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- SOURCE_BROWSER = NO INLINE_SOURCES = NO REFERENCED_BY_RELATION = YES REFERENCES_RELATION = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- ALPHABETICAL_INDEX = NO COLS_IN_ALPHA_INDEX = 5 IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output 
#--------------------------------------------------------------------------- GENERATE_HTML = @DOCS_HTML@ HTML_OUTPUT = html HTML_FILE_EXTENSION = .html HTML_HEADER = @PROJECT_BINARY_DIR@/doc/helpheader.htm HTML_FOOTER = @PROJECT_BINARY_DIR@/doc/helpfooter.htm HTML_STYLESHEET = HTML_ALIGN_MEMBERS = YES HTML_DYNAMIC_SECTIONS = YES GENERATE_HTMLHELP = @DOCS_HTML_HELP@ CHM_FILE = ../lucene++.chm HHC_LOCATION = @HTML_HELP_COMPILER_EX@ GENERATE_CHI = YES BINARY_TOC = YES TOC_EXPAND = NO DISABLE_INDEX = NO ENUM_VALUES_PER_LINE = 4 GENERATE_TREEVIEW = NO TREEVIEW_WIDTH = 250 #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- GENERATE_LATEX = @DOCS_LATEX@ LATEX_OUTPUT = latex LATEX_CMD_NAME = @LATEX_COMPILER@ MAKEINDEX_CMD_NAME = makeindex COMPACT_LATEX = NO PAPER_TYPE = a4wide EXTRA_PACKAGES = LATEX_HEADER = PDF_HYPERLINKS = YES USE_PDFLATEX = NO LATEX_BATCHMODE = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- GENERATE_RTF = @DOCS_RTF@ RTF_OUTPUT = rtf COMPACT_RTF = NO RTF_HYPERLINKS = NO RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- GENERATE_MAN = @DOCS_MAN@ MAN_OUTPUT = man MAN_EXTENSION = .3 MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- GENERATE_XML = @DOCS_XML@ XML_SCHEMA = XML_DTD = XML_OUTPUT = xml XML_PROGRAMLISTING = YES 
#--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- ENABLE_PREPROCESSING = YES MACRO_EXPANSION = YES EXPAND_ONLY_PREDEF = NO SEARCH_INCLUDES = YES INCLUDE_PATH = INCLUDE_FILE_PATTERNS = PREDEFINED = "" EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::addtions related to external references #--------------------------------------------------------------------------- TAGFILES = GENERATE_TAGFILE = @DOCS_TAGFILE_LOCATION@ ALLEXTERNALS = NO EXTERNAL_GROUPS = YES PERL_PATH = @PERL_EXECUTABLE@ #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- CLASS_DIAGRAMS = YES HIDE_UNDOC_RELATIONS = YES HAVE_DOT = @HAVE_DOT@ CLASS_GRAPH = YES COLLABORATION_GRAPH = YES TEMPLATE_RELATIONS = NO INCLUDE_GRAPH = YES INCLUDED_BY_GRAPH = YES GRAPHICAL_HIERARCHY = YES DOT_IMAGE_FORMAT = png DOT_PATH = @DOXYGEN_DOT_EXECUTABLE@ DOTFILE_DIRS = GENERATE_LEGEND = YES DOT_CLEANUP = YES DOT_FONTNAME = FreeSans DOT_FONTPATH = DOT_FONTSIZE = 10 DOT_GRAPH_MAX_NODES = 50 DOT_MULTI_TARGETS = NO DOT_TRANSPARENT = NO #--------------------------------------------------------------------------- # Configuration::addtions related to the search engine #--------------------------------------------------------------------------- SEARCHENGINE = YES LucenePlusPlus-rel_3.0.4/doc/doxygen.css.cmake000066400000000000000000000120301217574114600213220ustar00rootroot00000000000000H1 { text-align: 
center; font-family: Arial, Helvetica, sans-serif; } H2 { font-family: Geneva, Arial, Helvetica, sans-serif; } CAPTION { font-weight: bold } DIV.qindex { width: 100%; background-color: #eeeeff; border: 4px solid #eeeeff; text-align: center; margin-bottom: 2px } A.qindex { text-decoration: none; font-weight: bold; } A.qindex:hover { text-decoration: none; background-color: #ddddff } A.qindexHL { text-decoration: none; font-weight: bold; background-color: #6666cc; color: #ffffff } A.qindexHL:hover { text-decoration: none; background-color: #6666cc } A.qindexRef { text-decoration: none; font-weight: bold; } A.qindexRef:hover { text-decoration: none; background-color: #ddddff } A.qindexRefHL { text-decoration: none; font-weight: bold; background-color: #6666cc; color: #ffffff } A.qindexRefHL:hover { text-decoration: none; background-color: #6666cc } A.el { text-decoration: none; font-weight: bold } A.elRef { font-weight: bold } A.code { text-decoration: none; font-weight: normal; color: #4444ee } A.codeRef { font-weight: normal; color: #4444ee } A:hover { text-decoration: none; background-color: #f2f2ff } DL.el { margin-left: -1cm } DIV.fragment { width: 98%; border: 1px solid #CCCCCC; background-color: #f5f5f5; padding-left: 4px; margin: 4px; } DIV.ah { background-color: black; font-weight: bold; color: #ffffff; margin-bottom: 3px; margin-top: 3px } TD.md { background-color: #f2f2ff; font-weight: bold; } TD.mdname1 { background-color: #f2f2ff; font-weight: bold; color: #602020; } TD.mdname { background-color: #f2f2ff; font-weight: bold; color: #602020; width: 600px; } DIV.groupHeader { margin-left: 16px; margin-top: 12px; margin-bottom: 6px; font-weight: bold } DIV.groupText { margin-left: 16px; font-style: italic; font-size: smaller } BODY { background: white; color: black; margin-right: 20px; margin-left: 20px; } TD.indexkey { background-color: #eeeeff; font-weight: bold; padding-right : 10px; padding-top : 2px; padding-left : 10px; padding-bottom : 2px; margin-left 
: 0px; margin-right : 0px; margin-top : 2px; margin-bottom : 2px } TD.indexvalue { background-color: #eeeeff; font-style: italic; padding-right : 10px; padding-top : 2px; padding-left : 10px; padding-bottom : 2px; margin-left : 0px; margin-right : 0px; margin-top : 2px; margin-bottom : 2px } TR.memlist { background-color: #f0f0f0; } P.formulaDsp { text-align: center; } IMG.formulaDsp { } IMG.formulaInl { vertical-align: middle; } SPAN.keyword { color: #008000 } SPAN.keywordtype { color: #604020 } SPAN.keywordflow { color: #e08000 } SPAN.comment { color: #800000 } SPAN.preprocessor { color: #806020 } SPAN.stringliteral { color: #002080 } SPAN.charliteral { color: #008080 } .mdTable { border: 1px solid #868686; background-color: #f2f2ff; } .mdRow { padding: 8px 20px; } .mdescLeft { font-size: smaller; font-family: Arial, Helvetica, sans-serif; background-color: #FAFAFA; padding-left: 8px; border-top: 1px none #E0E0E0; border-right: 1px none #E0E0E0; border-bottom: 1px none #E0E0E0; border-left: 1px none #E0E0E0; margin: 0px; } .mdescRight { font-size: smaller; font-family: Arial, Helvetica, sans-serif; font-style: italic; background-color: #FAFAFA; padding-left: 4px; border-top: 1px none #E0E0E0; border-right: 1px none #E0E0E0; border-bottom: 1px none #E0E0E0; border-left: 1px none #E0E0E0; margin: 0px; padding-bottom: 0px; padding-right: 8px; } .memItemLeft { padding: 1px 0px 0px 8px; margin: 4px; border-top-width: 1px; border-right-width: 1px; border-bottom-width: 1px; border-left-width: 1px; border-top-style: solid; border-top-color: #E0E0E0; border-right-color: #E0E0E0; border-bottom-color: #E0E0E0; border-left-color: #E0E0E0; border-right-style: none; border-bottom-style: none; border-left-style: none; background-color: #FAFAFA; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: 12px; } .memItemRight { padding: 1px 0px 0px 8px; margin: 4px; border-top-width: 1px; border-right-width: 1px; border-bottom-width: 1px; border-left-width: 1px; 
border-top-style: solid; border-top-color: #E0E0E0; border-right-color: #E0E0E0; border-bottom-color: #E0E0E0; border-left-color: #E0E0E0; border-right-style: none; border-bottom-style: none; border-left-style: none; background-color: #FAFAFA; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: 13px; } LucenePlusPlus-rel_3.0.4/doc/helpfooter.htm.cmake000066400000000000000000000001451217574114600220200ustar00rootroot00000000000000

clucene.sourceforge.net

LucenePlusPlus-rel_3.0.4/doc/helpheader.htm.cmake000066400000000000000000000010531217574114600217510ustar00rootroot00000000000000 Lucene++ API Documentation (Version @LUCENE++_SOVERSION@)

Lucene++ - a full-featured, c++ search engine
API Documentation


LucenePlusPlus-rel_3.0.4/doxygen/000077500000000000000000000000001217574114600167705ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/doxygen/lucene++000066400000000000000000001765761217574114600203420ustar00rootroot00000000000000# Doxyfile 1.6.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded # by quotes) that should identify the project. PROJECT_NAME = Lucene++ # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 3.0.4 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. 
OUTPUT_DIRECTORY = C:/Alan/lucene++/docs # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. 
Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = NO # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. 
STRIP_FROM_PATH = /Users/dimitri/doxygen/mail/1.5.7/doxywizard/ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful is your file systems # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = YES # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = YES # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. 
INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it parses. # With this tag you can assign which parser to use for a given extension. 
# Doxygen has a built-in mapping, but you can override or extend it using this tag. # The format is ext=language, where ext is a file extension, and language is one of # the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, # Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), # use: inc=Fortran f=C. Note that for custom extensions you also need to set # FILE_PATTERNS otherwise the files are not read by doxygen. EXTENSION_MAPPING = # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate getter # and setter methods for a property. Setting this option to YES (the default) # will make doxygen to replace the get and set methods by a property in the # documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. 
IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = YES # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penality. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. 
Note that the value works on # a logarithmic scale so increasing the size by one will rougly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols SYMBOL_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = YES # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespace are hidden. 
EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = YES # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. 
If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen # will sort the (brief and detailed) documentation of class members so that # constructors and destructors are listed first. If set to NO (the default) # the constructors will appear in the respective orders defined by # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 
# Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = NO # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = NO # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = NO # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or define consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and defines in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # If the sources in your project are distributed over multiple directories # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy # in the documentation. The default is NO. 
SHOW_DIRECTORIES = NO # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command , where is the value of # the FILE_VERSION_FILTER tag, and is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by # doxygen. The layout file controls the global structure of the generated output files # in an output format independent way. The create the layout file that represents # doxygen's defaults, run doxygen with the -l option. You can optionally specify a # file name after the option, if omitted DoxygenLayout.xml will be used as the name # of the layout file. LAYOUT_FILE = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. 
WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = NO # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. 
INPUT = C:/Alan/lucene++ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 FILE_PATTERNS = *.cc \ *.cxx \ *.cpp \ *.c++ \ *.d \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.idl \ *.odl \ *.cs \ *.php \ *.php3 \ *.inc \ *.m \ *.mm \ *.dox \ *.py \ *.f90 \ *.f \ *.vhd \ *.vhdl # The RECURSIVE tag can be used to turn specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. 
Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = */test/* \ */md5/* \ */nedmalloc/* \ */utf8/* \ */zlib/* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain image that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command , where # is the value of the INPUT_FILTER tag, and is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. If FILTER_PATTERNS is specified, this tag will be # ignored. 
INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER # is applied to all files. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. 
REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = YES # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. 
IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, # files or namespaces will be aligned in HTML using tables. If set to # NO a bullet list will be used. HTML_ALIGN_MEMBERS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. 
For this to work a browser that supports # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = YES # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. 
CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER # are set, an additional index file will be generated that can be used as input for # Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated # HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. 
For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. # For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the custom filter to add.For more information please see # Qt Help Project / Custom Filters. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's # filter section matches. # Qt Help Project / Filter Attributes. QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # The DISABLE_INDEX tag can be used to turn on/off the condensed index at # top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. DISABLE_INDEX = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature. GENERATE_TREEVIEW = NO # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, # and Class Hierarchy pages using a tree view instead of an ordered list. 
USE_INLINE_TREES = NO # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using # HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP) # there is already a search function so this one should typically # be disabled. SEARCHENGINE = YES #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. 
This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO # If LATEX_SOURCE_CODE is set to YES then doxygen will include # source code with syntax highlighting in the LaTeX output. # Note that which sources are shown also depends on other settings # such as SOURCE_BROWSER. 
LATEX_SOURCE_CODE = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load stylesheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. 
XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. This is useful # if you want to understand what is going on. On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. 
PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # in the INCLUDE_PATH (see below) will be search if a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. 
PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all function-like macros that are alone # on a line, have an all uppercase name, and do not end with a semicolon. Such # function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. # Optionally an initial location of the external documentation # can be added for each tagfile. The format of a tag file without # this location is as follows: # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths or # URLs. If a location is present for each tag, the installdox tool # does not have to be run to correct the links. # Note that each tag file must have a unique name # (where the name does NOT include the path) # If a tag file is not located in the directory in which doxygen # is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. 
ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. 
The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # By default doxygen will write a font called FreeSans.ttf to the output # directory and reference it in all dot files that doxygen generates. This # font does not include all possible unicode characters however, so when you need # these (or just want a differently looking font) you can specify the font name # using DOT_FONTNAME. You need need to make sure dot is able to find the font, # which can be done by putting it in a standard location or by setting the # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory # containing the font. DOT_FONTNAME = FreeSans # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the output directory to look for the # FreeSans.ttf font (which doxygen will put there itself). If you specify a # different font using DOT_FONTNAME you can set the path where dot # can find it using this tag. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # the CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. 
UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. 
DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif # If left blank png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. 
they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES LucenePlusPlus-rel_3.0.4/include/000077500000000000000000000000001217574114600167365ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/include/ASCIIFoldingFilter.h000066400000000000000000000100731217574114600224110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ASCIIFOLDINGFILTER_H #define ASCIIFOLDINGFILTER_H #include "TokenFilter.h" namespace Lucene { /// This class converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII /// characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if one exists. 
/// /// Characters from the following Unicode blocks are converted; however, only those characters with reasonable ASCII /// alternatives are converted: /// /// C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf /// Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf /// Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf /// Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf /// Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf /// Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf /// IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf /// Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf /// Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf /// General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf /// Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf /// Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf /// Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf /// Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf /// Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf /// Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf /// /// See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode /// /// The set of character conversions supported by this class is a superset of those supported by Lucene's {@link /// ISOLatin1AccentFilter} which strips accents from Latin1 characters. For example, 'à' will be replaced by 'a'. /// class LPPAPI ASCIIFoldingFilter : public TokenFilter { public: ASCIIFoldingFilter(TokenStreamPtr input); virtual ~ASCIIFoldingFilter(); LUCENE_CLASS(ASCIIFoldingFilter); protected: CharArray output; int32_t outputPos; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Converts characters above ASCII to their ASCII equivalents. 
For example, accents are removed from /// accented characters. /// @param input The string to fold /// @param length The number of characters in the input string void foldToASCII(const wchar_t* input, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.4/include/AbstractAllTermDocs.h000066400000000000000000000025261217574114600227510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ABSTRACTALLTERMDOCS_H #define ABSTRACTALLTERMDOCS_H #include "TermDocs.h" namespace Lucene { /// Base class for enumerating all but deleted docs. /// /// NOTE: this class is meant only to be used internally by Lucene; it's only public so it /// can be shared across packages. class LPPAPI AbstractAllTermDocs : public TermDocs, public LuceneObject { public: AbstractAllTermDocs(int32_t maxDoc); virtual ~AbstractAllTermDocs(); LUCENE_CLASS(AbstractAllTermDocs); protected: int32_t maxDoc; int32_t _doc; public: virtual void seek(TermPtr term); virtual void seek(TermEnumPtr termEnum); virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); virtual int32_t read(Collection docs, Collection freqs); virtual bool skipTo(int32_t target); virtual void close(); virtual bool isDeleted(int32_t doc) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/AbstractField.h000066400000000000000000000250001217574114600216130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef ABSTRACTFIELD_H #define ABSTRACTFIELD_H #include "Fieldable.h" namespace Lucene { class LPPAPI AbstractField : public Fieldable, public LuceneObject { public: /// Specifies whether and how a field should be stored. enum Store { /// Store the original field value in the index. This is useful for short texts like a document's title /// which should be displayed with the results. The value is stored in its original form, ie. no analyzer /// is used before it is stored. STORE_YES, /// Do not store the field value in the index. STORE_NO }; /// Specifies whether and how a field should be indexed. enum Index { /// Do not index the field value. This field can thus not be searched, but one can still access its /// contents provided it is {@link Field.Store stored}. INDEX_NO, /// Index the tokens produced by running the field's value through an Analyzer. This is useful for /// common text. INDEX_ANALYZED, /// Index the field's value without using an Analyzer, so it can be searched. As no analyzer is used /// the value will be stored as a single term. This is useful for unique Ids like product numbers. INDEX_NOT_ANALYZED, /// Index the field's value without an Analyzer, and also disable the storing of norms. Note that you /// can also separately enable/disable norms by calling {@link Field#setOmitNorms}. No norms means /// that index-time field and document boosting and field length normalization are disabled. The benefit /// is less memory usage as norms take up one byte of RAM per indexed field for every document in the /// index, during searching. Note that once you index a given field with norms enabled, disabling norms /// will have no effect. In other words, for this to have the above described effect on a field, all /// instances of that field must be indexed with NOT_ANALYZED_NO_NORMS from the beginning. 
INDEX_NOT_ANALYZED_NO_NORMS, /// Index the tokens produced by running the field's value through an Analyzer, and also separately /// disable the storing of norms. See {@link #NOT_ANALYZED_NO_NORMS} for what norms are and why you /// may want to disable them. INDEX_ANALYZED_NO_NORMS }; /// Specifies whether and how a field should have term vectors. enum TermVector { /// Do not store term vectors. TERM_VECTOR_NO, /// Store the term vectors of each document. A term vector is a list of the document's terms and their /// number of occurrences in that document. TERM_VECTOR_YES, /// Store the term vector + token position information /// @see #YES TERM_VECTOR_WITH_POSITIONS, /// Store the term vector + token offset information /// @see #YES TERM_VECTOR_WITH_OFFSETS, /// Store the term vector + token position and offset information /// @see #YES /// @see #WITH_POSITIONS /// @see #WITH_OFFSETS TERM_VECTOR_WITH_POSITIONS_OFFSETS }; public: virtual ~AbstractField(); LUCENE_CLASS(AbstractField); protected: AbstractField(); AbstractField(const String& name, Store store, Index index, TermVector termVector); String _name; bool storeTermVector; bool storeOffsetWithTermVector; bool storePositionWithTermVector; bool _omitNorms; bool _isStored; bool _isIndexed; bool _isTokenized; bool _isBinary; bool lazy; bool omitTermFreqAndPositions; double boost; // the data object for all different kind of field values FieldsData fieldsData; // pre-analyzed tokenStream for indexed fields TokenStreamPtr tokenStream; // length/offset for all primitive types int32_t binaryLength; int32_t binaryOffset; public: /// Sets the boost factor hits on this field. This value will be multiplied into the score of all /// hits on this this field of this document. /// /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field. /// If a document has multiple fields with the same name, all such values are multiplied together. 
/// This product is then used to compute the norm factor for the field. By default, in the {@link /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)} /// before it is stored in the index. One should attempt to ensure that this product does not overflow /// the range of that encoding. /// /// @see Document#setBoost(double) /// @see Similarity#computeNorm(String, FieldInvertState) /// @see Similarity#encodeNorm(double) virtual void setBoost(double boost); /// Returns the boost factor for hits for this field. /// /// The default value is 1.0. /// /// Note: this value is not stored directly with the document in the index. Documents returned from /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value /// present as when this field was indexed. virtual double getBoost(); /// Returns the name of the field as an interned string. For example "date", "title", "body", ... virtual String name(); /// True if the value of the field is to be stored in the index for return with search hits. It is an /// error for this to be true if a field is Reader-valued. virtual bool isStored(); /// True if the value of the field is to be indexed, so that it may be searched on. virtual bool isIndexed(); /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields /// are indexed as a single word and may not be Reader-valued. virtual bool isTokenized(); /// True if the term or terms used to index this field are stored as a term vector, available from /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the /// original content of the field, only to terms used to index it. If the original content must be /// preserved, use the stored attribute instead. 
virtual bool isTermVectorStored(); /// True if terms are stored as term vector together with their offsets (start and end position in /// source text). virtual bool isStoreOffsetWithTermVector(); /// True if terms are stored as term vector together with their token positions. virtual bool isStorePositionWithTermVector(); /// True if the value of the field is stored as binary. virtual bool isBinary(); /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. /// @return reference to the Field value as byte[]. virtual ByteArray getBinaryValue(); /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. /// @return reference to the Field value as byte[]. virtual ByteArray getBinaryValue(ByteArray result); /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is /// undefined. /// @return length of byte[] segment that represents this Field value. virtual int32_t getBinaryLength(); /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is /// undefined. /// @return index of the first character in byte[] segment that represents this Field value. virtual int32_t getBinaryOffset(); /// True if norms are omitted for this indexed field. virtual bool getOmitNorms(); /// @see #setOmitTermFreqAndPositions virtual bool getOmitTermFreqAndPositions(); /// If set, omit normalization factors associated with this indexed field. /// This effectively disables indexing boosts and length normalization for this field. virtual void setOmitNorms(bool omitNorms); /// If set, omit term freq, positions and payloads from postings for this field. 
/// /// NOTE: While this option reduces storage space required in the index, it also means any query requiring /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail /// to find results. virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions); /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field /// is lazily loaded, retrieving it's values via {@link #stringValue()} or {@link #getBinaryValue()} /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open. /// /// @return true if this field can be loaded lazily virtual bool isLazy(); /// Prints a Field for human consumption. virtual String toString(); protected: void setStoreTermVector(TermVector termVector); }; } #endif LucenePlusPlus-rel_3.0.4/include/AllTermDocs.h000066400000000000000000000014021217574114600212550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ALLTERMDOCS_H #define ALLTERMDOCS_H #include "AbstractAllTermDocs.h" namespace Lucene { class AllTermDocs : public AbstractAllTermDocs { public: AllTermDocs(SegmentReaderPtr parent); virtual ~AllTermDocs(); LUCENE_CLASS(AllTermDocs); protected: BitVectorWeakPtr _deletedDocs; public: virtual bool isDeleted(int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.4/include/Analyzer.h000066400000000000000000000071301217574114600206750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ANALYZER_H #define ANALYZER_H #include "CloseableThreadLocal.h" namespace Lucene { /// An Analyzer builds TokenStreams, which analyze text. It thus represents a policy for extracting index terms /// from text. /// /// Typical implementations first build a Tokenizer, which breaks the stream of characters from the Reader into /// raw Tokens. One or more TokenFilters may then be applied to the output of the Tokenizer. class LPPAPI Analyzer : public LuceneObject { public: virtual ~Analyzer(); LUCENE_CLASS(Analyzer); protected: CloseableThreadLocal tokenStreams; public: /// Creates a TokenStream which tokenizes all the text in the provided Reader. Must be able to handle null /// field name for backward compatibility. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) = 0; /// Creates a TokenStream that is allowed to be re-used from the previous time that the same thread called /// this method. Callers that do not need to use more than one TokenStream at the same time from this analyzer /// should use this method for better performance. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); /// Invoked before indexing a Fieldable instance if terms have already been added to that field. This allows /// custom analyzers to place an automatic position increment gap between Fieldable instances using the same /// field name. The default value position increment gap is 0. With a 0 position increment gap and the typical /// default token position increment of 1, all terms in a field, including across Fieldable instances, are in /// successive positions, allowing exact PhraseQuery matches, for instance, across Fieldable instance boundaries. /// /// @param fieldName Fieldable name being indexed. 
/// @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} virtual int32_t getPositionIncrementGap(const String& fieldName); /// Just like {@link #getPositionIncrementGap}, except for Token offsets instead. By default this returns 1 for /// tokenized fields and, as if the fields were joined with an extra space character, and 0 for un-tokenized /// fields. This method is only called if the field produced at least one token for indexing. /// /// @param field the field just indexed /// @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)} virtual int32_t getOffsetGap(FieldablePtr field); /// Frees persistent resources used by this Analyzer virtual void close(); protected: /// Used by Analyzers that implement reusableTokenStream to retrieve previously saved TokenStreams for re-use /// by the same thread. virtual LuceneObjectPtr getPreviousTokenStream(); /// Used by Analyzers that implement reusableTokenStream to save a TokenStream for later re-use by the /// same thread. virtual void setPreviousTokenStream(LuceneObjectPtr stream); }; } #endif LucenePlusPlus-rel_3.0.4/include/Array.h000066400000000000000000000071601217574114600201710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef ARRAY_H #define ARRAY_H #include #include "Lucene.h" namespace Lucene { template class ArrayData { public: ArrayData(int32_t size) { data = NULL; resize(size); } ~ArrayData() { resize(0); } public: TYPE* data; int32_t size; public: void resize(int32_t size) { if (size == 0) { FreeMemory(data); data = NULL; } else if (data == NULL) data = (TYPE*)AllocMemory(size * sizeof(TYPE)); else data = (TYPE*)ReallocMemory(data, size * sizeof(TYPE)); this->size = size; } }; /// Utility template class to handle sharable arrays of simple data types template class Array { public: typedef Array this_type; typedef ArrayData array_type; Array() { array = NULL; } protected: boost::shared_ptr container; array_type* array; public: static this_type newInstance(int32_t size) { this_type instance; instance.container = Lucene::newInstance(size); instance.array = instance.container.get(); return instance; } void reset() { resize(0); } void resize(int32_t size) { if (size == 0) container.reset(); else if (!container) container = Lucene::newInstance(size); else container->resize(size); array = container.get(); } TYPE* get() const { return array->data; } int32_t size() const { return array->size; } bool equals(const this_type& other) const { if (array->size != other.array->size) return false; return (std::memcmp(array->data, other.array->data, array->size) == 0); } int32_t hashCode() const { return (int32_t)(int64_t)array; } TYPE& operator[] (int32_t i) const { BOOST_ASSERT(i >= 0 && i < array->size); return array->data[i]; } operator bool () const { return container; } bool operator! 
() const { return !container; } bool operator== (const Array& other) { return (container == other.container); } bool operator!= (const Array& other) { return (container != other.container); } }; template inline std::size_t hash_value(const Array& value) { return (std::size_t)value.hashCode(); } template inline bool operator== (const Array& value1, const Array& value2) { return (value1.hashCode() == value2.hashCode()); } } #endif LucenePlusPlus-rel_3.0.4/include/Attribute.h000066400000000000000000000042571217574114600210620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ATTRIBUTE_H #define ATTRIBUTE_H #include "LuceneObject.h" namespace Lucene { /// Base class for Attributes that can be added to a {@link AttributeSource}. /// /// Attributes are used to add data in a dynamic, yet type-safe way to a source of usually streamed objects, /// eg. a {@link TokenStream}. class LPPAPI Attribute : public LuceneObject { public: virtual ~Attribute(); LUCENE_CLASS(Attribute); public: /// Clears the values in this Attribute and resets it to its default value. If this implementation /// implements more than one Attribute interface it clears all. virtual void clear() = 0; /// Subclasses must implement this method and should compute a hashCode similar to this: /// /// int32_t hashCode() /// { /// int32_t code = startOffset; /// code = code * 31 + endOffset; /// return code; /// } /// /// see also {@link #equals(Object)} virtual int32_t hashCode() = 0; /// All values used for computation of {@link #hashCode()} should be checked here for equality. 
/// /// see also {@link LuceneObject#equals(Object)} virtual bool equals(LuceneObjectPtr other) = 0; /// Copies the values from this Attribute into the passed-in target attribute. The target implementation /// must support all the Attributes this implementation supports. virtual void copyTo(AttributePtr target) = 0; /// Shallow clone. Subclasses must override this if they need to clone any members deeply. /// @param base clone reference - null when called initially, then set in top virtual override. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/AttributeSource.h000066400000000000000000000177401217574114600222440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ATTRIBUTESOURCE_H #define ATTRIBUTESOURCE_H #include "LuceneObject.h" namespace Lucene { class LPPAPI AttributeFactory : public LuceneObject { protected: AttributeFactory(); public: virtual ~AttributeFactory(); LUCENE_CLASS(AttributeFactory); public: /// returns an {@link Attribute}. virtual AttributePtr createAttributeInstance(const String& className); template AttributePtr createInstance(const String& className) { AttributePtr attrImpl = createAttributeInstance(className); return attrImpl ? attrImpl : newLucene(); } /// This is the default factory that creates {@link Attribute}s using the class name of the supplied /// {@link Attribute} interface class by appending Impl to it. static AttributeFactoryPtr DEFAULT_ATTRIBUTE_FACTORY(); }; /// An AttributeSource contains a list of different {@link Attribute}s, and methods to add and get them. 
/// There can only be a single instance of an attribute in the same AttributeSource instance. This is ensured /// by passing in the actual type of the Attribute (Class) to the {@link #addAttribute(Class)}, /// which then checks if an instance of that type is already present. If yes, it returns the instance, otherwise /// it creates a new instance and returns it. class LPPAPI AttributeSource : public LuceneObject { public: /// An AttributeSource using the default attribute factory {@link DefaultAttributeFactory}. AttributeSource(); /// An AttributeSource that uses the same attributes as the supplied one. AttributeSource(AttributeSourcePtr input); /// An AttributeSource using the supplied {@link AttributeFactory} for creating new {@link Attribute} /// instances. AttributeSource(AttributeFactoryPtr factory); virtual ~AttributeSource(); LUCENE_CLASS(AttributeSource); protected: AttributeFactoryPtr factory; MapStringAttribute attributes; AttributeSourceStatePtr currentState; public: /// returns the used AttributeFactory. AttributeFactoryPtr getAttributeFactory(); /// This method first checks if an instance of that class is already in this AttributeSource and returns it. /// Otherwise a new instance is created, added to this AttributeSource and returned. template boost::shared_ptr addAttribute() { String className(ATTR::_getClassName()); boost::shared_ptr attrImpl(boost::dynamic_pointer_cast(getAttribute(className))); if (!attrImpl) { attrImpl = boost::dynamic_pointer_cast(factory->createInstance(className)); if (!attrImpl) boost::throw_exception(IllegalArgumentException(L"Could not instantiate implementing class for " + className)); addAttribute(className, attrImpl); } return attrImpl; } /// Adds a custom Attribute instance. void addAttribute(const String& className, AttributePtr attrImpl); /// Returns true if this AttributeSource has any attributes. bool hasAttributes(); /// Returns true, if this AttributeSource contains the passed-in Attribute. 
template bool hasAttribute() { return getAttribute(ATTR::_getClassName()); } /// Returns the instance of the passed in Attribute contained in this AttributeSource. template boost::shared_ptr getAttribute() { String className(ATTR::_getClassName()); boost::shared_ptr attr(boost::dynamic_pointer_cast(getAttribute(className))); if (!attr) boost::throw_exception(IllegalArgumentException(L"This AttributeSource does not have the attribute '" + className + L"'.")); return attr; } /// Resets all Attributes in this AttributeSource by calling {@link AttributeImpl#clear()} on each Attribute /// implementation. void clearAttributes(); /// Captures the state of all Attributes. The return value can be passed to {@link #restoreState} to restore /// the state of this or another AttributeSource. AttributeSourceStatePtr captureState(); /// Restores this state by copying the values of all attribute implementations that this state contains into /// the attributes implementations of the targetStream. The targetStream must contain a corresponding instance /// for each argument contained in this state (eg. it is not possible to restore the state of an AttributeSource /// containing a TermAttribute into a AttributeSource using a Token instance as implementation). /// /// Note that this method does not affect attributes of the targetStream that are not contained in this state. /// In other words, if for example the targetStream contains an OffsetAttribute, but this state doesn't, then /// the value of the OffsetAttribute remains unchanged. It might be desirable to reset its value to the default, /// in which case the caller should first call {@link TokenStream#clearAttributes()} on the targetStream. void restoreState(AttributeSourceStatePtr state); /// Return hash code for this object. 
virtual int32_t hashCode(); /// Return whether two objects are equal virtual bool equals(LuceneObjectPtr other); /// Returns a string representation of the object virtual String toString(); /// Performs a clone of all {@link AttributeImpl} instances returned in a new AttributeSource instance. This /// method can be used to eg. create another TokenStream with exactly the same attributes (using {@link /// #AttributeSource(AttributeSource)}) AttributeSourcePtr cloneAttributes(); /// Return a vector of attributes based on currentState. Collection getAttributes(); protected: /// The caller must pass in a className value. /// This method checks if an instance of that class is already in this AttributeSource and returns it. AttributePtr getAttribute(const String& className); /// Returns true, if this AttributeSource contains the passed-in Attribute. bool hasAttribute(const String& className); void computeCurrentState(); }; class LPPAPI DefaultAttributeFactory : public AttributeFactory { public: virtual ~DefaultAttributeFactory(); LUCENE_CLASS(DefaultAttributeFactory); public: /// returns an {@link Attribute}. virtual AttributePtr createAttributeInstance(const String& className); }; /// This class holds the state of an AttributeSource. /// @see #captureState /// @see #restoreState class LPPAPI AttributeSourceState : public LuceneObject { public: virtual ~AttributeSourceState(); LUCENE_CLASS(AttributeSourceState); protected: AttributePtr attribute; AttributeSourceStatePtr next; public: virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); friend class AttributeSource; }; } #endif LucenePlusPlus-rel_3.0.4/include/AveragePayloadFunction.h000066400000000000000000000022721217574114600235040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef AVERAGEPAYLOADFUNCTION_H #define AVERAGEPAYLOADFUNCTION_H #include "PayloadFunction.h" namespace Lucene { /// Calculate the final score as the average score of all payloads seen. /// /// Is thread safe and completely reusable. class LPPAPI AveragePayloadFunction : public PayloadFunction { public: virtual ~AveragePayloadFunction(); LUCENE_CLASS(AveragePayloadFunction); public: virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore); virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); }; } #endif LucenePlusPlus-rel_3.0.4/include/Base64.h000066400000000000000000000015371217574114600201410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BASE64_H #define BASE64_H #include "LuceneObject.h" namespace Lucene { class Base64 : public LuceneObject { public: virtual ~Base64(); LUCENE_CLASS(Base64); protected: static const String BASE64_CHARS; public: static String encode(ByteArray bytes); static String encode(const uint8_t* bytes, int32_t length); static ByteArray decode(const String& str); protected: static bool isBase64(wchar_t ch); }; } #endif LucenePlusPlus-rel_3.0.4/include/BaseCharFilter.h000066400000000000000000000022421217574114600217250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BASECHARFILTER_H #define BASECHARFILTER_H #include "CharFilter.h" namespace Lucene { /// Base utility class for implementing a {@link CharFilter}. You subclass this, and then record mappings by /// calling {@link #addOffCorrectMap}, and then invoke the correct method to correct an offset. class LPPAPI BaseCharFilter : public CharFilter { public: BaseCharFilter(CharStreamPtr in); virtual ~BaseCharFilter(); LUCENE_CLASS(BaseCharFilter); protected: IntArray offsets; IntArray diffs; int32_t size; protected: /// Retrieve the corrected offset. virtual int32_t correct(int32_t currentOff); int32_t getLastCumulativeDiff(); void addOffCorrectMap(int32_t off, int32_t cumulativeDiff); }; } #endif LucenePlusPlus-rel_3.0.4/include/BitSet.h000066400000000000000000000044431217574114600203060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BITSET_H #define BITSET_H #include #include "LuceneObject.h" namespace Lucene { class LPPAPI BitSet : public LuceneObject { public: BitSet(uint32_t size = 0); virtual ~BitSet(); LUCENE_CLASS(BitSet); protected: typedef boost::dynamic_bitset< uint64_t, LuceneAllocator > bitset_type; bitset_type bitSet; public: const uint64_t* getBits(); void clear(); void clear(uint32_t bitIndex); void fastClear(uint32_t bitIndex); void clear(uint32_t fromIndex, uint32_t toIndex); void fastClear(uint32_t fromIndex, uint32_t toIndex); void set(uint32_t bitIndex); void fastSet(uint32_t bitIndex); void set(uint32_t bitIndex, bool value); void fastSet(uint32_t bitIndex, bool value); void set(uint32_t fromIndex, uint32_t toIndex); void fastSet(uint32_t fromIndex, uint32_t toIndex); void set(uint32_t fromIndex, uint32_t toIndex, bool value); void fastSet(uint32_t fromIndex, uint32_t toIndex, bool value); void flip(uint32_t bitIndex); void fastFlip(uint32_t bitIndex); void flip(uint32_t fromIndex, uint32_t toIndex); void fastFlip(uint32_t fromIndex, uint32_t toIndex); uint32_t size() const; uint32_t numBlocks() const; bool isEmpty() const; bool get(uint32_t bitIndex) const; bool fastGet(uint32_t bitIndex) const; int32_t nextSetBit(uint32_t fromIndex) const; void _and(BitSetPtr set); void _or(BitSetPtr set); void _xor(BitSetPtr set); void andNot(BitSetPtr set); bool intersectsBitSet(BitSetPtr set) const; uint32_t cardinality(); void resize(uint32_t size); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif 
LucenePlusPlus-rel_3.0.4/include/BitUtil.h000066400000000000000000000061211217574114600204630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BITUTIL_H #define BITUTIL_H #include "LuceneObject.h" namespace Lucene { /// A variety of high efficiency bit twiddling routines. class LPPAPI BitUtil : public LuceneObject { public: virtual ~BitUtil(); LUCENE_CLASS(BitUtil); public: /// Table of number of trailing zeros in a byte static const uint8_t ntzTable[]; public: /// Returns the number of bits set in the long static int32_t pop(int64_t x); /// Returns the number of set bits in an array of longs. static int64_t pop_array(const int64_t* A, int32_t wordOffset, int32_t numWords); /// Returns the popcount or cardinality of the two sets after an intersection. Neither array is modified. static int64_t pop_intersect(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); /// Returns the popcount or cardinality of the union of two sets. Neither array is modified. static int64_t pop_union(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); /// Returns the popcount or cardinality of A & ~B. Neither array is modified. static int64_t pop_andnot(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); /// Returns the popcount or cardinality of A ^ B. Neither array is modified. static int64_t pop_xor(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); /// Returns number of trailing zeros in a 64 bit long value. static int32_t ntz(int64_t val); /// Returns number of trailing zeros in a 32 bit int value. 
static int32_t ntz(int32_t val); /// Returns 0 based index of first set bit (only works for x!=0) /// This is an alternate implementation of ntz() static int32_t ntz2(int64_t x); /// Returns 0 based index of first set bit. /// This is an alternate implementation of ntz() static int32_t ntz3(int64_t x); /// Returns true if v is a power of two or zero. static bool isPowerOfTwo(int32_t v); /// Returns true if v is a power of two or zero. static bool isPowerOfTwo(int64_t v); /// Returns the next highest power of two, or the current value if it's already a power of two or zero. static int32_t nextHighestPowerOfTwo(int32_t v); /// Returns the next highest power of two, or the current value if it's already a power of two or zero. static int64_t nextHighestPowerOfTwo(int64_t v); protected: inline static void CSA(int64_t& h, int64_t& l, int64_t a, int64_t b, int64_t c); }; } #endif LucenePlusPlus-rel_3.0.4/include/BitVector.h000066400000000000000000000062101217574114600210070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BITVECTOR_H #define BITVECTOR_H #include "LuceneObject.h" namespace Lucene { /// Optimized implementation of a vector of bits. class LPPAPI BitVector : public LuceneObject { public: /// Constructs a vector capable of holding n bits. BitVector(int32_t n = 0); BitVector(ByteArray bits, int32_t size); /// Constructs a bit vector from the file name in Directory d, /// as written by the {@link #write} method. 
BitVector(DirectoryPtr d, const String& name); virtual ~BitVector(); LUCENE_CLASS(BitVector); protected: ByteArray bits; int32_t _size; int32_t _count; static const uint8_t BYTE_COUNTS[]; // table of bits/byte public: /// Clone this vector virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Sets the value of bit to one. void set(int32_t bit); /// Sets the value of bit to true, and returns true if bit was already set. bool getAndSet(int32_t bit); /// Sets the value of bit to zero. void clear(int32_t bit); /// Returns true if bit is one and false if it is zero. bool get(int32_t bit); /// Returns the number of bits in this vector. This is also one greater than /// the number of the largest valid bit number. int32_t size(); /// Returns the total number of one bits in this vector. This is efficiently /// computed and cached, so that, if the vector is not changed, no recomputation /// is done for repeated calls. int32_t count(); /// For testing int32_t getRecomputedCount(); /// Writes this vector to the file name in Directory d, in a format that can /// be read by the constructor {@link #BitVector(DirectoryPtr, const String&)}. void write(DirectoryPtr d, const String& name); /// Retrieve a subset of this BitVector. /// @param start starting index, inclusive /// @param end ending index, exclusive /// @return subset BitVectorPtr subset(int32_t start, int32_t end); protected: /// Write as a bit set. void writeBits(IndexOutputPtr output); /// Write as a d-gaps list. void writeDgaps(IndexOutputPtr output); /// Indicates if the bit vector is sparse and should be saved as a d-gaps list, /// or dense, and should be saved as a bit set. bool isSparse(); /// Read as a bit set. void readBits(IndexInputPtr input); /// Read as a d-gaps list. 
void readDgaps(IndexInputPtr input); }; } #endif LucenePlusPlus-rel_3.0.4/include/BooleanClause.h000066400000000000000000000037041217574114600216270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BOOLEANCLAUSE_H #define BOOLEANCLAUSE_H #include "LuceneObject.h" namespace Lucene { /// A clause in a BooleanQuery. class LPPAPI BooleanClause : public LuceneObject { public: /// Specifies how clauses are to occur in matching documents. enum Occur { /// Use this operator for clauses that must appear in the matching documents. MUST, /// Use this operator for clauses that should appear in the matching documents. For a BooleanQuery /// with no MUST clauses one or more SHOULD clauses must match a document for the BooleanQuery to match. /// @see BooleanQuery#setMinimumNumberShouldMatch SHOULD, /// Use this operator for clauses that must not appear in the matching documents. Note that it is not /// possible to search for queries that only consist of a MUST_NOT clause. MUST_NOT }; public: BooleanClause(QueryPtr query, Occur occur); virtual ~BooleanClause(); LUCENE_CLASS(BooleanClause); protected: /// The query whose matching documents are combined by the boolean query. 
QueryPtr query; Occur occur; public: Occur getOccur(); void setOccur(Occur occur); QueryPtr getQuery(); void setQuery(QueryPtr query); bool isProhibited(); bool isRequired(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/BooleanQuery.h000066400000000000000000000100721217574114600215140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BOOLEANQUERY_H #define BOOLEANQUERY_H #include "Query.h" #include "BooleanClause.h" #include "Weight.h" namespace Lucene { /// A Query that matches documents matching boolean combinations of other queries, eg. {@link TermQuery}s, /// {@link PhraseQuery}s or other BooleanQuerys. class LPPAPI BooleanQuery : public Query { public: /// Constructs an empty boolean query. /// /// {@link Similarity#coord(int32_t, int32_t)} may be disabled in scoring, as appropriate. For example, /// this score factor does not make sense for most automatically generated queries, like {@link WildcardQuery} /// and {@link FuzzyQuery}. /// /// @param disableCoord disables {@link Similarity#coord(int32_t, int32_t)} in scoring. BooleanQuery(bool disableCoord = false); virtual ~BooleanQuery(); LUCENE_CLASS(BooleanQuery); protected: static int32_t maxClauseCount; Collection clauses; bool disableCoord; int32_t minNrShouldMatch; public: using Query::toString; /// Return the maximum number of clauses permitted, 1024 by default. Attempts to add more than the permitted /// number of clauses cause TooManyClauses to be thrown. 
/// @see #setMaxClauseCount(int32_t) static int32_t getMaxClauseCount(); /// Set the maximum number of clauses permitted per BooleanQuery. Default value is 1024. static void setMaxClauseCount(int32_t maxClauseCount); /// Returns true if {@link Similarity#coord(int32_t, int32_t)} is disabled in scoring for this query instance. /// @see #BooleanQuery(bool) bool isCoordDisabled(); /// Implement coord disabling. virtual SimilarityPtr getSimilarity(SearcherPtr searcher); /// Specifies a minimum number of the optional BooleanClauses which must be satisfied. /// /// By default no optional clauses are necessary for a match (unless there are no required clauses). If this /// method is used, then the specified number of clauses is required. /// /// Use of this method is totally independent of specifying that any specific clauses are required (or prohibited). /// This number will only be compared against the number of matching optional clauses. /// /// @param min the number of optional clauses that must match void setMinimumNumberShouldMatch(int32_t min); /// Gets the minimum number of the optional BooleanClauses which must be satisfied. int32_t getMinimumNumberShouldMatch(); /// Adds a clause to a boolean query. /// @see #getMaxClauseCount() void add(QueryPtr query, BooleanClause::Occur occur); /// Adds a clause to a boolean query. /// @see #getMaxClauseCount() void add(BooleanClausePtr clause); /// Returns the set of clauses in this query. Collection getClauses(); /// Returns an iterator on the clauses in this query. 
Collection::iterator begin(); Collection::iterator end(); virtual WeightPtr createWeight(SearcherPtr searcher); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual void extractTerms(SetTerm terms); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); friend class BooleanWeight; }; } #endif LucenePlusPlus-rel_3.0.4/include/BooleanScorer.h000066400000000000000000000132571217574114600216540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BOOLEANSCORER_H #define BOOLEANSCORER_H #include "Scorer.h" #include "Collector.h" namespace Lucene { /// BooleanScorer uses a ~16k array to score windows of docs. So it scores docs 0-16k first, then docs 16-32k, /// etc. For each window it iterates through all query terms and accumulates a score in table[doc%16k]. It also /// stores in the table a bitmask representing which terms contributed to the score. Non-zero scores are chained /// in a linked list. At the end of scoring each window it then iterates through the linked list and, if the /// bitmask matches the boolean constraints, collects a hit. For boolean queries with lots of frequent terms this /// can be much faster, since it does not need to update a priority queue for each posting, instead performing /// constant-time operations per posting. The only downside is that it results in hits being delivered out-of-order /// within the window, which means it cannot be nested within other scorers. But it works well as a top-level scorer. 
/// /// The new BooleanScorer2 implementation instead works by merging priority queues of postings, albeit with some /// clever tricks. For example, a pure conjunction (all terms required) does not require a priority queue. Instead it /// sorts the posting streams at the start, then repeatedly skips the first to to the last. If the first ever equals /// the last, then there's a hit. When some terms are required and some terms are optional, the conjunction can /// be evaluated first, then the optional terms can all skip to the match and be added to the score. Thus the /// conjunction can reduce the number of priority queue updates for the optional terms. class BooleanScorer : public Scorer { public: BooleanScorer(SimilarityPtr similarity, int32_t minNrShouldMatch, Collection optionalScorers, Collection prohibitedScorers); virtual ~BooleanScorer(); LUCENE_CLASS(BooleanScorer); protected: SubScorerPtr scorers; BucketTablePtr bucketTable; int32_t maxCoord; Collection coordFactors; int32_t requiredMask; int32_t prohibitedMask; int32_t nextMask; int32_t minNrShouldMatch; int32_t end; BucketPtr current; int32_t doc; protected: // firstDocID is ignored since nextDoc() initializes 'current' virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); public: virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); virtual void score(CollectorPtr collector); virtual String toString(); }; class BooleanScorerCollector : public Collector { public: BooleanScorerCollector(int32_t mask, BucketTablePtr bucketTable); virtual ~BooleanScorerCollector(); LUCENE_CLASS(BooleanScorerCollector); protected: BucketTableWeakPtr _bucketTable; int32_t mask; ScorerWeakPtr _scorer; public: virtual void collect(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setScorer(ScorerPtr scorer); virtual bool acceptsDocsOutOfOrder(); }; // An internal class which is used in 
score(Collector, int32_t) for setting the current score. This is required // since Collector exposes a setScorer method and implementations that need the score will call scorer->score(). // Therefore the only methods that are implemented are score() and doc(). class BucketScorer : public Scorer { public: BucketScorer(); virtual ~BucketScorer(); LUCENE_CLASS(BucketScorer); public: double _score; int32_t doc; public: virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); }; class Bucket : public LuceneObject { public: Bucket(); virtual ~Bucket(); LUCENE_CLASS(Bucket); public: int32_t doc; // tells if bucket is valid double score; // incremental score int32_t bits; // used for bool constraints int32_t coord; // count of terms in score BucketWeakPtr _next; // next valid bucket }; /// A simple hash table of document scores within a range. class BucketTable : public LuceneObject { public: BucketTable(); virtual ~BucketTable(); LUCENE_CLASS(BucketTable); public: static const int32_t SIZE; static const int32_t MASK; Collection buckets; BucketPtr first; // head of valid list public: CollectorPtr newCollector(int32_t mask); int32_t size(); }; class SubScorer : public LuceneObject { public: SubScorer(ScorerPtr scorer, bool required, bool prohibited, CollectorPtr collector, SubScorerPtr next); virtual ~SubScorer(); LUCENE_CLASS(SubScorer); public: ScorerPtr scorer; bool required; bool prohibited; CollectorPtr collector; SubScorerPtr next; }; } #endif LucenePlusPlus-rel_3.0.4/include/BooleanScorer2.h000066400000000000000000000143571217574114600217400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BOOLEANSCORER2_H #define BOOLEANSCORER2_H #include "DisjunctionSumScorer.h" #include "ConjunctionScorer.h" namespace Lucene { /// See the description in BooleanScorer, comparing BooleanScorer & BooleanScorer2 /// /// An alternative to BooleanScorer that also allows a minimum number of optional scorers that should match. /// Implements skipTo(), and has no limitations on the numbers of added scorers. /// Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. class BooleanScorer2 : public Scorer { public: /// Creates a {@link Scorer} with the given similarity and lists of required, prohibited and optional /// scorers. In no required scorers are added, at least one of the optional scorers will have to match /// during the search. /// /// @param similarity The similarity to be used. /// @param minNrShouldMatch The minimum number of optional added scorers that should match during the search. /// In case no required scorers are added, at least one of the optional scorers will have to match during /// the search. /// @param required The list of required scorers. /// @param prohibited The list of prohibited scorers. /// @param optional The list of optional scorers. BooleanScorer2(SimilarityPtr similarity, int32_t minNrShouldMatch, Collection required, Collection prohibited, Collection optional); virtual ~BooleanScorer2(); LUCENE_CLASS(BooleanScorer2); protected: Collection requiredScorers; Collection optionalScorers; Collection prohibitedScorers; CoordinatorPtr coordinator; /// The scorer to which all scoring will be delegated, except for computing and using the coordination factor. ScorerPtr countingSumScorer; int32_t minNrShouldMatch; int32_t doc; public: virtual void initialize(); /// Scores and collects all matching documents. /// @param collector The collector to which all matching documents are passed through. 
virtual void score(CollectorPtr collector); virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); virtual int32_t advance(int32_t target); protected: ScorerPtr countingDisjunctionSumScorer(Collection scorers, int32_t minNrShouldMatch); ScorerPtr countingConjunctionSumScorer(Collection requiredScorers); ScorerPtr dualConjunctionSumScorer(ScorerPtr req1, ScorerPtr req2); /// Returns the scorer to be used for match counting and score summing. Uses requiredScorers, optionalScorers /// and prohibitedScorers. ScorerPtr makeCountingSumScorer(); ScorerPtr makeCountingSumScorerNoReq(); ScorerPtr makeCountingSumScorerSomeReq(); /// Returns the scorer to be used for match counting and score summing. Uses the given required scorer and /// the prohibitedScorers. /// @param requiredCountingSumScorer A required scorer already built. ScorerPtr addProhibitedScorers(ScorerPtr requiredCountingSumScorer); friend class CountingDisjunctionSumScorer; friend class CountingConjunctionSumScorer; }; class Coordinator : public LuceneObject { public: Coordinator(BooleanScorer2Ptr scorer); virtual ~Coordinator(); LUCENE_CLASS(Coordinator); public: BooleanScorer2WeakPtr _scorer; Collection coordFactors; int32_t maxCoord; // to be increased for each non prohibited scorer int32_t nrMatchers; // to be increased by score() of match counting scorers. public: void init(); // use after all scorers have been added. friend class BooleanScorer2; }; /// Count a scorer as a single match. 
class SingleMatchScorer : public Scorer { public: SingleMatchScorer(ScorerPtr scorer, CoordinatorPtr coordinator); virtual ~SingleMatchScorer(); LUCENE_CLASS(SingleMatchScorer); protected: ScorerPtr scorer; CoordinatorPtr coordinator; int32_t lastScoredDoc; double lastDocScore; public: virtual double score(); virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; class CountingDisjunctionSumScorer : public DisjunctionSumScorer { public: CountingDisjunctionSumScorer(BooleanScorer2Ptr scorer, Collection subScorers, int32_t minimumNrMatchers); virtual ~CountingDisjunctionSumScorer(); LUCENE_CLASS(CountingDisjunctionSumScorer); protected: BooleanScorer2WeakPtr _scorer; int32_t lastScoredDoc; // Save the score of lastScoredDoc, so that we don't compute it more than once in score(). double lastDocScore; public: virtual double score(); friend class BooleanScorer2; }; class CountingConjunctionSumScorer : public ConjunctionScorer { public: CountingConjunctionSumScorer(BooleanScorer2Ptr scorer, SimilarityPtr similarity, Collection scorers); virtual ~CountingConjunctionSumScorer(); LUCENE_CLASS(CountingConjunctionSumScorer); protected: BooleanScorer2WeakPtr _scorer; int32_t lastScoredDoc; int32_t requiredNrMatchers; // Save the score of lastScoredDoc, so that we don't compute it more than once in score(). double lastDocScore; public: virtual double score(); }; } #endif LucenePlusPlus-rel_3.0.4/include/BufferedDeletes.h000066400000000000000000000034341217574114600221430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BUFFEREDDELETES_H #define BUFFEREDDELETES_H #include "Term.h" #include "Query.h" namespace Lucene { /// Holds buffered deletes, by docID, term or query. We hold two instances of this class: one for /// the deletes prior to the last flush, the other for deletes after the last flush. This is so if /// we need to abort (discard all buffered docs) we can also discard the buffered deletes yet keep /// the deletes done during previously flushed segments. class BufferedDeletes : public LuceneObject { public: BufferedDeletes(bool doTermSort); virtual ~BufferedDeletes(); LUCENE_CLASS(BufferedDeletes); public: int32_t numTerms; MapTermNum terms; MapQueryInt queries; Collection docIDs; int64_t bytesUsed; public: int32_t size(); void update(BufferedDeletesPtr in); void clear(); void addBytesUsed(int64_t b); bool any(); void remap(MergeDocIDRemapperPtr mapper, SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergedDocCount); }; /// Number of documents a delete term applies to. class Num : public LuceneObject { public: Num(int32_t num); protected: int32_t num; public: int32_t getNum(); void setNum(int32_t num); }; } #endif LucenePlusPlus-rel_3.0.4/include/BufferedIndexInput.h000066400000000000000000000104401217574114600226400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BUFFEREDINDEXINPUT_H #define BUFFEREDINDEXINPUT_H #include "IndexInput.h" namespace Lucene { /// Base implementation class for buffered {@link IndexInput}. 
class LPPAPI BufferedIndexInput : public IndexInput { public: /// Construct BufferedIndexInput with a specific bufferSize. BufferedIndexInput(int32_t bufferSize = BUFFER_SIZE); virtual ~BufferedIndexInput(); LUCENE_CLASS(BufferedIndexInput); public: /// Default buffer size. static const int32_t BUFFER_SIZE; protected: int32_t bufferSize; int64_t bufferStart; // position in file of buffer int32_t bufferLength; // end of valid bytes int32_t bufferPosition; // next byte to read ByteArray buffer; public: /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); /// Change the buffer size used by this IndexInput. void setBufferSize(int32_t newSize); /// Returns buffer size. /// @see #setBufferSize int32_t getBufferSize(); /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*,int) /// @see #readInternal(uint8_t*, int32_t, int32_t) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Reads a specified number of bytes into an array at the specified offset with control over whether the /// read should be buffered (callers who have their own buffer should pass in "false" for useBuffer). /// Currently only {@link BufferedIndexInput} respects this parameter. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @param useBuffer set to false if the caller will handle buffering. /// @see IndexOutput#writeBytes(const uint8_t*,int) /// @see #readInternal(uint8_t*, int32_t, int32_t) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer); /// Closes the stream to further operations. 
virtual void close(); /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() /// @see #seekInternal(int64_t) virtual void seek(int64_t pos); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); protected: virtual void newBuffer(ByteArray newBuffer); void checkBufferSize(int32_t bufferSize); /// Refill buffer in preparation for reading. /// @see #readInternal(uint8_t*, int32_t, int32_t) /// @see #seekInternal(int64_t) virtual void refill(); /// Implements buffer refill. Reads bytes from the current position in the input. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. virtual void readInternal(uint8_t* b, int32_t offset, int32_t length) = 0; /// Implements seek. Sets current position in this file, where the next {@link /// #readInternal(uint8_t*, int32_t, int32_t)} will occur. /// @param pos position to set next write. /// @see #readInternal(uint8_t*, int32_t, int32_t) virtual void seekInternal(int64_t pos) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/BufferedIndexOutput.h000066400000000000000000000050751217574114600230510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BUFFEREDINDEXOUTPUT_H #define BUFFEREDINDEXOUTPUT_H #include "IndexOutput.h" namespace Lucene { /// Base implementation class for buffered {@link IndexOutput}. 
class LPPAPI BufferedIndexOutput : public IndexOutput { public: BufferedIndexOutput(); virtual ~BufferedIndexOutput(); LUCENE_CLASS(BufferedIndexOutput); public: static const int32_t BUFFER_SIZE; protected: int64_t bufferStart; // position in file of buffer int32_t bufferPosition; // position in buffer ByteArray buffer; public: /// Writes a single byte. /// @see IndexInput#readByte() virtual void writeByte(uint8_t b); /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); /// Forces any buffered output to be written. virtual void flush(); /// Implements buffer write. Writes bytes at the current /// position in the output. /// @param b the bytes to write. /// @param offset the offset in the byte array. /// @param length the number of bytes to write. virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length); /// Closes this stream to further operations. virtual void close(); /// Returns the current position in this file, where the next write will occur. /// @see #seek(long) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next write will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// The number of bytes in the file. virtual int64_t length() = 0; protected: /// Implements buffer write. Writes bytes at the current /// position in the output. /// @param b the bytes to write. /// @param length the number of bytes to write. void flushBuffer(const uint8_t* b, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.4/include/BufferedReader.h000066400000000000000000000035461217574114600217640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BUFFEREDREADER_H #define BUFFEREDREADER_H #include "Reader.h" namespace Lucene { /// Read text from a character-input stream, buffering characters so as to provide /// for the efficient reading of characters, arrays, and lines. class LPPAPI BufferedReader : public Reader { public: /// Create a buffering character-input stream. BufferedReader(ReaderPtr reader, int32_t size = READER_BUFFER); virtual ~BufferedReader(); LUCENE_CLASS(BufferedReader); protected: ReaderPtr reader; int32_t bufferSize; int32_t bufferLength; // end of valid bytes int32_t bufferPosition; // next byte to read CharArray buffer; public: static const int32_t READER_BUFFER; public: /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* b, int32_t offset, int32_t length); /// Read a line of text. virtual bool readLine(String& line); /// Close the stream. virtual void close(); /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Reset the stream. virtual void reset(); protected: /// Refill buffer in preparation for reading. int32_t refill(); /// Read a single character without moving position. int32_t peek(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ByteBlockPool.h000066400000000000000000000050001217574114600216120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BYTEBLOCKPOOL_H #define BYTEBLOCKPOOL_H #include "LuceneObject.h" namespace Lucene { /// Class that Posting and PostingVector use to write byte streams into shared fixed-size byte[] arrays. /// The idea is to allocate slices of increasing lengths. For example, the first slice is 5 bytes, the /// next slice is 14, etc. We start by writing our bytes into the first 5 bytes. When we hit the end of /// the slice, we allocate the next slice and then write the address of the new slice into the last 4 /// bytes of the previous slice (the "forwarding address"). /// /// Each slice is filled with 0's initially, and we mark the end with a non-zero byte. This way the methods /// that are writing into the slice don't need to record its length and instead allocate a new slice once /// they hit a non-zero byte. class ByteBlockPool : public LuceneObject { public: ByteBlockPool(ByteBlockPoolAllocatorBasePtr allocator, bool trackAllocations); virtual ~ByteBlockPool(); LUCENE_CLASS(ByteBlockPool); public: Collection buffers; int32_t bufferUpto; // Which buffer we are up to int32_t byteUpto; // Where we are in head buffer ByteArray buffer; int32_t byteOffset; static const int32_t nextLevelArray[]; static const int32_t levelSizeArray[]; protected: bool trackAllocations; ByteBlockPoolAllocatorBasePtr allocator; public: static int32_t FIRST_LEVEL_SIZE(); void reset(); void nextBuffer(); int32_t newSlice(int32_t size); int32_t allocSlice(ByteArray slice, int32_t upto); }; class ByteBlockPoolAllocatorBase : public LuceneObject { public: virtual ~ByteBlockPoolAllocatorBase(); LUCENE_CLASS(ByteBlockPoolAllocatorBase); public: virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end) = 0; virtual void recycleByteBlocks(Collection blocks) = 0; virtual ByteArray getByteBlock(bool trackAllocations) = 0; }; } #endif 
LucenePlusPlus-rel_3.0.4/include/ByteFieldSource.h000066400000000000000000000035541217574114600221460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BYTEFIELDSOURCE_H #define BYTEFIELDSOURCE_H #include "FieldCacheSource.h" namespace Lucene { /// Obtains byte field values from the {@link FieldCache} using getBytes() and makes those values available /// as other numeric types, casting as needed. /// /// @see FieldCacheSource for requirements on the field. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU /// per lookup but will not consume double the FieldCache RAM. class LPPAPI ByteFieldSource : public FieldCacheSource { public: /// Create a cached byte field source with a specific string-to-byte parser. 
ByteFieldSource(const String& field, ByteParserPtr parser = ByteParserPtr()); virtual ~ByteFieldSource(); LUCENE_CLASS(ByteFieldSource); protected: ByteParserPtr parser; public: virtual String description(); virtual DocValuesPtr getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader); virtual bool cachedFieldSourceEquals(FieldCacheSourcePtr other); virtual int32_t cachedFieldSourceHashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ByteSliceReader.h000066400000000000000000000035201217574114600221150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BYTESLICEREADER_H #define BYTESLICEREADER_H #include "IndexInput.h" namespace Lucene { /// IndexInput that knows how to read the byte slices written by Posting and PostingVector. We read the bytes in each slice /// until we hit the end of that slice at which point we read the forwarding address of the next slice and then jump to it. class ByteSliceReader : public IndexInput { public: ByteSliceReader(); virtual ~ByteSliceReader(); LUCENE_CLASS(ByteSliceReader); public: ByteBlockPoolPtr pool; int32_t bufferUpto; ByteArray buffer; int32_t upto; int32_t limit; int32_t level; int32_t bufferOffset; int32_t endIndex; public: void init(ByteBlockPoolPtr pool, int32_t startIndex, int32_t endIndex); bool eof(); /// Reads and returns a single byte. virtual uint8_t readByte(); int64_t writeTo(IndexOutputPtr out); void nextSlice(); /// Reads a specified number of bytes into an array at the specified offset. 
virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Not implemented virtual int64_t getFilePointer(); /// Not implemented virtual int64_t length(); /// Not implemented virtual void seek(int64_t pos); /// Not implemented virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ByteSliceWriter.h000066400000000000000000000024141217574114600221700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BYTESLICEWRITER_H #define BYTESLICEWRITER_H #include "LuceneObject.h" namespace Lucene { /// Class to write byte streams into slices of shared byte[]. This is used by DocumentsWriter to hold /// the posting list for many terms in RAM. class ByteSliceWriter : public LuceneObject { public: ByteSliceWriter(ByteBlockPoolPtr pool); virtual ~ByteSliceWriter(); LUCENE_CLASS(ByteSliceWriter); protected: ByteArray slice; int32_t upto; ByteBlockPoolPtr pool; public: int32_t offset0; public: /// Set up the writer to write at address. void init(int32_t address); /// Write byte into byte slice stream void writeByte(uint8_t b); void writeBytes(const uint8_t* b, int32_t offset, int32_t length); int32_t getAddress(); void writeVInt(int32_t i); }; } #endif LucenePlusPlus-rel_3.0.4/include/CachingSpanFilter.h000066400000000000000000000031051217574114600224320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CACHINGSPANFILTER_H #define CACHINGSPANFILTER_H #include "SpanFilter.h" #include "CachingWrapperFilter.h" namespace Lucene { /// Wraps another SpanFilter's result and caches it. The purpose is to allow filters to simply filter, /// and then wrap with this class to add caching. class LPPAPI CachingSpanFilter : public SpanFilter { public: /// New deletions always result in a cache miss, by default ({@link CachingWrapperFilter#RECACHE}. CachingSpanFilter(SpanFilterPtr filter, CachingWrapperFilter::DeletesMode deletesMode = CachingWrapperFilter::DELETES_RECACHE); virtual ~CachingSpanFilter(); LUCENE_CLASS(CachingSpanFilter); protected: SpanFilterPtr filter; FilterCachePtr cache; public: // for testing int32_t hitCount; int32_t missCount; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); virtual SpanFilterResultPtr bitSpans(IndexReaderPtr reader); virtual String toString(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); protected: SpanFilterResultPtr getCachedResult(IndexReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/CachingTokenFilter.h000066400000000000000000000025041217574114600226130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CACHINGTOKENFILTER_H #define CACHINGTOKENFILTER_H #include "TokenFilter.h" namespace Lucene { /// This class can be used if the token attributes of a TokenStream are intended to be consumed more than once. /// It caches all token attribute states locally in a List. 
/// /// CachingTokenFilter implements the optional method {@link TokenStream#reset()}, which repositions the stream /// to the first Token. class LPPAPI CachingTokenFilter : public TokenFilter { public: CachingTokenFilter(TokenStreamPtr input); virtual ~CachingTokenFilter(); LUCENE_CLASS(CachingTokenFilter); protected: Collection cache; Collection::iterator iterator; AttributeSourceStatePtr finalState; public: virtual bool incrementToken(); virtual void end(); virtual void reset(); protected: void fillCache(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CachingWrapperFilter.h000066400000000000000000000061431217574114600231560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CACHINGWRAPPERFILTER_H #define CACHINGWRAPPERFILTER_H #include "Filter.h" namespace Lucene { /// Wraps another filter's result and caches it. The purpose is to allow filters to simply filter, and /// then wrap with this class to add caching. class LPPAPI CachingWrapperFilter : public Filter { public: /// Specifies how new deletions against a reopened reader should be handled. /// /// The default is IGNORE, which means the cache entry will be re-used for a given segment, even when /// that segment has been reopened due to changes in deletions. This is a big performance gain, /// especially with near-real-timer readers, since you don't hit a cache miss on every reopened reader /// for prior segments. /// /// However, in some cases this can cause invalid query results, allowing deleted documents to be /// returned. This only happens if the main query does not rule out deleted documents on its own, /// such as a toplevel ConstantScoreQuery. 
To fix this, use RECACHE to re-create the cached filter /// (at a higher per-reopen cost, but at faster subsequent search performance), or use DYNAMIC to /// dynamically intersect deleted docs (fast reopen time but some hit to search performance). enum DeletesMode { DELETES_IGNORE, DELETES_RECACHE, DELETES_DYNAMIC }; /// New deletes are ignored by default, which gives higher cache hit rate on reopened readers. /// Most of the time this is safe, because the filter will be AND'd with a Query that fully enforces /// deletions. If instead you need this filter to always enforce deletions, pass either {@link /// DeletesMode#RECACHE} or {@link DeletesMode#DYNAMIC}. CachingWrapperFilter(FilterPtr filter, DeletesMode deletesMode = DELETES_IGNORE); virtual ~CachingWrapperFilter(); LUCENE_CLASS(CachingWrapperFilter); INTERNAL: FilterPtr filter; // for testing int32_t hitCount; int32_t missCount; protected: /// A Filter cache FilterCachePtr cache; /// Provide the DocIdSet to be cached, using the DocIdSet provided by the wrapped Filter. /// /// This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable} returns /// true, else it copies the {@link DocIdSetIterator} into an {@link OpenBitSetDISI}. DocIdSetPtr docIdSetToCache(DocIdSetPtr docIdSet, IndexReaderPtr reader); public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); virtual String toString(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CharArraySet.h000066400000000000000000000036151217574114600214440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CHARARRAYSET_H #define CHARARRAYSET_H #include "LuceneObject.h" namespace Lucene { /// A simple class that stores Strings as char[]'s in a hash table. Note that this is not a general purpose class. /// For example, it cannot remove items from the set, nor does it resize its hash table to be smaller, etc. It is /// designed to be quick to test if a char[] is in the set without the necessity of converting it to a String first. class LPPAPI CharArraySet : public LuceneObject { public: CharArraySet(bool ignoreCase); /// Create set from a set of strings. CharArraySet(HashSet entries, bool ignoreCase); /// Create set from a collection of strings. CharArraySet(Collection entries, bool ignoreCase); virtual ~CharArraySet(); LUCENE_CLASS(CharArraySet); protected: HashSet entries; bool ignoreCase; public: virtual bool contains(const String& text); /// True if the length chars of text starting at offset are in the set virtual bool contains(const wchar_t* text, int32_t offset, int32_t length); /// Add this String into the set virtual bool add(const String& text); /// Add this char[] into the set. virtual bool add(CharArray text); virtual int32_t size(); virtual bool isEmpty(); HashSet::iterator begin(); HashSet::iterator end(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CharBlockPool.h000066400000000000000000000020731217574114600215730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CHARBLOCKPOOL_H #define CHARBLOCKPOOL_H #include "LuceneObject.h" namespace Lucene { class CharBlockPool : public LuceneObject { public: CharBlockPool(DocumentsWriterPtr docWriter); virtual ~CharBlockPool(); LUCENE_CLASS(CharBlockPool); public: Collection buffers; int32_t numBuffer; int32_t bufferUpto; // Which buffer we are up to int32_t charUpto; // Where we are in head buffer CharArray buffer; // Current head buffer int32_t charOffset; // Current head offset protected: DocumentsWriterWeakPtr _docWriter; public: void reset(); void nextBuffer(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CharFilter.h000066400000000000000000000030251217574114600211320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHARFILTER_H #define CHARFILTER_H #include "CharStream.h" namespace Lucene { /// Subclasses of CharFilter can be chained to filter CharStream. They can be used as {@link Reader} with /// additional offset correction. {@link Tokenizer}s will automatically use {@link #correctOffset} if a /// CharFilter/CharStream subclass is used. class LPPAPI CharFilter : public CharStream { protected: CharFilter(CharStreamPtr in); public: virtual ~CharFilter(); LUCENE_CLASS(CharFilter); protected: CharStreamPtr input; protected: /// Subclass may want to override to correct the current offset. /// @param currentOff current offset /// @return corrected offset virtual int32_t correct(int32_t currentOff); /// Chains the corrected offset through the input CharFilter. 
virtual int32_t correctOffset(int32_t currentOff); virtual void close(); virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); virtual bool markSupported(); virtual void mark(int32_t readAheadLimit); virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CharFolder.h000066400000000000000000000025511217574114600211230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHARFOLDER_H #define CHARFOLDER_H #include "LuceneObject.h" namespace Lucene { /// Utility class for folding character case. class LPPAPI CharFolder : public LuceneObject { public: virtual ~CharFolder(); LUCENE_CLASS(CharFolder); protected: static bool lowerCache; static bool upperCache; static wchar_t lowerChars[CHAR_MAX - CHAR_MIN + 1]; static wchar_t upperChars[CHAR_MAX - CHAR_MIN + 1]; public: static wchar_t toLower(wchar_t ch); static wchar_t toUpper(wchar_t ch); template static void toLower(ITER first, ITER last) { for (; first != last; ++first) *first = toLower(*first); } template static void toUpper(ITER first, ITER last) { for (; first != last; ++first) *first = toUpper(*first); } protected: static bool fillLower(); static bool fillUpper(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CharReader.h000066400000000000000000000023741217574114600211150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CHARREADER_H #define CHARREADER_H #include "CharStream.h" namespace Lucene { /// CharReader is a Reader wrapper. It reads chars from Reader and outputs {@link CharStream}, defining an /// identify function {@link #correctOffset} method that simply returns the provided offset. class LPPAPI CharReader : public CharStream { public: CharReader(ReaderPtr in); virtual ~CharReader(); LUCENE_CLASS(CharReader); protected: ReaderPtr input; public: using CharStream::read; static CharStreamPtr get(ReaderPtr input); virtual int32_t correctOffset(int32_t currentOff); virtual void close(); virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); virtual bool markSupported(); virtual void mark(int32_t readAheadLimit); virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CharStream.h000066400000000000000000000024261217574114600211440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHARSTREAM_H #define CHARSTREAM_H #include "Reader.h" namespace Lucene { /// CharStream adds {@link #correctOffset} functionality over {@link Reader}. All Tokenizers accept a CharStream /// instead of {@link Reader} as input, which enables arbitrary character based filtering before tokenization. /// The {@link #correctOffset} method fixed offsets to account for removal or insertion of characters, so that the /// offsets reported in the tokens match the character offsets of the original Reader. class LPPAPI CharStream : public Reader { public: virtual ~CharStream(); LUCENE_CLASS(CharStream); public: /// Called by CharFilter(s) and Tokenizer to correct token offset. 
/// /// @param currentOff offset as seen in the output /// @return corrected offset based on the input virtual int32_t correctOffset(int32_t currentOff) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/CharTokenizer.h000066400000000000000000000035551217574114600216670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHARTOKENIZER_H #define CHARTOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// An abstract base class for simple, character-oriented tokenizers. class LPPAPI CharTokenizer : public Tokenizer { public: CharTokenizer(ReaderPtr input); CharTokenizer(AttributeSourcePtr source, ReaderPtr input); CharTokenizer(AttributeFactoryPtr factory, ReaderPtr input); virtual ~CharTokenizer(); LUCENE_CLASS(CharTokenizer); protected: int32_t offset; int32_t bufferIndex; int32_t dataLen; static const int32_t MAX_WORD_LEN; static const int32_t IO_BUFFER_SIZE; CharArray ioBuffer; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken(); virtual void end(); virtual void reset(ReaderPtr input); protected: /// Returns true if a character should be included in a token. This tokenizer generates as tokens adjacent /// sequences of characters which satisfy this predicate. Characters for which this is false are used to /// define token boundaries and are not included in tokens. virtual bool isTokenChar(wchar_t c) = 0; /// Called on each token character to normalize it before it is added to the token. The default implementation /// does nothing. Subclasses may use this to, eg., lowercase tokens. 
virtual wchar_t normalize(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.4/include/CheckIndex.h000066400000000000000000000271241217574114600211220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHECKINDEX_H #define CHECKINDEX_H #include "SegmentTermDocs.h" namespace Lucene { /// Basic tool and API to check the health of an index and write a new segments file that removes reference to /// problematic segments. /// /// As this tool checks every byte in the index, on a large index it can take quite a long time to run. /// /// WARNING: Please make a complete backup of your index before using this to fix your index! class LPPAPI CheckIndex : public LuceneObject { public: /// Create a new CheckIndex on the directory. CheckIndex(DirectoryPtr dir); virtual ~CheckIndex(); LUCENE_CLASS(CheckIndex); protected: InfoStreamPtr infoStream; DirectoryPtr dir; static bool _assertsOn; public: /// Set infoStream where messages should go. If null, no messages are printed void setInfoStream(InfoStreamPtr out); /// Returns a {@link IndexStatus} instance detailing the state of the index. /// /// As this method checks every byte in the index, on a large index it can take quite a long time to run. /// /// WARNING: make sure you only call this when the index is not opened by any writer. IndexStatusPtr checkIndex(); /// Returns a {@link IndexStatus} instance detailing the state of the index. /// /// @param onlySegments list of specific segment names to check /// /// As this method checks every byte in the specified segments, on a large index it can take quite a long /// time to run. /// /// WARNING: make sure you only call this when the index is not opened by any writer. 
IndexStatusPtr checkIndex(Collection onlySegments); /// Repairs the index using previously returned result from {@link #checkIndex}. Note that this does not /// remove any of the unreferenced files after it's done; you must separately open an {@link IndexWriter}, /// which deletes unreferenced files when it's created. /// /// WARNING: this writes a new segments file into the index, effectively removing all documents in broken /// segments from the index. BE CAREFUL. /// /// WARNING: Make sure you only call this when the index is not opened by any writer. void fixIndex(IndexStatusPtr result); static bool testAsserts(); static bool assertsOn(); /// Command-line interface to check and fix an index. /// /// Run it like this: /// CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] /// /// -fix: actually write a new segments_N file, removing any problematic segments /// /// -segment X: only check the specified segment(s). This can be specified multiple times, /// to check more than one segment, eg -segment _2 -segment _a. /// You can't use this with the -fix option. /// /// WARNING: -fix should only be used on an emergency basis as it will cause documents (perhaps many) /// to be permanently removed from the index. Always make a backup copy of your index before running /// this! Do not run this tool on an index that is actively being written to. You have been warned! /// /// Run without -fix, this tool will open the index, report version information and report any exceptions /// it hits and what action it would take if -fix were specified. With -fix, this tool will remove any /// segments that have issues and write a new segments_N file. This means all documents contained in the /// affected segments will be removed. /// /// This tool exits with exit code 1 if the index cannot be opened or has any corruption, else 0. static int main(Collection args); protected: void msg(const String& msg); /// Test field norms. 
FieldNormStatusPtr testFieldNorms(Collection fieldNames, SegmentReaderPtr reader); /// Test the term index. TermIndexStatusPtr testTermIndex(SegmentInfoPtr info, SegmentReaderPtr reader); /// Test stored fields for a segment. StoredFieldStatusPtr testStoredFields(SegmentInfoPtr info, SegmentReaderPtr reader); /// Test term vectors for a segment. TermVectorStatusPtr testTermVectors(SegmentInfoPtr info, SegmentReaderPtr reader); }; /// Returned from {@link #checkIndex()} detailing the health and status of the index. class LPPAPI IndexStatus : public LuceneObject { public: IndexStatus(); virtual ~IndexStatus(); LUCENE_CLASS(IndexStatus); public: /// True if no problems were found with the index. bool clean; /// True if we were unable to locate and load the segments_N file. bool missingSegments; /// True if we were unable to open the segments_N file. bool cantOpenSegments; /// True if we were unable to read the version number from segments_N file. bool missingSegmentVersion; /// Name of latest segments_N file in the index. String segmentsFileName; /// Number of segments in the index. int32_t numSegments; /// String description of the version of the index. String segmentFormat; /// Empty unless you passed specific segments list to check as optional 3rd argument. /// @see CheckIndex#checkIndex(List) Collection segmentsChecked; /// True if the index was created with a newer version of Lucene than the CheckIndex tool. bool toolOutOfDate; /// List of {@link SegmentInfoStatus} instances, detailing status of each segment. Collection segmentInfos; /// Directory index is in. DirectoryPtr dir; /// SegmentInfos instance containing only segments that had no problems (this is used with the /// {@link CheckIndex#fixIndex} method to repair the index. SegmentInfosPtr newSegments; /// How many documents will be lost to bad segments. int32_t totLoseDocCount; /// How many bad segments were found. 
int32_t numBadSegments; /// True if we checked only specific segments ({@link #checkIndex(List)}) was called with non-null argument). bool partial; /// Holds the userData of the last commit in the index MapStringString userData; }; /// Holds the status of each segment in the index. See {@link #segmentInfos}. class LPPAPI SegmentInfoStatus : public LuceneObject { public: SegmentInfoStatus(); virtual ~SegmentInfoStatus(); LUCENE_CLASS(SegmentInfoStatus); public: /// Name of the segment. String name; /// Document count (does not take deletions into account). int32_t docCount; /// True if segment is compound file format. bool compound; /// Number of files referenced by this segment. int32_t numFiles; /// Net size (MB) of the files referenced by this segment. double sizeMB; /// Doc store offset, if this segment shares the doc store files (stored fields and term vectors) with /// other segments. This is -1 if it does not share. int32_t docStoreOffset; /// String of the shared doc store segment, or null if this segment does not share the doc store files. String docStoreSegment; /// True if the shared doc store files are compound file format. bool docStoreCompoundFile; /// True if this segment has pending deletions. bool hasDeletions; /// Name of the current deletions file name. String deletionsFileName; /// Number of deleted documents. int32_t numDeleted; /// True if we were able to open a SegmentReader on this segment. bool openReaderPassed; /// Number of fields in this segment. int32_t numFields; /// True if at least one of the fields in this segment does not omitTermFreqAndPositions. /// @see AbstractField#setOmitTermFreqAndPositions bool hasProx; /// Map that includes certain debugging details that IndexWriter records into each segment it creates MapStringString diagnostics; /// Status for testing of field norms (null if field norms could not be tested). 
FieldNormStatusPtr fieldNormStatus; /// Status for testing of indexed terms (null if indexed terms could not be tested). TermIndexStatusPtr termIndexStatus; /// Status for testing of stored fields (null if stored fields could not be tested). StoredFieldStatusPtr storedFieldStatus; /// Status for testing of term vectors (null if term vectors could not be tested). TermVectorStatusPtr termVectorStatus; }; /// Status from testing field norms. class LPPAPI FieldNormStatus : public LuceneObject { public: FieldNormStatus(); virtual ~FieldNormStatus(); LUCENE_CLASS(FieldNormStatus); public: /// Number of fields successfully tested int64_t totFields; /// Exception thrown during term index test (null on success) LuceneException error; }; /// Status from testing term index. class LPPAPI TermIndexStatus : public LuceneObject { public: TermIndexStatus(); virtual ~TermIndexStatus(); LUCENE_CLASS(TermIndexStatus); public: /// Total term count int64_t termCount; /// Total frequency across all terms. int64_t totFreq; /// Total number of positions. int64_t totPos; /// Exception thrown during term index test (null on success) LuceneException error; }; /// Status from testing stored fields. class LPPAPI StoredFieldStatus : public LuceneObject { public: StoredFieldStatus(); virtual ~StoredFieldStatus(); LUCENE_CLASS(StoredFieldStatus); public: /// Number of documents tested. int32_t docCount; /// Total number of stored fields tested. int64_t totFields; /// Exception thrown during stored fields test (null on success) LuceneException error; }; /// Status from testing stored fields. class LPPAPI TermVectorStatus : public LuceneObject { public: TermVectorStatus(); virtual ~TermVectorStatus(); LUCENE_CLASS(TermVectorStatus); public: /// Number of documents tested. int32_t docCount; /// Total number of term vectors tested. 
int64_t totVectors; /// Exception thrown during term vector test (null on success) LuceneException error; }; } #endif LucenePlusPlus-rel_3.0.4/include/ChecksumIndexInput.h000066400000000000000000000042351217574114600226650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHECKSUMINDEXINPUT_H #define CHECKSUMINDEXINPUT_H #include #include "IndexInput.h" namespace Lucene { /// Writes bytes through to a primary IndexInput, computing checksum as it goes. /// Note that you cannot use seek(). class LPPAPI ChecksumIndexInput : public IndexInput { public: ChecksumIndexInput(IndexInputPtr main); virtual ~ChecksumIndexInput(); LUCENE_CLASS(ChecksumIndexInput); protected: IndexInputPtr main; boost::crc_32_type checksum; public: /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*,int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Return calculated checksum. int64_t getChecksum(); /// Closes the stream to further operations. virtual void close(); /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// The number of bytes in the file. virtual int64_t length(); /// Returns a clone of this stream. 
virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/ChecksumIndexOutput.h000066400000000000000000000045251217574114600230700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHECKSUMINDEXOUTPUT_H #define CHECKSUMINDEXOUTPUT_H #include #include "IndexOutput.h" namespace Lucene { /// Writes bytes through to a primary IndexOutput, computing /// checksum. Note that you cannot use seek(). class LPPAPI ChecksumIndexOutput : public IndexOutput { public: ChecksumIndexOutput(IndexOutputPtr main); virtual ~ChecksumIndexOutput(); LUCENE_CLASS(ChecksumIndexOutput); protected: IndexOutputPtr main; boost::crc_32_type checksum; public: /// Writes a single byte. /// @see IndexInput#readByte() virtual void writeByte(uint8_t b); /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); /// Return calculated checksum. int64_t getChecksum(); /// Forces any buffered output to be written. virtual void flush(); /// Closes the stream to further operations. virtual void close(); /// Returns the current position in this file, where the next write will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next write will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// Starts but does not complete the commit of this file (= writing of /// the final checksum at the end). 
After this is called must call /// {@link #finishCommit} and the {@link #close} to complete the commit. void prepareCommit(); /// See {@link #prepareCommit} void finishCommit(); /// The number of bytes in the file. virtual int64_t length(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CloseableThreadLocal.h000066400000000000000000000033471217574114600231120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CLOSEABLETHREADLOCAL_H #define CLOSEABLETHREADLOCAL_H #include "LuceneThread.h" namespace Lucene { /// General purpose thread-local map. template class CloseableThreadLocal : public LuceneObject { public: typedef boost::shared_ptr localDataPtr; typedef Map MapLocalData; CloseableThreadLocal() { localData = MapLocalData::newInstance(); } public: localDataPtr get() { SyncLock syncLock(this); typename MapLocalData::iterator local = localData.find(LuceneThread::currentId()); if (local != localData.end()) return local->second; localDataPtr initial(initialValue()); if (initial) localData.put(LuceneThread::currentId(), initial); return initial; } void set(localDataPtr data) { SyncLock syncLock(this); localData.put(LuceneThread::currentId(), data); } void close() { SyncLock syncLock(this); localData.remove(LuceneThread::currentId()); } protected: MapLocalData localData; virtual localDataPtr initialValue() { return localDataPtr(); // override } }; } #endif LucenePlusPlus-rel_3.0.4/include/Collator.h000066400000000000000000000015661217574114600206760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COLLATOR_H #define COLLATOR_H #include "LuceneObject.h" namespace Lucene { /// Convenience class for storing collate objects. class LPPAPI Collator : public LuceneObject { public: /// Creates a new Collator, given the file to read from. Collator(std::locale locale); virtual ~Collator(); LUCENE_CLASS(Collator); protected: const std::collate& collate; public: int32_t compare(const String& first, const String& second); }; } #endif LucenePlusPlus-rel_3.0.4/include/Collection.h000066400000000000000000000206561217574114600212130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef COLLECTION_H #define COLLECTION_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle collections that can be safely copied and shared template class Collection : public LuceneSync { public: typedef Collection this_type; typedef boost::shared_ptr shared_ptr; typedef std::vector< TYPE, LuceneAllocator > collection_type; typedef typename collection_type::iterator iterator; typedef typename collection_type::const_iterator const_iterator; typedef TYPE value_type; virtual ~Collection() { } protected: boost::shared_ptr container; public: static this_type newInstance(int32_t size = 0) { this_type instance; instance.container = Lucene::newInstance(size); return instance; } template static this_type newInstance(ITER first, ITER last) { this_type instance; instance.container = Lucene::newInstance(first, last); return instance; } void reset() { resize(0); } void resize(int32_t size) { if (size == 0) container.reset(); else container->resize(size); } int32_t size() const { return (int32_t)container->size(); } bool empty() const { return container->empty(); } void clear() { container->clear(); } iterator begin() { return container->begin(); } iterator end() { return container->end(); } const_iterator begin() const { return container->begin(); } const_iterator end() const { return container->end(); } void add(const TYPE& type) { container->push_back(type); } void add(int32_t pos, const TYPE& type) { container->insert(container->begin() + pos, type); } template void addAll(ITER first, ITER last) { container->insert(container->end(), first, last); } template void insert(ITER pos, const TYPE& type) { container->insert(pos, type); } template ITER remove(ITER pos) { return container->erase(pos); } template ITER remove(ITER first, ITER last) { return container->erase(first, last); } void remove(const TYPE& type) { container->erase(std::remove(container->begin(), 
container->end(), type), container->end()); } template void remove_if(PRED comp) { container->erase(std::remove_if(container->begin(), container->end(), comp), container->end()); } TYPE removeFirst() { TYPE front = container->front(); container->erase(container->begin()); return front; } TYPE removeLast() { TYPE back = container->back(); container->pop_back(); return back; } iterator find(const TYPE& type) { return std::find(container->begin(), container->end(), type); } template iterator find_if(PRED comp) { return std::find_if(container->begin(), container->end(), comp); } bool contains(const TYPE& type) const { return (std::find(container->begin(), container->end(), type) != container->end()); } template bool contains_if(PRED comp) const { return (std::find_if(container->begin(), container->end(), comp) != container->end()); } bool equals(const this_type& other) const { return equals(other, std::equal_to()); } template bool equals(const this_type& other, PRED comp) const { if (container->size() != other.container->size()) return false; return std::equal(container->begin(), container->end(), other.container->begin(), comp); } int32_t hashCode() { return (int32_t)(int64_t)container.get(); } void swap(this_type& other) { container.swap(other->container); } TYPE& operator[] (int32_t pos) { return (*container)[pos]; } const TYPE& operator[] (int32_t pos) const { return (*container)[pos]; } operator bool() const { return container; } bool operator! 
() const { return !container; } bool operator== (const this_type& other) { return (container == other.container); } bool operator!= (const this_type& other) { return (container != other.container); } }; template Collection newCollection(const TYPE& a1) { Collection result = Collection::newInstance(); result.add(a1); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2) { Collection result = newCollection(a1); result.add(a2); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3) { Collection result = newCollection(a1, a2); result.add(a3); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4) { Collection result = newCollection(a1, a2, a3); result.add(a4); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5) { Collection result = newCollection(a1, a2, a3, a4); result.add(a5); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6) { Collection result = newCollection(a1, a2, a3, a4, a5); result.add(a6); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7) { Collection result = newCollection(a1, a2, a3, a4, a5, a6); result.add(a7); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8) { Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7); result.add(a8); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8, const TYPE& a9) { Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7, a8); result.add(a9); 
return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8, const TYPE& a9, const TYPE& a10) { Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7, a8, a9); result.add(a10); return result; } } #endif LucenePlusPlus-rel_3.0.4/include/Collector.h000066400000000000000000000145561217574114600210500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COLLECTOR_H #define COLLECTOR_H #include "LuceneObject.h" namespace Lucene { /// Collectors are primarily meant to be used to gather raw results from a search, and implement sorting /// or custom result filtering, collation, etc. /// /// Lucene's core collectors are derived from Collector. Likely your application can use one of these /// classes, or subclass {@link TopDocsCollector}, instead of implementing Collector directly: /// ///
    ///
  • {@link TopDocsCollector} is an abstract base class that assumes you will retrieve the top N docs, /// according to some criteria, after collection is done. /// ///
  • {@link TopScoreDocCollector} is a concrete subclass {@link TopDocsCollector} and sorts according /// to score + docID. This is used internally by the {@link IndexSearcher} search methods that do not take /// an explicit {@link Sort}. It is likely the most frequently used collector. /// ///
  • {@link TopFieldCollector} subclasses {@link TopDocsCollector} and sorts according to a specified /// {@link Sort} object (sort by field). This is used internally by the {@link IndexSearcher} search methods /// that take an explicit {@link Sort}. /// ///
  • {@link TimeLimitingCollector}, which wraps any other Collector and aborts the search if it's taken too /// much time. /// ///
  • {@link PositiveScoresOnlyCollector} wraps any other Collector and prevents collection of hits whose /// score is <= 0.0 /// ///
/// /// Collector decouples the score from the collected doc: the score computation is skipped entirely if it's not /// needed. Collectors that do need the score should implement the {@link #setScorer} method, to hold onto the /// passed {@link Scorer} instance, and call {@link Scorer#score()} within the collect method to compute the /// current hit's score. If your collector may request the score for a single hit multiple times, you should use /// {@link ScoreCachingWrappingScorer}. /// /// NOTE: The doc that is passed to the collect method is relative to the current reader. If your collector needs /// to resolve this to the docID space of the Multi*Reader, you must re-base it by recording the docBase from the /// most recent setNextReader call. Here's a simple example showing how to collect docIDs into a BitSet: /// ///
    /// class MyCollector : public Collector
    /// {
    /// public:
    ///     MyCollector(BitSetPtr bits)
    ///     {
    ///         this->bits = bits;
    ///         this->docBase = 0;
    ///     }
    /// 
    /// protected:
    ///     BitSetPtr bits;
    ///     int32_t docBase;
    /// 
    /// public:
    ///     virtual void setScorer(ScorerPtr scorer)
    ///     {
    ///         // ignore scorer
    ///     }
    ///     
    ///     virtual void collect(int32_t doc)
    ///     {
    ///         bits->set(doc + docBase);
    ///     }
    ///     
    ///     virtual void setNextReader(IndexReaderPtr reader, int32_t docBase)
    ///     {
    ///         this->docBase = docBase;
    ///     }
    ///     
    ///     virtual bool acceptsDocsOutOfOrder()
    ///     {
    ///         return true; // accept docs out of order (for a BitSet it doesn't matter)
    ///     }
    /// };
    /// 
    /// ...
    /// 
    /// SearcherPtr searcher = newLucene(indexReader);
    /// BitSetPtr bits = newLucene(indexReader->maxDoc());
    /// searcher->search(query, newLucene(bits));
    ///
    /// 
/// Not all collectors will need to rebase the docID. For example, a collector that simply counts the /// total number of hits would skip it. /// /// NOTE: Prior to 2.9, Lucene silently filtered out hits with score <= 0. As of 2.9, the core Collectors /// no longer do that. It's very unusual to have such hits (a negative query boost, or function query /// returning negative custom scores, could cause it to happen). If you need that behavior, use {@link /// PositiveScoresOnlyCollector}. class LPPAPI Collector : public LuceneObject { public: virtual ~Collector(); LUCENE_CLASS(Collector); public: /// Called before successive calls to {@link #collect(int32_t)}. Implementations that need the score /// of the current document (passed-in to {@link #collect(int32_t)}), should save the passed-in Scorer /// and call scorer.score() when needed. virtual void setScorer(ScorerPtr scorer) = 0; /// Called once for every document matching a query, with the unbased document number. /// /// Note: This is called in an inner search loop. For good search performance, implementations of this /// method should not call {@link Searcher#doc(int32_t)} or {@link IndexReader#document(int32_t)} on /// every hit. Doing so can slow searches by an order of magnitude or more. virtual void collect(int32_t doc) = 0; /// Called before collecting from each IndexReader. All doc ids in {@link #collect(int32_t)} will /// correspond to reader. Add docBase to the current IndexReaders internal document id to re-base ids /// in {@link #collect(int32_t)}. /// @param reader next IndexReader /// @param docBase virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) = 0; /// Return true if this collector does not require the matching docIDs to be delivered in int sort /// order (smallest to largest) to {@link #collect}. /// /// Most Lucene Query implementations will visit matching docIDs in order. 
However, some queries /// (currently limited to certain cases of {@link BooleanQuery}) can achieve faster searching if the /// Collector allows them to deliver the docIDs out of order. /// /// Many collectors don't mind getting docIDs out of order, so it's important to return true here. virtual bool acceptsDocsOutOfOrder() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/ComplexExplanation.h000066400000000000000000000026671217574114600227340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COMPLEXEXPLANATION_H #define COMPLEXEXPLANATION_H #include "Explanation.h" namespace Lucene { /// Describes the score computation for document and query, and can distinguish a match independent /// of a positive value. class LPPAPI ComplexExplanation : public Explanation { public: ComplexExplanation(bool match = false, double value = 0, const String& description = EmptyString); virtual ~ComplexExplanation(); LUCENE_CLASS(ComplexExplanation); protected: bool match; public: /// The match status of this explanation node. bool getMatch(); /// Sets the match status assigned to this explanation node. void setMatch(bool match); /// Indicates whether or not this Explanation models a good match. /// /// If the match status is explicitly set this method uses it; otherwise it defers to the /// superclass. /// /// @see #getMatch virtual bool isMatch(); protected: virtual String getSummary(); }; } #endif LucenePlusPlus-rel_3.0.4/include/CompoundFileReader.h000066400000000000000000000104531217574114600226210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COMPOUNDFILEREADER_H #define COMPOUNDFILEREADER_H #include "Directory.h" #include "BufferedIndexInput.h" namespace Lucene { /// Class for accessing a compound stream. /// This class implements a directory, but is limited to only read operations. /// Directory methods that would normally modify data throw an exception. class CompoundFileReader : public Directory { public: CompoundFileReader(DirectoryPtr dir, const String& name); CompoundFileReader(DirectoryPtr dir, const String& name, int32_t readBufferSize); virtual ~CompoundFileReader(); LUCENE_CLASS(CompoundFileReader); protected: struct FileEntry { FileEntry(int64_t offset = 0, int64_t length = 0) { this->offset = offset; this->length = length; } int64_t offset; int64_t length; }; typedef boost::shared_ptr FileEntryPtr; typedef HashMap MapStringFileEntryPtr; DirectoryPtr directory; String fileName; int32_t readBufferSize; IndexInputPtr stream; MapStringFileEntryPtr entries; protected: void ConstructReader(DirectoryPtr dir, const String& name, int32_t readBufferSize); public: DirectoryPtr getDirectory(); String getName(); virtual void close(); virtual IndexInputPtr openInput(const String& name); virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); /// Returns an array of strings, one for each file in the directory. virtual HashSet listAll(); /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name); /// Returns the time the compound file was last modified. virtual uint64_t fileModified(const String& name); /// Set the modified time of the compound file to now. 
virtual void touchFile(const String& name); /// Not implemented virtual void deleteFile(const String& name); /// Not implemented virtual void renameFile(const String& from, const String& to); /// Returns the length of a file in the directory. virtual int64_t fileLength(const String& name); /// Not implemented virtual IndexOutputPtr createOutput(const String& name); /// Not implemented virtual LockPtr makeLock(const String& name); }; /// Implementation of an IndexInput that reads from a portion of the compound file. class CSIndexInput : public BufferedIndexInput { public: CSIndexInput(); CSIndexInput(IndexInputPtr base, int64_t fileOffset, int64_t length); CSIndexInput(IndexInputPtr base, int64_t fileOffset, int64_t length, int32_t readBufferSize); virtual ~CSIndexInput(); LUCENE_CLASS(CSIndexInput); public: IndexInputPtr base; int64_t fileOffset; int64_t _length; public: /// Closes the stream to further operations. virtual void close(); virtual int64_t length(); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); protected: /// Implements buffer refill. Reads bytes from the current position in the input. /// @param b the array to read bytes into /// @param offset the offset in the array to start storing bytes /// @param len the number of bytes to read virtual void readInternal(uint8_t* b, int32_t offset, int32_t length); /// Implements seek. Sets current position in this file, where the next {@link /// #readInternal(byte[],int,int)} will occur. virtual void seekInternal(int64_t pos); }; } #endif LucenePlusPlus-rel_3.0.4/include/CompoundFileWriter.h000066400000000000000000000054561217574114600227020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef COMPOUNDFILEWRITER_H #define COMPOUNDFILEWRITER_H #include "LuceneObject.h" namespace Lucene { /// Combines multiple files into a single compound file. /// The file format: /// VInt fileCount /// {Directory} /// fileCount entries with the following structure: /// int64_t dataOffset /// String fileName /// {File Data} /// fileCount entries with the raw data of the corresponding file /// /// The fileCount integer indicates how many files are contained in this compound file. The {directory} /// that follows has that many entries. Each directory entry contains a long pointer to the start of /// this file's data section, and a string with that file's name. class CompoundFileWriter : public LuceneObject { public: CompoundFileWriter(DirectoryPtr dir, const String& name, CheckAbortPtr checkAbort = CheckAbortPtr()); virtual ~CompoundFileWriter(); LUCENE_CLASS(CompoundFileWriter); protected: struct FileEntry { /// source file String file; /// temporary holder for the start of directory entry for this file int64_t directoryOffset; /// temporary holder for the start of this file's data section int64_t dataOffset; }; DirectoryWeakPtr _directory; String fileName; HashSet ids; Collection entries; bool merged; CheckAbortPtr checkAbort; public: /// Returns the directory of the compound file. DirectoryPtr getDirectory(); /// Returns the name of the compound file. String getName(); /// Add a source stream. file is the string by which the sub-stream will be known in the /// compound stream. void addFile(const String& file); /// Merge files with the extensions added up to now. All files with these extensions are /// combined sequentially into the compound stream. After successful merge, the source /// are deleted.files void close(); protected: /// Copy the contents of the file with specified extension into the provided output stream. 
/// Use the provided buffer for moving data to reduce memory allocation. void copyFile(const FileEntry& source, IndexOutputPtr os, ByteArray buffer); }; } #endif LucenePlusPlus-rel_3.0.4/include/CompressionTools.h000066400000000000000000000036231217574114600224350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COMPRESSIONTOOLS_H #define COMPRESSIONTOOLS_H #include "LuceneObject.h" namespace Lucene { /// Simple utility class providing static methods to compress and decompress binary data for stored fields. class LPPAPI CompressionTools : public LuceneObject { public: virtual ~CompressionTools(); LUCENE_CLASS(CompressionTools); public: /// Compresses the specified byte range using the specified compressionLevel static ByteArray compress(uint8_t* value, int32_t offset, int32_t length, int32_t compressionLevel); /// Compresses the specified byte range, with default BEST_COMPRESSION level static ByteArray compress(uint8_t* value, int32_t offset, int32_t length); /// Compresses all bytes in the array, with default BEST_COMPRESSION level static ByteArray compress(ByteArray value); /// Compresses the String value, with default BEST_COMPRESSION level static ByteArray compressString(const String& value); /// Compresses the String value using the specified compressionLevel static ByteArray compressString(const String& value, int32_t compressionLevel); /// Decompress the byte array previously returned by compress static ByteArray decompress(ByteArray value); /// Decompress the byte array previously returned by compressString back into a String static String decompressString(ByteArray value); protected: static const int32_t COMPRESS_BUFFER; }; } #endif 
LucenePlusPlus-rel_3.0.4/include/ConcurrentMergeScheduler.h000066400000000000000000000070641217574114600240570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CONCURRENTMERGESCHEDULER_H #define CONCURRENTMERGESCHEDULER_H #include "MergeScheduler.h" namespace Lucene { /// A {@link MergeScheduler} that runs each merge using a separate thread, up until a /// maximum number of threads ({@link #setMaxThreadCount}) at which when a merge is needed, /// the thread(s) that are updating the index will pause until one or more merges completes. /// This is a simple way to use concurrency in the indexing process without having to create /// and manage application level threads. class LPPAPI ConcurrentMergeScheduler : public MergeScheduler { public: ConcurrentMergeScheduler(); virtual ~ConcurrentMergeScheduler(); LUCENE_CLASS(ConcurrentMergeScheduler); protected: int32_t mergeThreadPriority; SetMergeThread mergeThreads; /// Max number of threads allowed to be merging at once int32_t maxThreadCount; DirectoryPtr dir; bool closed; IndexWriterWeakPtr _writer; static Collection allInstances; bool suppressExceptions; static bool anyExceptions; public: virtual void initialize(); /// Sets the max # simultaneous threads that may be running. If a merge is necessary yet /// we already have this many threads running, the incoming thread (that is calling /// add/updateDocument) will block until a merge thread has completed. virtual void setMaxThreadCount(int32_t count); /// Get the max # simultaneous threads that may be running. @see #setMaxThreadCount. virtual int32_t getMaxThreadCount(); /// Return the priority that merge threads run at. 
By default the priority is 1 plus the /// priority of (ie, slightly higher priority than) the first thread that calls merge. virtual int32_t getMergeThreadPriority(); /// Set the priority that merge threads run at. virtual void setMergeThreadPriority(int32_t pri); virtual void close(); virtual void sync(); virtual void merge(IndexWriterPtr writer); /// Used for testing static bool anyUnhandledExceptions(); static void clearUnhandledExceptions(); /// Used for testing void setSuppressExceptions(); void clearSuppressExceptions(); /// Used for testing static void setTestMode(); protected: virtual bool verbose(); virtual void message(const String& message); virtual void initMergeThreadPriority(); virtual int32_t mergeThreadCount(); /// Does the actual merge, by calling {@link IndexWriter#merge} virtual void doMerge(OneMergePtr merge); virtual MergeThreadPtr getMergeThread(IndexWriterPtr writer, OneMergePtr merge); /// Called when an exception is hit in a background merge thread virtual void handleMergeException(const LuceneException& exc); virtual void addMyself(); friend class MergeThread; }; } #endif LucenePlusPlus-rel_3.0.4/include/Config.h.cmake000066400000000000000000000043771217574114600214060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CONFIG_H #define CONFIG_H #if defined(_WIN32) || defined(_WIN64) #pragma warning(disable:4251) #pragma warning(disable:4275) #pragma warning(disable:4005) #pragma warning(disable:4996) #ifndef _WIN64 #pragma warning(disable:4244) #endif #endif #if defined(_WIN32) || defined(_WIN64) #define LPP_IMPORT __declspec(dllimport) #define LPP_EXPORT __declspec(dllexport) #else #ifdef LPP_HAVE_GXXCLASSVISIBILITY #define LPP_IMPORT __attribute__ ((visibility("default"))) #define LPP_EXPORT __attribute__ ((visibility("default"))) #else #define LPP_IMPORT #define LPP_EXPORT #endif #endif // Define LPPAPI for dll builds #ifdef LPP_HAVE_DLL #ifdef LPP_BUILDING_LIB #define LPPAPI LPP_EXPORT #define LPPCONTRIBAPI LPP_EXPORT #else #define LPPAPI LPP_IMPORT #define LPPCONTRIBAPI LPP_IMPORT #endif #else #define LPPAPI #define LPPCONTRIBAPI #endif // Check windows #if defined(_WIN32) || defined(_WIN64) #define LPP_UNICODE_CHAR_SIZE_2 #if defined(_WIN64) #define LPP_BUILD_64 #else #define LPP_BUILD_32 #endif #endif // Check GCC #if defined(__GNUC__) #define LPP_UNICODE_CHAR_SIZE_4 #if defined(__x86_64__) || defined(__ppc64__) #define LPP_BUILD_64 #else #define LPP_BUILD_32 #endif #endif // Default to 32-bit platforms #if !defined(LPP_BUILD_32) && !defined(LPP_BUILD_64) #define LPP_BUILD_32 #endif // Default to 4-byte unicode format #if !defined(LPP_UNICODE_CHAR_SIZE_2) && !defined(LPP_UNICODE_CHAR_SIZE_4) #define LPP_UNICODE_CHAR_SIZE_4 #endif // Define to enable cyclic checking in debug builds #@DEFINE_USE_CYCLIC_CHECK@ LPP_USE_CYCLIC_CHECK // Define to use custom allocator (useful in Windows builds and when using nedmalloc) #@DEFINE_USE_ALLOCATOR@ LPP_USE_ALLOCATOR // Define to use nedmalloc memory allocator #@DEFINE_USE_NEDMALLOC@ LPP_USE_NEDMALLOC #ifdef LPP_USE_NEDMALLOC #define EXTSPEC LPPAPI #endif // Make internal bitset storage public #define BOOST_DYNAMIC_BITSET_DONT_USE_FRIENDS #define 
BOOST_FILESYSTEM_VERSION 3 #endif LucenePlusPlus-rel_3.0.4/include/ConjunctionScorer.h000066400000000000000000000020661217574114600225620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CONJUNCTIONSCORER_H #define CONJUNCTIONSCORER_H #include "Scorer.h" namespace Lucene { /// Scorer for conjunctions, sets of queries, all of which are required. class ConjunctionScorer : public Scorer { public: ConjunctionScorer(SimilarityPtr similarity, Collection scorers); virtual ~ConjunctionScorer(); LUCENE_CLASS(ConjunctionScorer); protected: Collection scorers; double coord; int32_t lastDoc; public: virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); protected: int32_t doNext(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ConstantScoreQuery.h000066400000000000000000000030571217574114600227270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CONSTANTSCOREQUERY_H #define CONSTANTSCOREQUERY_H #include "Query.h" #include "Weight.h" #include "Scorer.h" namespace Lucene { /// A query that wraps a filter and simply returns a constant score equal to the query boost for every /// document in the filter. 
class LPPAPI ConstantScoreQuery : public Query { public: ConstantScoreQuery(FilterPtr filter); virtual ~ConstantScoreQuery(); LUCENE_CLASS(ConstantScoreQuery); protected: FilterPtr filter; public: using Query::toString; /// Returns the encapsulated filter FilterPtr getFilter(); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual void extractTerms(SetTerm terms); virtual WeightPtr createWeight(SearcherPtr searcher); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); friend class ConstantWeight; friend class ConstantScorer; }; } #endif LucenePlusPlus-rel_3.0.4/include/Constants.h000066400000000000000000000057541217574114600210760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CONSTANTS_H #define CONSTANTS_H #include "Lucene.h" namespace Lucene { /// Some useful Lucene constants. class LPPAPI Constants { private: Constants(); public: virtual ~Constants(); public: static String OS_NAME; static String LUCENE_MAIN_VERSION; static String LUCENE_VERSION; }; /// Use by certain classes to match version compatibility across releases of Lucene. /// /// WARNING: When changing the version parameter that you supply to components in Lucene, do not simply /// change the version at search-time, but instead also adjust your indexing code to match, and re-index. class LPPAPI LuceneVersion { private: LuceneVersion(); public: virtual ~LuceneVersion(); public: enum Version { /// Match settings and bugs in Lucene's 2.0 release. 
LUCENE_20 = 0, /// Match settings and bugs in Lucene's 2.1 release. LUCENE_21, /// Match settings and bugs in Lucene's 2.2 release. LUCENE_22, /// Match settings and bugs in Lucene's 2.3 release. LUCENE_23, /// Match settings and bugs in Lucene's 2.4 release. LUCENE_24, /// Match settings and bugs in Lucene's 2.9 release. LUCENE_29, /// Match settings and bugs in Lucene's 3.0 release. /// /// Use this to get the latest & greatest settings, bug fixes, etc, for Lucene. LUCENE_30, /// Add new constants for later versions **here** to respect order! /// Warning: If you use this setting, and then upgrade to a newer release of Lucene, /// sizable changes may happen. If backwards compatibility is important then you /// should instead explicitly specify an actual version. /// /// If you use this constant then you may need to re-index all of your documents /// when upgrading Lucene, as the way text is indexed may have changed. Additionally, /// you may need to re-test your entire application to ensure it behaves as /// expected, as some defaults may have changed and may break functionality in your /// application. /// /// Deprecated: Use an actual version instead. LUCENE_CURRENT }; public: static bool onOrAfter(LuceneVersion::Version first, LuceneVersion::Version second); }; } #endif LucenePlusPlus-rel_3.0.4/include/CustomScoreProvider.h000066400000000000000000000111461217574114600230730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CUSTOMSCOREPROVIDER_H #define CUSTOMSCOREPROVIDER_H #include "LuceneObject.h" namespace Lucene { /// An instance of this subclass should be returned by {@link CustomScoreQuery#getCustomScoreProvider}, /// if you want to modify the custom score calculation of a {@link CustomScoreQuery}. /// /// Since Lucene 2.9, queries operate on each segment of an Index separately, so overriding the similar /// (now deprecated) methods in {@link CustomScoreQuery} is no longer suitable, as the supplied doc ID /// is per-segment and without knowledge of the IndexReader you cannot access the document or {@link /// FieldCache}. class LPPAPI CustomScoreProvider : public LuceneObject { public: /// Creates a new instance of the provider class for the given {@link IndexReader}. CustomScoreProvider(IndexReaderPtr reader); virtual ~CustomScoreProvider(); LUCENE_CLASS(CustomScoreProvider); protected: IndexReaderPtr reader; public: /// Compute a custom score by the subQuery score and a number of ValueSourceQuery scores. /// /// Subclasses can override this method to modify the custom score. /// /// If your custom scoring is different than the default herein you should override at least one of /// the two customScore() methods. If the number of ValueSourceQueries is always < 2 it is /// sufficient to override the other {@link #customScore(int32_t, double, double) customScore()} /// method, which is simpler. /// /// The default computation herein is a multiplication of given scores: ///
        /// ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
        /// 
/// /// @param doc id of scored doc. /// @param subQueryScore score of that doc by the subQuery. /// @param valSrcScores scores of that doc by the ValueSourceQuery. /// @return custom score. virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); /// Compute a custom score by the subQuery score and the ValueSourceQuery score. /// /// Subclasses can override this method to modify the custom score. /// /// If your custom scoring is different than the default herein you should override at least one of the /// two customScore() methods. If the number of ValueSourceQueries is always < 2 it is sufficient to /// override this customScore() method, which is simpler. /// /// The default computation herein is a multiplication of the two scores: ///
        /// ModifiedScore = subQueryScore * valSrcScore
        /// 
/// /// @param doc id of scored doc. /// @param subQueryScore score of that doc by the subQuery. /// @param valSrcScore score of that doc by the ValueSourceQuery. /// @return custom score. virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); /// Explain the custom score. Whenever overriding {@link #customScore(int32_t, double, Collection)}, /// this method should also be overridden to provide the correct explanation for the part of the custom scoring. /// /// @param doc doc being explained. /// @param subQueryExpl explanation for the sub-query part. /// @param valSrcExpls explanation for the value source part. /// @return an explanation for the custom score virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls); /// Explain the custom score. Whenever overriding {@link #customScore(int32_t, double, double)}, /// this method should also be overridden to provide the correct explanation for the part of the custom scoring. /// @param doc doc being explained. /// @param subQueryExpl explanation for the sub-query part. /// @param valSrcExpl explanation for the value source part. /// @return an explanation for the custom score virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl); }; } #endif LucenePlusPlus-rel_3.0.4/include/CustomScoreQuery.h000066400000000000000000000151271217574114600224110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CUSTOMSCOREQUERY_H #define CUSTOMSCOREQUERY_H #include "Query.h" namespace Lucene { /// Query that sets document score as a programmatic function of several (sub) scores: ///
    ///
  1. the score of its subQuery (any query) ///
  2. (optional) the score of its ValueSourceQuery (or queries). For most simple/convenient use cases /// this query is likely to be a {@link FieldScoreQuery} ///
/// Subclasses can modify the computation by overriding {@link #getCustomScoreProvider}. class LPPAPI CustomScoreQuery : public Query { public: /// Create a CustomScoreQuery over input subQuery. /// @param subQuery the sub query whose scored is being customed. Must not be null. CustomScoreQuery(QueryPtr subQuery); /// Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. /// @param subQuery the sub query whose score is being customized. Must not be null. /// @param valSrcQuery a value source query whose scores are used in the custom score computation. For /// most simple/convenient use case this would be a {@link FieldScoreQuery}. This parameter is /// optional - it can be null. CustomScoreQuery(QueryPtr subQuery, ValueSourceQueryPtr valSrcQuery); /// Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. /// @param subQuery the sub query whose score is being customized. Must not be null. /// @param valSrcQueries value source queries whose scores are used in the custom score computation. /// For most simple/convenient use case these would be {@link FieldScoreQueries}. This parameter is /// optional - it can be null or even an empty array. CustomScoreQuery(QueryPtr subQuery, Collection valSrcQueries); virtual ~CustomScoreQuery(); LUCENE_CLASS(CustomScoreQuery); protected: QueryPtr subQuery; Collection valSrcQueries; // never null (empty array if there are no valSrcQueries). bool strict; // if true, valueSource part of query does not take part in weights normalization. public: using Query::toString; virtual QueryPtr rewrite(IndexReaderPtr reader); virtual void extractTerms(SetTerm terms); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); /// Compute a custom score by the subQuery score and a number of ValueSourceQuery scores. /// /// Deprecated: Will be removed in Lucene 3.1. 
/// /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment /// search (since Lucene 2.9). /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} /// for the given {@link IndexReader}. virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); /// Compute a custom score by the subQuery score and the ValueSourceQuery score. /// /// Deprecated: Will be removed in Lucene 3.1. /// /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment /// search (since Lucene 2.9). /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} /// for the given {@link IndexReader}. virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); /// Explain the custom score. /// /// Deprecated: Will be removed in Lucene 3.1. /// /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment /// search (since Lucene 2.9). /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} /// for the given {@link IndexReader}. virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls); /// Explain the custom score. /// /// Deprecated Will be removed in Lucene 3.1. /// /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment /// search (since Lucene 2.9). /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} /// for the given {@link IndexReader}. virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl); virtual WeightPtr createWeight(SearcherPtr searcher); /// Checks if this is strict custom scoring. 
In strict custom scoring, the ValueSource part does not /// participate in weight normalization. This may be useful when one wants full control over how scores /// are modified, and does not care about normalizing by the ValueSource part. One particular case where /// this is useful if for testing this query. /// /// Note: only has effect when the ValueSource part is not null. virtual bool isStrict(); /// Set the strict mode of this query. /// @param strict The strict mode to set. /// @see #isStrict() virtual void setStrict(bool strict); /// A short name of this query, used in {@link #toString(String)}. virtual String name(); protected: void ConstructQuery(QueryPtr subQuery, Collection valSrcQueries); /// Returns a {@link CustomScoreProvider} that calculates the custom scores for the given {@link /// IndexReader}. The default implementation returns a default implementation as specified in /// the docs of {@link CustomScoreProvider}. virtual CustomScoreProviderPtr getCustomScoreProvider(IndexReaderPtr reader); friend class CustomWeight; friend class CustomScorer; }; } #endif LucenePlusPlus-rel_3.0.4/include/CycleCheck.h000066400000000000000000000024731217574114600211120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CYCLECHECK_H #define CYCLECHECK_H #include "Lucene.h" namespace Lucene { /// Debug utility to track shared_ptr utilization. 
class LPPAPI CycleCheck { public: virtual ~CycleCheck(); protected: static MapStringInt cycleMap; static Set staticRefs; protected: void addRef(const String& className, int32_t ref); static void addStatic(LuceneObjectPtr* staticRef); public: template static void addStatic(TYPE& staticRef) { addStatic(reinterpret_cast(&staticRef)); } static void dumpRefs(); }; template class CycleCheckT : public CycleCheck { public: CycleCheckT() { addRef(TYPE::_getClassName(), 1); } virtual ~CycleCheckT() { addRef(TYPE::_getClassName(), -1); } }; } #endif LucenePlusPlus-rel_3.0.4/include/DateField.h000066400000000000000000000050171217574114600207330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DATEFIELD_H #define DATEFIELD_H #include "LuceneObject.h" namespace Lucene { /// Provides support for converting dates to strings and vice-versa. The strings are structured so that /// lexicographic sorting orders by date, which makes them suitable for use as field values and search terms. /// /// Note that this class saves dates with millisecond granularity, which is bad for {@link TermRangeQuery} and /// {@link PrefixQuery}, as those queries are expanded to a BooleanQuery with a potentially large number of terms /// when searching. Thus you might want to use {@link DateTools} instead. /// /// Note: dates before 1970 cannot be used, and therefore cannot be indexed when using this class. See {@link /// DateTools} for an alternative without such a limitation. /// /// Another approach is {@link NumericUtils}, which provides a sortable binary representation (prefix encoded) /// of numeric values, which date/time are. 
For indexing a {@link Date} or {@link Calendar}, just get the unix /// timestamp as long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and index this as a numeric /// value with {@link NumericField} and use {@link NumericRangeQuery} to query it. /// /// @deprecated If you build a new index, use {@link DateTools} or {@link NumericField} instead. This class is /// included for use with existing indices and will be removed in a future release (possibly Lucene 4.0). class LPPAPI DateField : public LuceneObject { public: virtual ~DateField(); LUCENE_CLASS(DateField); protected: static int32_t DATE_LEN(); public: static const String& MIN_DATE_STRING(); static const String& MAX_DATE_STRING(); /// Converts a Date to a string suitable for indexing. static String dateToString(const boost::posix_time::ptime& date); /// Converts a millisecond time to a string suitable for indexing. static String timeToString(int64_t time); /// Converts a string-encoded date into a millisecond time. static int64_t stringToTime(const String& s); }; } #endif LucenePlusPlus-rel_3.0.4/include/DateTools.h000066400000000000000000000124541217574114600210130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DATETOOLS_H #define DATETOOLS_H #include "LuceneObject.h" namespace Lucene { /// Provides support for converting dates to strings and vice-versa. The strings are structured so that /// lexicographic sorting orders them by date, which makes them suitable for use as field values and search /// terms. /// /// This class also helps you to limit the resolution of your dates. 
Do not save dates with a finer resolution /// than you really need, as then RangeQuery and PrefixQuery will require more memory and become slower. /// /// Compared to {@link DateField} the strings generated by the methods in this class take slightly more space, /// unless your selected resolution is set to Resolution.DAY or lower. /// /// Another approach is {@link NumericUtils}, which provides a sortable binary representation (prefix encoded) /// of numeric values, which date/time are. For indexing a {@link Date} or {@link Calendar}, just get the unix /// timestamp as long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and index this as a numeric /// value with {@link NumericField} and use {@link NumericRangeQuery} to query it. class LPPAPI DateTools : public LuceneObject { public: virtual ~DateTools(); LUCENE_CLASS(DateTools); public: enum Resolution { RESOLUTION_NULL, RESOLUTION_YEAR, RESOLUTION_MONTH, RESOLUTION_DAY, RESOLUTION_HOUR, RESOLUTION_MINUTE, RESOLUTION_SECOND, RESOLUTION_MILLISECOND }; enum DateOrder { DATEORDER_LOCALE, DATEORDER_YMD, DATEORDER_DMY, DATEORDER_MDY }; protected: static DateOrder dateOrder; public: /// Converts a Date to a string suitable for indexing. /// @param date the date to be converted /// @param resolution the desired resolution /// @return a string in format yyyyMMddHHmmssSSS or shorter, depending on resolution; using GMT as timezone static String dateToString(const boost::posix_time::ptime& date, Resolution resolution); /// Converts a millisecond time to a string suitable for indexing. 
/// @param time the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT /// @param resolution the desired resolution /// @return a string in format yyyyMMddHHmmssSSS or shorter, depending on resolution; using GMT as timezone static String timeToString(int64_t time, Resolution resolution); /// Converts a string produced by timeToString or dateToString back to a time, represented as the number of /// milliseconds since January 1, 1970, 00:00:00 GMT. /// @param dateString the date string to be converted /// @return the number of milliseconds since January 1, 1970, 00:00:00 GMT static int64_t stringToTime(const String& dateString); /// Converts a string produced by timeToString or dateToString back to a time, represented as a ptime object. /// @param dateString the date string to be converted /// @return the parsed time as a ptime object static boost::posix_time::ptime stringToDate(const String& dateString); /// Limit a date's resolution. For example, the date 2004-09-21 13:50:11 will be changed to 2004-09-01 00:00:00 /// when using Resolution.MONTH. /// @param resolution The desired resolution of the date to be returned /// @return the date with all values more precise than resolution set to 0 or 1 static boost::posix_time::ptime round(const boost::posix_time::ptime& date, Resolution resolution); /// Limit a date's resolution. For example, the date 1095767411000 (which represents 2004-09-21 13:50:11) will /// be changed to 1093989600000 (2004-09-01 00:00:00) when using Resolution.MONTH. /// @param resolution The desired resolution of the date to be returned /// @return the date with all values more precise than resolution set to 0 or 1, expressed as milliseconds /// since January 1, 1970, 00:00:00 GMT static int64_t round(int64_t time, Resolution resolution); /// Allow overriding of date ordering. 
static void setDateOrder(DateTools::DateOrder order); /// Return date ordering based on given locale (or overridden in {@link #setDateOrder(DateTools::DateOrder)}). static DateTools::DateOrder getDateOrder(std::locale locale = std::locale()); /// Parse a given date using locale date format /// @param dateString the date string to be converted /// @param locale the locale to use for parsing /// @return the parsed time as a ptime object static boost::posix_time::ptime parseDate(const String& dateString, std::locale locale = std::locale()); }; } #endif LucenePlusPlus-rel_3.0.4/include/DefaultSimilarity.h000066400000000000000000000043031217574114600225420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DEFAULTSIMILARITY_H #define DEFAULTSIMILARITY_H #include "Similarity.h" namespace Lucene { /// Default scoring implementation. class LPPAPI DefaultSimilarity : public Similarity { public: DefaultSimilarity(); virtual ~DefaultSimilarity(); LUCENE_CLASS(DefaultSimilarity); protected: bool discountOverlaps; // Default false public: /// Implemented as state->getBoost() * lengthNorm(numTerms), where numTerms is {@link /// FieldInvertState#getLength()} if {@link #setDiscountOverlaps} is false, else it's {@link /// FieldInvertState#getLength()} - {@link FieldInvertState#getNumOverlap()}. virtual double computeNorm(const String& fieldName, FieldInvertStatePtr state); /// Implemented as 1 / sqrt(numTerms). virtual double lengthNorm(const String& fieldName, int32_t numTokens); /// Implemented as 1 / sqrt(sumOfSquaredWeights). virtual double queryNorm(double sumOfSquaredWeights); /// Implemented as sqrt(freq). 
virtual double tf(double freq); /// Implemented as 1 / (distance + 1). virtual double sloppyFreq(int32_t distance); /// Implemented as log(numDocs / (docFreq + 1)) + 1. virtual double idf(int32_t docFreq, int32_t numDocs); /// Implemented as overlap / maxOverlap. virtual double coord(int32_t overlap, int32_t maxOverlap); /// Determines whether overlap tokens (Tokens with 0 position increment) are ignored when computing /// norm. By default this is false, meaning overlap tokens are counted just like non-overlap tokens. /// @see #computeNorm void setDiscountOverlaps(bool v); /// @see #setDiscountOverlaps bool getDiscountOverlaps(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DefaultSkipListReader.h000066400000000000000000000044411217574114600233040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DEFAULTSKIPLISTREADER_H #define DEFAULTSKIPLISTREADER_H #include "MultiLevelSkipListReader.h" namespace Lucene { /// Implements the skip list reader for the default posting list format that stores positions and payloads. 
class DefaultSkipListReader : public MultiLevelSkipListReader { public: DefaultSkipListReader(IndexInputPtr skipStream, int32_t maxSkipLevels, int32_t skipInterval); virtual ~DefaultSkipListReader(); LUCENE_CLASS(DefaultSkipListReader); protected: bool currentFieldStoresPayloads; Collection freqPointer; Collection proxPointer; Collection payloadLength; int64_t lastFreqPointer; int64_t lastProxPointer; int32_t lastPayloadLength; public: void init(int64_t skipPointer, int64_t freqBasePointer, int64_t proxBasePointer, int32_t df, bool storesPayloads); /// Returns the freq pointer of the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} /// has skipped. int64_t getFreqPointer(); /// Returns the prox pointer of the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} /// has skipped. int64_t getProxPointer(); /// Returns the payload length of the payload stored just before the doc to which the last call of {@link /// MultiLevelSkipListReader#skipTo(int)} has skipped. int32_t getPayloadLength(); protected: /// Seeks the skip entry on the given level virtual void seekChild(int32_t level); /// Copies the values of the last read skip entry on this level virtual void setLastSkipData(int32_t level); /// Subclasses must implement the actual skip data encoding in this method. virtual int32_t readSkipData(int32_t level, IndexInputPtr skipStream); }; } #endif LucenePlusPlus-rel_3.0.4/include/DefaultSkipListWriter.h000066400000000000000000000034451217574114600233610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DEFAULTSKIPLISTWRITER_H #define DEFAULTSKIPLISTWRITER_H #include "MultiLevelSkipListWriter.h" namespace Lucene { /// Implements the skip list writer for the default posting list format that stores positions and payloads. class DefaultSkipListWriter : public MultiLevelSkipListWriter { public: DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, IndexOutputPtr freqOutput, IndexOutputPtr proxOutput); virtual ~DefaultSkipListWriter(); LUCENE_CLASS(DefaultSkipListWriter); protected: Collection lastSkipDoc; Collection lastSkipPayloadLength; Collection lastSkipFreqPointer; Collection lastSkipProxPointer; IndexOutputPtr freqOutput; IndexOutputPtr proxOutput; int32_t curDoc; bool curStorePayloads; int32_t curPayloadLength; int64_t curFreqPointer; int64_t curProxPointer; public: void setFreqOutput(IndexOutputPtr freqOutput); void setProxOutput(IndexOutputPtr proxOutput); /// Sets the values for the current skip data. void setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength); protected: virtual void resetSkip(); virtual void writeSkipData(int32_t level, IndexOutputPtr skipBuffer); friend class FormatPostingsTermsWriter; }; } #endif LucenePlusPlus-rel_3.0.4/include/Directory.h000066400000000000000000000121031217574114600210500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DIRECTORY_H #define DIRECTORY_H #include "LuceneObject.h" namespace Lucene { /// A Directory is a flat list of files. Files may be written once, when they are created. Once a file /// is created it may only be opened for read, or deleted. 
Random access is permitted both when reading /// and writing. Directory locking is implemented by an instance of {@link LockFactory}, and can be changed /// for each Directory instance using {@link #setLockFactory}. class LPPAPI Directory : public LuceneObject { public: Directory(); virtual ~Directory(); LUCENE_CLASS(Directory); protected: bool isOpen; /// Holds the LockFactory instance (implements locking for this Directory instance). LockFactoryPtr lockFactory; public: /// Returns an array of strings, one for each file in the directory. virtual HashSet listAll() = 0; /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name) = 0; /// Returns the time the named file was last modified. virtual uint64_t fileModified(const String& name) = 0; /// Set the modified time of an existing file to now. virtual void touchFile(const String& name) = 0; /// Removes an existing file in the directory. virtual void deleteFile(const String& name) = 0; /// Returns the length of a file in the directory. virtual int64_t fileLength(const String& name) = 0; /// Creates a new, empty file in the directory with the given name. /// Returns a stream writing this file. virtual IndexOutputPtr createOutput(const String& name) = 0; /// Returns a stream reading an existing file. virtual IndexInputPtr openInput(const String& name) = 0; /// Closes the store. virtual void close() = 0; /// Ensure that any writes to this file are moved to stable storage. Lucene uses this to properly commit /// changes to the index, to prevent a machine/OS crash from corrupting the index. virtual void sync(const String& name); /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory /// implementation may ignore the buffer size. Currently the only Directory implementations that respect /// this parameter are {@link FSDirectory} and {@link CompoundFileReader}. 
virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); /// Construct a {@link Lock}. /// @param name the name of the lock file. virtual LockPtr makeLock(const String& name); /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you /// are certain this lock is no longer in use. /// @param name name of the lock to be cleared. void clearLock(const String& name); /// Set the LockFactory that this Directory instance should use for its locking implementation. Each * instance /// of LockFactory should only be used for one directory (ie, do not share a single instance across multiple /// Directories). /// @param lockFactory instance of {@link LockFactory}. void setLockFactory(LockFactoryPtr lockFactory); /// Get the LockFactory that this Directory instance is using for its locking implementation. Note that this /// may be null for Directory implementations that provide their own locking implementation. LockFactoryPtr getLockFactory(); /// Return a string identifier that uniquely differentiates this Directory instance from other Directory /// instances. This ID should be the same if two Directory instances are considered "the same index". /// This is how locking "scopes" to the right index. virtual String getLockID(); virtual String toString(); /// Copy contents of a directory src to a directory dest. If a file in src already exists in dest then the one /// in dest will be blindly overwritten. NOTE: the source directory cannot change while this method is running. /// Otherwise the results are undefined. /// @param src source directory. /// @param dest destination directory. /// @param closeDirSrc if true, call {@link #close()} method on source directory. static void copy(DirectoryPtr src, DirectoryPtr dest, bool closeDirSrc); protected: /// @throws AlreadyClosed if this Directory is closed. 
void ensureOpen(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DirectoryReader.h000066400000000000000000000333541217574114600222060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DIRECTORYREADER_H #define DIRECTORYREADER_H #include "IndexReader.h" #include "TermEnum.h" #include "TermPositions.h" #include "IndexCommit.h" #include "SegmentMergeQueue.h" namespace Lucene { /// An IndexReader which reads indexes with multiple segments. class DirectoryReader : public IndexReader { public: /// Construct reading the named set of readers. DirectoryReader(DirectoryPtr directory, SegmentInfosPtr sis, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); /// Used by near real-time search. 
DirectoryReader(IndexWriterPtr writer, SegmentInfosPtr infos, int32_t termInfosIndexDivisor); /// This constructor is only used for {@link #reopen()} DirectoryReader(DirectoryPtr directory, SegmentInfosPtr infos, Collection oldReaders, Collection oldStarts, MapStringByteArray oldNormsCache, bool readOnly, bool doClone, int32_t termInfosIndexDivisor); virtual ~DirectoryReader(); LUCENE_CLASS(DirectoryReader); protected: DirectoryPtr _directory; bool readOnly; IndexWriterWeakPtr _writer; IndexDeletionPolicyPtr deletionPolicy; HashSet synced; LockPtr writeLock; SegmentInfosPtr segmentInfos; SegmentInfosPtr segmentInfosStart; bool stale; int32_t termInfosIndexDivisor; bool rollbackHasChanges; Collection subReaders; Collection starts; // 1st docno for each segment MapStringByteArray normsCache; int32_t _maxDoc; int32_t _numDocs; bool _hasDeletions; // Max version in index as of when we opened; this can be > our current segmentInfos version // in case we were opened on a past IndexCommit int64_t maxIndexVersion; public: void _initialize(Collection subReaders); static IndexReaderPtr open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, IndexCommitPtr commit, bool readOnly, int32_t termInfosIndexDivisor); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual LuceneObjectPtr clone(bool openReadOnly, LuceneObjectPtr other = LuceneObjectPtr()); virtual IndexReaderPtr reopen(); virtual IndexReaderPtr reopen(bool openReadOnly); virtual IndexReaderPtr reopen(IndexCommitPtr commit); /// Version number when this IndexReader was opened. virtual int64_t getVersion(); /// Return an array of term frequency vectors for the specified document. virtual Collection getTermFreqVectors(int32_t docNumber); /// Return a term frequency vector for the specified document and field. 
virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of the {@link TermFreqVector}. virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); /// Map all the term vectors for all fields in a Document virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); /// Checks is the index is optimized (if it has a single segment and no deletions). Not implemented in the IndexReader base class. /// @return true if the index is optimized; false otherwise virtual bool isOptimized(); /// Returns the number of documents in this index. virtual int32_t numDocs(); /// Returns one greater than the largest possible document number. virtual int32_t maxDoc(); /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine what {@link Field}s to load and how they should be loaded. virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n); /// Returns true if any documents have been deleted virtual bool hasDeletions(); /// Find reader for doc n static int32_t readerIndex(int32_t n, Collection starts, int32_t numSubReaders); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. virtual ByteArray norms(const String& field); /// Reads the byte-encoded normalization factor for the named field of every document. virtual void norms(const String& field, ByteArray norms, int32_t offset); /// Returns an enumeration of all the terms in the index. virtual TermEnumPtr terms(); /// Returns an enumeration of all terms starting at a given term. virtual TermEnumPtr terms(TermPtr t); /// Returns the number of documents containing the term t. 
virtual int32_t docFreq(TermPtr t); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs(); /// Returns an unpositioned {@link TermPositions} enumerator. virtual TermPositionsPtr termPositions(); /// Tries to acquire the WriteLock on this directory. this method is only valid if this /// IndexReader is directory owner. virtual void acquireWriteLock(); void startCommit(); void rollbackCommit(); /// Retrieve the String userData optionally passed to IndexWriter#commit. virtual MapStringString getCommitUserData(); /// Check whether any new changes have occurred to the index since this reader was opened. virtual bool isCurrent(); /// Get a list of unique field names that exist in this index and have the specified field /// option information. virtual HashSet getFieldNames(FieldOption fieldOption); static HashSet getFieldNames(FieldOption fieldOption, Collection subReaders); /// Returns the sequential sub readers that this reader is logically composed of. virtual Collection getSequentialSubReaders(); /// Returns the directory this index resides in. virtual DirectoryPtr directory(); virtual int32_t getTermInfosIndexDivisor(); /// Return the IndexCommit that this reader has opened. virtual IndexCommitPtr getIndexCommit(); /// Returns all commit points that exist in the Directory. static Collection listCommits(DirectoryPtr dir); protected: IndexReaderPtr doReopenFromWriter(bool openReadOnly, IndexCommitPtr commit); IndexReaderPtr doReopen(bool openReadOnly, IndexCommitPtr commit); IndexReaderPtr doReopenNoWriter(bool openReadOnly, IndexCommitPtr commit); DirectoryReaderPtr doReopen(SegmentInfosPtr infos, bool doClone, bool openReadOnly); /// Implements deletion of the document numbered docNum. virtual void doDelete(int32_t docNum); /// Implements actual undeleteAll() in subclass. virtual void doUndeleteAll(); int32_t readerIndex(int32_t n); /// Implements setNorm in subclass. 
virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); /// Commit changes resulting from delete, undeleteAll, or setNorm operations /// /// If an exception is hit, then either no changes or all changes will have been committed to the index (transactional semantics). virtual void doCommit(MapStringString commitUserData); /// Implements close. virtual void doClose(); friend class FindSegmentsReopen; }; class MultiTermEnum : public TermEnum { public: MultiTermEnum(IndexReaderPtr topReader, Collection readers, Collection starts, TermPtr t); virtual ~MultiTermEnum(); LUCENE_CLASS(MultiTermEnum); protected: SegmentMergeQueuePtr queue; TermPtr _term; int32_t _docFreq; public: IndexReaderWeakPtr _topReader; Collection matchingSegments; // null terminated array of matching segments public: /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Returns the current Term in the enumeration. virtual TermPtr term(); /// Returns the docFreq of the current Term in the enumeration. virtual int32_t docFreq(); /// Closes the enumeration to further activity, freeing resources. virtual void close(); }; class MultiTermDocs : public TermPositions, public LuceneObject { public: MultiTermDocs(IndexReaderPtr topReader, Collection r, Collection s); virtual ~MultiTermDocs(); LUCENE_CLASS(MultiTermDocs); protected: IndexReaderWeakPtr _topReader; // used for matching TermEnum to TermDocs Collection readers; Collection starts; TermPtr term; int32_t base; int32_t pointer; Collection readerTermDocs; TermDocsPtr current; MultiTermEnumPtr tenum; // the term enum used for seeking int32_t matchingSegmentPos; // position into the matching segments from tenum SegmentMergeInfoPtr smi; // current segment mere info public: /// Returns the current document number. virtual int32_t doc(); /// Returns the frequency of the term within the current document. virtual int32_t freq(); /// Sets this to the data for a term. 
virtual void seek(TermPtr term); /// Sets this to the data for the current term in a {@link TermEnum}. virtual void seek(TermEnumPtr termEnum); /// Moves to the next pair in the enumeration. virtual bool next(); /// Attempts to read multiple entries from the enumeration, up to length of docs. /// Optimized implementation. virtual int32_t read(Collection docs, Collection freqs); /// Skips entries to the first beyond the current whose document number is greater than or equal to target. virtual bool skipTo(int32_t target); /// Frees associated resources. virtual void close(); protected: virtual TermDocsPtr termDocs(int32_t i); virtual TermDocsPtr termDocs(IndexReaderPtr reader); }; class MultiTermPositions : public MultiTermDocs { public: MultiTermPositions(IndexReaderPtr topReader, Collection r, Collection s); virtual ~MultiTermPositions(); LUCENE_CLASS(MultiTermPositions); public: /// Returns next position in the current document. virtual int32_t nextPosition(); /// Returns the length of the payload at the current term position. virtual int32_t getPayloadLength(); /// Returns the payload data at the current term position. virtual ByteArray getPayload(ByteArray data, int32_t offset); /// Checks if a payload can be loaded at this position. virtual bool isPayloadAvailable(); protected: virtual TermDocsPtr termDocs(IndexReaderPtr reader); }; class ReaderCommit : public IndexCommit { public: ReaderCommit(SegmentInfosPtr infos, DirectoryPtr dir); virtual ~ReaderCommit(); LUCENE_CLASS(ReaderCommit); protected: String segmentsFileName; HashSet files; DirectoryPtr dir; int64_t generation; int64_t version; bool _isOptimized; MapStringString userData; public: virtual String toString(); /// Returns true if this commit is an optimized index. virtual bool isOptimized(); /// Two IndexCommits are equal if both their Directory and versions are equal. virtual String getSegmentsFileName(); /// Returns all index files referenced by this commit point. 
virtual HashSet getFileNames(); /// Returns the {@link Directory} for the index. virtual DirectoryPtr getDirectory(); /// Returns the version for this IndexCommit. virtual int64_t getVersion(); /// Returns the generation (the _N in segments_N) for this IndexCommit. virtual int64_t getGeneration(); virtual bool isDeleted(); /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. virtual MapStringString getUserData(); virtual void deleteCommit(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DisjunctionMaxQuery.h000066400000000000000000000115231217574114600230760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DISJUNCTIONMAXQUERY_H #define DISJUNCTIONMAXQUERY_H #include "Query.h" namespace Lucene { /// A query that generates the union of documents produced by its subqueries, and that scores each /// document with the maximum score for that document as produced by any subquery, plus a tie breaking /// increment for any additional matching subqueries. This is useful when searching for a word in /// multiple fields with different boost factors (so that the fields cannot be combined equivalently /// into a single search field). We want the primary score to be the one associated with the highest /// boost, not the sum of the field scores (as BooleanQuery would give). If the query is "albino /// elephant" this ensures that "albino" matching one field and "elephant" matching another gets a /// higher score than "albino" matching both fields. 
To get this result, use both BooleanQuery and /// DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in each field, while the /// set of these DisjunctionMaxQuery's is combined into a BooleanQuery. The tie breaker capability /// allows results that include the same term in multiple fields to be judged better than results that /// include this term in only the best of those multiple fields, without confusing this with the better /// case of two different terms in the multiple fields. class LPPAPI DisjunctionMaxQuery : public Query { public: /// Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. /// @param tieBreakerMultiplier the score of each non-maximum disjunct for a document is multiplied /// by this weight and added into the final score. If non-zero, the value should be small, on the /// order of 0.1, which says that 10 occurrences of word in a lower-scored field that is also in a /// higher scored field is just as good as a unique word in the lower scored field (ie., one that is /// not in any higher scored field. DisjunctionMaxQuery(double tieBreakerMultiplier = 0.0); /// Creates a new DisjunctionMaxQuery /// @param disjuncts A Collection of all the disjuncts to add /// @param tieBreakerMultiplier The weight to give to each matching non-maximum disjunct DisjunctionMaxQuery(Collection disjuncts, double tieBreakerMultiplier); virtual ~DisjunctionMaxQuery(); LUCENE_CLASS(DisjunctionMaxQuery); protected: /// The subqueries Collection disjuncts; /// Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. 
double tieBreakerMultiplier; public: using Query::toString; /// Add a subquery to this disjunction /// @param query the disjunct added void add(QueryPtr query); /// Add a collection of disjuncts to this disjunction void add(Collection disjuncts); /// An iterator over the disjuncts Collection::iterator begin(); Collection::iterator end(); /// Create the Weight used to score us virtual WeightPtr createWeight(SearcherPtr searcher); /// Optimize our representation and our subqueries representations /// @param reader the IndexReader we query /// @return an optimized copy of us (which may not be a copy if there is nothing to optimize) virtual QueryPtr rewrite(IndexReaderPtr reader); /// Create a shallow copy of us - used in rewriting if necessary /// @return a copy of us (but reuse, don't copy, our subqueries) virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Adds all terms occurring in this query to the terms set. virtual void extractTerms(SetTerm terms); /// Pretty print us. /// @param field the field to which we are applied /// @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" virtual String toString(const String& field); /// @return true if other is a DisjunctionMaxQuery with the same boost and the same subqueries, in the /// same order, as us virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); friend class DisjunctionMaxWeight; }; } #endif LucenePlusPlus-rel_3.0.4/include/DisjunctionMaxScorer.h000066400000000000000000000050621217574114600232270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DISJUNCTIONMAXSCORER_H #define DISJUNCTIONMAXSCORER_H #include "Scorer.h" namespace Lucene { /// The Scorer for DisjunctionMaxQuery. The union of all documents generated by the the subquery scorers /// is generated in document number order. The score for each document is the maximum of the scores computed /// by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores /// for the other subqueries that generate the document. class DisjunctionMaxScorer : public Scorer { public: DisjunctionMaxScorer(double tieBreakerMultiplier, SimilarityPtr similarity, Collection subScorers, int32_t numScorers); virtual ~DisjunctionMaxScorer(); LUCENE_CLASS(DisjunctionMaxScorer); protected: /// The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. Collection subScorers; int32_t numScorers; /// Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. double tieBreakerMultiplier; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); /// Determine the current document score. Initially invalid, until {@link #next()} is called the first time. /// @return the score of the current generated document virtual double score(); virtual int32_t advance(int32_t target); protected: /// Recursively iterate all subScorers that generated last doc computing sum and max void scoreAll(int32_t root, int32_t size, int32_t doc, Collection sum, Collection max); /// Organize subScorers into a min heap with scorers generating the earliest document on top. void heapify(); /// The subtree of subScorers at root is a min heap except possibly for its root element. Bubble the root /// down as required to make the subtree a heap. 
void heapAdjust(int32_t root); /// Remove the root Scorer from subScorers and re-establish it as a heap void heapRemoveRoot(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DisjunctionSumScorer.h000066400000000000000000000104521217574114600232450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DISJUNCTIONSUMSCORER_H #define DISJUNCTIONSUMSCORER_H #include "Scorer.h" namespace Lucene { /// A Scorer for OR like queries, counterpart of ConjunctionScorer. This Scorer implements {@link /// Scorer#skipTo(int32_t)} and uses skipTo() on the given Scorers. class DisjunctionSumScorer : public Scorer { public: DisjunctionSumScorer(Collection subScorers, int32_t minimumNrMatchers = 1); virtual ~DisjunctionSumScorer(); LUCENE_CLASS(DisjunctionSumScorer); protected: /// The number of subscorers. int32_t nrScorers; /// The subscorers. Collection subScorers; /// The minimum number of scorers that should match. int32_t minimumNrMatchers; /// The scorerDocQueue contains all subscorers ordered by their current doc(), with the minimum at /// the top. The scorerDocQueue is initialized the first time next() or skipTo() is called. An exhausted /// scorer is immediately removed from the scorerDocQueue. If less than the minimumNrMatchers scorers /// remain in the scorerDocQueue next() and skipTo() return false. /// /// After each to call to next() or skipTo() currentSumScore is the total score of the current matching doc, /// nrMatchers is the number of matching scorers, and all scorers are after the matching doc, or are exhausted. ScorerDocQueuePtr scorerDocQueue; /// The document number of the current match. 
int32_t currentDoc; /// The number of subscorers that provide the current match. int32_t _nrMatchers; double currentScore; public: virtual void initialize(); virtual void score(CollectorPtr collector); virtual int32_t nextDoc(); /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} /// is called the first time. virtual double score(); virtual int32_t docID(); /// Returns the number of subscorers matching the current document. Initially invalid, until {@link #next()} /// is called the first time. int32_t nrMatchers(); /// Advances to the first match beyond the current whose document number is greater than or equal to a given /// target. The implementation uses the skipTo() method on the subscorers. /// /// @param target The target document number. /// @return the document whose number is greater than or equal to the given target, or -1 if none exist. virtual int32_t advance(int32_t target); protected: /// Called the first time next() or skipTo() is called to initialize scorerDocQueue. void initScorerDocQueue(); /// Collects matching documents in a range. Hook for optimization. Note that {@link #next()} must be /// called once before this method is called for the first time. /// @param collector The collector to which all matching documents are passed through. /// @param max Do not score documents past this. /// @return true if more matching documents may remain. virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); /// Advance all subscorers after the current document determined by the top of the scorerDocQueue. Repeat /// until at least the minimum number of subscorers match on the same document and all subscorers are after /// that document or are exhausted. On entry the scorerDocQueue has at least minimumNrMatchers available. /// At least the scorer with the minimum document number will be advanced. /// @return true if there is a match. 
In case there is a match, currentDoc, currentSumScore and nrMatchers /// describe the match. bool advanceAfterCurrent(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocConsumer.h000066400000000000000000000016611217574114600213340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCCONSUMER_H #define DOCCONSUMER_H #include "LuceneObject.h" namespace Lucene { class DocConsumer : public LuceneObject { public: virtual ~DocConsumer(); LUCENE_CLASS(DocConsumer); public: virtual DocConsumerPerThreadPtr addThread(DocumentsWriterThreadStatePtr perThread) = 0; virtual void flush(Collection threads, SegmentWriteStatePtr state) = 0; virtual void closeDocStore(SegmentWriteStatePtr state) = 0; virtual void abort() = 0; virtual bool freeRAM() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/DocConsumerPerThread.h000066400000000000000000000017441217574114600231350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCCONSUMERPERTHREAD_H #define DOCCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class DocConsumerPerThread : public LuceneObject { public: virtual ~DocConsumerPerThread(); LUCENE_CLASS(DocConsumerPerThread); public: /// Process the document. If there is something for this document to be done in docID order, /// you should encapsulate that as a DocWriter and return it. 
/// DocumentsWriter then calls finish() on this object when it's its turn. virtual DocWriterPtr processDocument() = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldConsumer.h000066400000000000000000000030471217574114600223000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMER_H #define DOCFIELDCONSUMER_H #include "LuceneObject.h" namespace Lucene { class DocFieldConsumer : public LuceneObject { public: virtual ~DocFieldConsumer(); LUCENE_CLASS(DocFieldConsumer); protected: FieldInfosPtr fieldInfos; public: /// Called when DocumentsWriter decides to create a new segment virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state) = 0; /// Called when DocumentsWriter decides to close the doc stores virtual void closeDocStore(SegmentWriteStatePtr state) = 0; /// Called when an aborting exception is hit virtual void abort() = 0; /// Add a new thread virtual DocFieldConsumerPerThreadPtr addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread) = 0; /// Called when DocumentsWriter is using too much RAM. The consumer should free RAM, if possible, returning /// true if any RAM was in fact freed. virtual bool freeRAM() = 0; virtual void setFieldInfos(FieldInfosPtr fieldInfos); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldConsumerPerField.h000066400000000000000000000015261217574114600237130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERPERFIELD_H #define DOCFIELDCONSUMERPERFIELD_H #include "LuceneObject.h" namespace Lucene { class DocFieldConsumerPerField : public LuceneObject { public: virtual ~DocFieldConsumerPerField(); LUCENE_CLASS(DocFieldConsumerPerField); public: /// Processes all occurrences of a single field virtual void processFields(Collection fields, int32_t count) = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldConsumerPerThread.h000066400000000000000000000015521217574114600240760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERPERTHREAD_H #define DOCFIELDCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class DocFieldConsumerPerThread : public LuceneObject { public: virtual ~DocFieldConsumerPerThread(); LUCENE_CLASS(DocFieldConsumerPerThread); public: virtual void startDocument() = 0; virtual DocWriterPtr finishDocument() = 0; virtual DocFieldConsumerPerFieldPtr addField(FieldInfoPtr fi) = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldConsumers.h000066400000000000000000000045231217574114600224630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERS_H #define DOCFIELDCONSUMERS_H #include "DocFieldConsumer.h" #include "DocumentsWriter.h" namespace Lucene { /// This is just a "splitter" class: it lets you wrap two DocFieldConsumer instances as a single consumer. class DocFieldConsumers : public DocFieldConsumer { public: DocFieldConsumers(DocFieldConsumerPtr one, DocFieldConsumerPtr two); virtual ~DocFieldConsumers(); LUCENE_CLASS(DocFieldConsumers); public: DocFieldConsumerPtr one; DocFieldConsumerPtr two; Collection docFreeList; int32_t freeCount; int32_t allocCount; public: virtual void setFieldInfos(FieldInfosPtr fieldInfos); /// Called when DocumentsWriter decides to create a new segment virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state); /// Called when DocumentsWriter decides to close the doc stores virtual void closeDocStore(SegmentWriteStatePtr state); /// Called when DocumentsWriter is using too much RAM. virtual bool freeRAM(); /// Add a new thread virtual DocFieldConsumerPerThreadPtr addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread); DocFieldConsumersPerDocPtr getPerDoc(); void freePerDoc(DocFieldConsumersPerDocPtr perDoc); }; class DocFieldConsumersPerDoc : public DocWriter { public: DocFieldConsumersPerDoc(DocFieldConsumersPtr fieldConsumers); virtual ~DocFieldConsumersPerDoc(); LUCENE_CLASS(DocFieldConsumersPerDoc); protected: DocFieldConsumersWeakPtr _fieldConsumers; public: DocWriterPtr one; DocWriterPtr two; public: virtual int64_t sizeInBytes(); virtual void finish(); virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldConsumersPerField.h000066400000000000000000000022321217574114600240710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERSPERFIELD_H #define DOCFIELDCONSUMERSPERFIELD_H #include "DocFieldConsumerPerField.h" namespace Lucene { class DocFieldConsumersPerField : public DocFieldConsumerPerField { public: DocFieldConsumersPerField(DocFieldConsumersPerThreadPtr perThread, DocFieldConsumerPerFieldPtr one, DocFieldConsumerPerFieldPtr two); virtual ~DocFieldConsumersPerField(); LUCENE_CLASS(DocFieldConsumersPerField); public: DocFieldConsumerPerFieldPtr one; DocFieldConsumerPerFieldPtr two; DocFieldConsumersPerThreadWeakPtr _perThread; public: /// Processes all occurrences of a single field virtual void processFields(Collection fields, int32_t count); virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldConsumersPerThread.h000066400000000000000000000024311217574114600242560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERSPERTHREAD_H #define DOCFIELDCONSUMERSPERTHREAD_H #include "DocFieldConsumerPerThread.h" namespace Lucene { class DocFieldConsumersPerThread : public DocFieldConsumerPerThread { public: DocFieldConsumersPerThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread, DocFieldConsumersPtr parent, DocFieldConsumerPerThreadPtr one, DocFieldConsumerPerThreadPtr two); virtual ~DocFieldConsumersPerThread(); LUCENE_CLASS(DocFieldConsumersPerThread); public: DocFieldConsumerPerThreadPtr one; DocFieldConsumerPerThreadPtr two; DocFieldConsumersWeakPtr _parent; DocStatePtr docState; public: virtual void startDocument(); virtual void abort(); virtual DocWriterPtr finishDocument(); virtual DocFieldConsumerPerFieldPtr addField(FieldInfoPtr fi); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldProcessor.h000066400000000000000000000027101217574114600224600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDPROCESSOR_H #define DOCFIELDPROCESSOR_H #include "DocConsumer.h" namespace Lucene { /// This is a DocConsumer that gathers all fields under the same name, and calls per-field consumers to process /// field by field. This class doesn't doesn't do any "real" work of its own: it just forwards the fields to a /// DocFieldConsumer. 
class DocFieldProcessor : public DocConsumer { public: DocFieldProcessor(DocumentsWriterPtr docWriter, DocFieldConsumerPtr consumer); virtual ~DocFieldProcessor(); LUCENE_CLASS(DocFieldProcessor); public: DocumentsWriterWeakPtr _docWriter; FieldInfosPtr fieldInfos; DocFieldConsumerPtr consumer; StoredFieldsWriterPtr fieldsWriter; public: virtual void closeDocStore(SegmentWriteStatePtr state); virtual void flush(Collection threads, SegmentWriteStatePtr state); virtual void abort(); virtual bool freeRAM(); virtual DocConsumerPerThreadPtr addThread(DocumentsWriterThreadStatePtr perThread); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldProcessorPerField.h000066400000000000000000000021271217574114600240750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDPROCESSORPERFIELD_H #define DOCFIELDPROCESSORPERFIELD_H #include "LuceneObject.h" namespace Lucene { /// Holds all per thread, per field state. class DocFieldProcessorPerField : public LuceneObject { public: DocFieldProcessorPerField(DocFieldProcessorPerThreadPtr perThread, FieldInfoPtr fieldInfo); virtual ~DocFieldProcessorPerField(); LUCENE_CLASS(DocFieldProcessorPerField); public: DocFieldConsumerPerFieldPtr consumer; FieldInfoPtr fieldInfo; DocFieldProcessorPerFieldPtr next; int32_t lastGen; int32_t fieldCount; Collection fields; public: virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocFieldProcessorPerThread.h000066400000000000000000000055521217574114600242660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDPROCESSORPERTHREAD_H #define DOCFIELDPROCESSORPERTHREAD_H #include "DocConsumerPerThread.h" #include "DocumentsWriter.h" namespace Lucene { /// Gathers all Fieldables for a document under the same name, updates FieldInfos, and calls per-field /// consumers to process field by field. /// /// Currently, only a single thread visits the fields, sequentially, for processing. class DocFieldProcessorPerThread : public DocConsumerPerThread { public: DocFieldProcessorPerThread(DocumentsWriterThreadStatePtr threadState, DocFieldProcessorPtr docFieldProcessor); virtual ~DocFieldProcessorPerThread(); LUCENE_CLASS(DocFieldProcessorPerThread); public: double docBoost; int32_t fieldGen; DocFieldProcessorWeakPtr _docFieldProcessor; FieldInfosPtr fieldInfos; DocFieldConsumerPerThreadPtr consumer; Collection _fields; // Holds all fields seen in current doc int32_t fieldCount; Collection fieldHash; // Hash table for all fields ever seen int32_t hashMask; int32_t totalFieldCount; StoredFieldsWriterPerThreadPtr fieldsWriter; DocStatePtr docState; Collection docFreeList; int32_t freeCount; int32_t allocCount; public: virtual void initialize(); virtual void abort(); Collection fields(); // If there are fields we've seen but did not see again in the last run, then free them up. 
void trimFields(SegmentWriteStatePtr state); virtual DocWriterPtr processDocument(); DocFieldProcessorPerThreadPerDocPtr getPerDoc(); void freePerDoc(DocFieldProcessorPerThreadPerDocPtr perDoc); protected: void rehash(); }; class DocFieldProcessorPerThreadPerDoc : public DocWriter { public: DocFieldProcessorPerThreadPerDoc(DocFieldProcessorPerThreadPtr docProcessor); virtual ~DocFieldProcessorPerThreadPerDoc(); LUCENE_CLASS(DocFieldProcessorPerThreadPerDoc); public: DocWriterPtr one; DocWriterPtr two; protected: DocFieldProcessorPerThreadWeakPtr _docProcessor; public: virtual int64_t sizeInBytes(); virtual void finish(); virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocIdBitSet.h000066400000000000000000000022501217574114600212030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCIDBITSET_H #define DOCIDBITSET_H #include "DocIdSet.h" namespace Lucene { /// Simple DocIdSet and DocIdSetIterator backed by a BitSet class LPPAPI DocIdBitSet : public DocIdSet { public: DocIdBitSet(); DocIdBitSet(BitSetPtr bitSet); virtual ~DocIdBitSet(); LUCENE_CLASS(DocIdBitSet); protected: BitSetPtr bitSet; public: virtual DocIdSetIteratorPtr iterator(); /// This DocIdSet implementation is cacheable. virtual bool isCacheable(); /// Returns the underlying BitSet. 
BitSetPtr getBitSet(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocIdSet.h000066400000000000000000000027731217574114600205560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCIDSET_H #define DOCIDSET_H #include "DocIdSetIterator.h" namespace Lucene { /// A DocIdSet contains a set of doc ids. Implementing classes must only implement {@link #iterator} to /// provide access to the set. class LPPAPI DocIdSet : public LuceneObject { public: virtual ~DocIdSet(); LUCENE_CLASS(DocIdSet); public: /// Provides a {@link DocIdSetIterator} to access the set. This implementation can return null or /// {@link #EmptyDocIdSet}.iterator() if there are no docs that match. virtual DocIdSetIteratorPtr iterator() = 0; /// This method is a hint for {@link CachingWrapperFilter}, if this DocIdSet should be cached without /// copying it into a BitSet. The default is to return false. If you have an own DocIdSet implementation /// that does its iteration very effective and fast without doing disk I/O, override this method and /// return true. virtual bool isCacheable(); /// An empty {@code DocIdSet} instance for easy use, eg. in Filters that hit no documents. static DocIdSetPtr EMPTY_DOCIDSET(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocIdSetIterator.h000066400000000000000000000062261217574114600222650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCIDSETITERATOR_H #define DOCIDSETITERATOR_H #include "LuceneObject.h" namespace Lucene { /// This abstract class defines methods to iterate over a set of non-decreasing doc ids. Note that this class /// assumes it iterates on doc Ids, and therefore {@link #NO_MORE_DOCS} is set to {@value #NO_MORE_DOCS} in order to /// be used as a sentinel object. Implementations of this class are expected to consider INT_MAX as an invalid value. class LPPAPI DocIdSetIterator : public LuceneObject { public: virtual ~DocIdSetIterator(); LUCENE_CLASS(DocIdSetIterator); public: /// When returned by {@link #nextDoc()}, {@link #advance(int)} and {@link #docID()} it means there are no more /// docs in the iterator. static const int32_t NO_MORE_DOCS; public: /// Returns the following: ///
    ///
  • -1 or {@link #NO_MORE_DOCS} if {@link #nextDoc()} or {@link #advance(int)} were not called yet. ///
  • {@link #NO_MORE_DOCS} if the iterator has exhausted. ///
  • Otherwise it should return the doc ID it is currently on. ///
virtual int32_t docID() = 0; /// Advances to the next document in the set and returns the doc it is currently on, or {@link #NO_MORE_DOCS} /// if there are no more docs in the set. /// /// NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted /// behaviour. virtual int32_t nextDoc() = 0; /// Advances to the first beyond the current whose document number is greater than or equal to target. Returns /// the current document number or {@link #NO_MORE_DOCS} if there are no more docs in the set. /// /// Behaves as if written: /// ///
        /// int32_t advance(int32_t target)
        /// {
        ///     int32_t doc;
        ///     while ((doc = nextDoc()) < target)
        ///     { }
        ///     return doc;
        /// }
        /// 
/// /// Some implementations are considerably more efficient than that. /// /// NOTE: certain implementations may return a different value (each time) if called several times in a row /// with the same target. /// /// NOTE: this method may be called with {@value #NO_MORE_DOCS} for efficiency by some Scorers. If your /// implementation cannot efficiently determine that it should exhaust, it is recommended that you check for /// that value in each call to this method. /// /// NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted /// behaviour. virtual int32_t advance(int32_t target) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/DocInverter.h000066400000000000000000000033261217574114600213370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCINVERTER_H #define DOCINVERTER_H #include "DocFieldConsumer.h" namespace Lucene { /// This is a DocFieldConsumer that inverts each field, separately, from a Document, and accepts a /// InvertedTermsConsumer to process those terms. 
class DocInverter : public DocFieldConsumer { public: DocInverter(InvertedDocConsumerPtr consumer, InvertedDocEndConsumerPtr endConsumer); virtual ~DocInverter(); LUCENE_CLASS(DocInverter); public: InvertedDocConsumerPtr consumer; InvertedDocEndConsumerPtr endConsumer; public: virtual void setFieldInfos(FieldInfosPtr fieldInfos); /// Called when DocumentsWriter decides to create a new segment virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state); /// Called when DocumentsWriter decides to close the doc stores virtual void closeDocStore(SegmentWriteStatePtr state); /// Called when an aborting exception is hit virtual void abort(); /// Called when DocumentsWriter is using too much RAM. virtual bool freeRAM(); /// Add a new thread virtual DocFieldConsumerPerThreadPtr addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocInverterPerField.h000066400000000000000000000030761217574114600227540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCINVERTERPERFIELD_H #define DOCINVERTERPERFIELD_H #include "DocFieldConsumerPerField.h" namespace Lucene { /// Holds state for inverting all occurrences of a single field in the document. This class doesn't do /// anything itself; instead, it forwards the tokens produced by analysis to its own consumer /// (InvertedDocConsumerPerField). It also interacts with an endConsumer (InvertedDocEndConsumerPerField). 
class DocInverterPerField : public DocFieldConsumerPerField { public: DocInverterPerField(DocInverterPerThreadPtr perThread, FieldInfoPtr fieldInfo); virtual ~DocInverterPerField(); LUCENE_CLASS(DocInverterPerField); protected: DocInverterPerThreadWeakPtr _perThread; FieldInfoPtr fieldInfo; public: InvertedDocConsumerPerFieldPtr consumer; InvertedDocEndConsumerPerFieldPtr endConsumer; DocStatePtr docState; FieldInvertStatePtr fieldState; public: virtual void initialize(); virtual void abort(); /// Processes all occurrences of a single field virtual void processFields(Collection fields, int32_t count); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocInverterPerThread.h000066400000000000000000000040231217574114600231310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCINVERTERPERTHREAD_H #define DOCINVERTERPERTHREAD_H #include "DocFieldConsumerPerThread.h" #include "AttributeSource.h" namespace Lucene { /// This is a DocFieldConsumer that inverts each field, separately, from a Document, and accepts a /// InvertedTermsConsumer to process those terms. 
class DocInverterPerThread : public DocFieldConsumerPerThread { public: DocInverterPerThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread, DocInverterPtr docInverter); virtual ~DocInverterPerThread(); LUCENE_CLASS(DocInverterPerThread); public: DocInverterWeakPtr _docInverter; InvertedDocConsumerPerThreadPtr consumer; InvertedDocEndConsumerPerThreadPtr endConsumer; SingleTokenAttributeSourcePtr singleToken; DocStatePtr docState; FieldInvertStatePtr fieldState; /// Used to read a string value for a field ReusableStringReaderPtr stringReader; public: virtual void initialize(); virtual void startDocument(); virtual DocWriterPtr finishDocument(); virtual void abort(); virtual DocFieldConsumerPerFieldPtr addField(FieldInfoPtr fi); }; class SingleTokenAttributeSource : public AttributeSource { public: SingleTokenAttributeSource(); virtual ~SingleTokenAttributeSource(); LUCENE_CLASS(SingleTokenAttributeSource); public: TermAttributePtr termAttribute; OffsetAttributePtr offsetAttribute; public: void reinit(const String& stringValue, int32_t startOffset, int32_t endOffset); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocValues.h000066400000000000000000000100111217574114600207650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCVALUES_H #define DOCVALUES_H #include "LuceneObject.h" namespace Lucene { /// Represents field values as different types. Normally created via a {@link ValueSuorce} for a /// particular field and reader. 
/// /// DocValues is distinct from ValueSource because there needs to be an object created at query /// evaluation time that is not referenced by the query itself because: /// - Query objects should be MT safe /// - For caching, Query objects are often used as keys... you don't want the Query carrying around /// big objects class LPPAPI DocValues : public LuceneObject { public: DocValues(); virtual ~DocValues(); LUCENE_CLASS(DocValues); protected: double minVal; double maxVal; double avgVal; bool computed; public: using LuceneObject::toString; /// Return doc value as a double. /// Mandatory: every DocValues implementation must implement at least this method. /// @param doc document whose double value is requested. virtual double doubleVal(int32_t doc) = 0; /// Return doc value as an int. /// Optional: DocValues implementation can (but don't have to) override this method. /// @param doc document whose int value is requested. virtual int32_t intVal(int32_t doc); /// Return doc value as a long. /// Optional: DocValues implementation can (but don't have to) override this method. /// @param doc document whose long value is requested. virtual int64_t longVal(int32_t doc); /// Return doc value as a string. /// Optional: DocValues implementation can (but don't have to) override this method. /// @param doc document whose string value is requested. virtual String strVal(int32_t doc); /// Return a string representation of a doc value, as required for Explanations. virtual String toString(int32_t doc) = 0; /// Explain the scoring value for the input doc. virtual ExplanationPtr explain(int32_t doc); /// For test purposes only, return the inner array of values, or null if not applicable. /// /// Allows tests to verify that loaded values are: ///
    ///
  1. indeed cached/reused. ///
  2. stored in the expected size/type (byte/short/int/float). ///
/// /// Note: implementations of DocValues must override this method for these test elements to be tested, /// Otherwise the test would not fail, just print a warning. virtual CollectionValue getInnerArray(); /// Returns the minimum of all values or NaN if this DocValues instance does not contain any value. /// This operation is optional /// @return the minimum of all values or NaN if this DocValues instance does not contain any value. virtual double getMinValue(); /// Returns the maximum of all values or NaN if this DocValues instance does not contain any value. /// This operation is optional /// @return the maximum of all values or NaN if this DocValues instance does not contain any value. virtual double getMaxValue(); /// Returns the average of all values or NaN if this DocValues instance does not contain any value. /// This operation is optional /// @return the average of all values or NaN if this DocValues instance does not contain any value. virtual double getAverageValue(); protected: /// Compute optional values void compute(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Document.h000066400000000000000000000171761217574114600207010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCUMENT_H #define DOCUMENT_H #include "LuceneObject.h" namespace Lucene { /// Documents are the unit of indexing and search. /// /// A Document is a set of fields. Each field has a name and a textual value. A field may be {@link /// Fieldable#isStored() stored} with the document, in which case it is returned with search hits on the /// document. Thus each document should typically contain one or more stored fields which uniquely /// identify it. 
/// /// Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents /// retrieved from the index, eg. with {@link ScoreDoc#doc}, {@link Searcher#doc(int)} or {@link /// IndexReader#document(int)}. class LPPAPI Document : public LuceneObject { public: /// Constructs a new document with no fields. Document(); virtual ~Document(); LUCENE_CLASS(Document); protected: Collection fields; double boost; public: /// Sets a boost factor for hits on any field of this document. This value will be multiplied into the /// score of all hits on this document. /// /// The default value is 1.0. /// /// Values are multiplied into the value of {@link Fieldable#getBoost()} of each field in this document. /// Thus, this method in effect sets a default boost for the fields of this document. /// /// @see Fieldable#setBoost(double) void setBoost(double boost); /// Returns, at indexing time, the boost factor as set by {@link #setBoost(double)}. /// /// Note that once a document is indexed this value is no longer available from the index. At search time, /// for retrieved documents, this method always returns 1. This however does not mean that the boost value /// set at indexing time was ignored - it was just combined with other indexing time factors and stored /// elsewhere, for better indexing and search performance. (For more information see the "norm(t,d)" part /// of the scoring formula in {@link Similarity}.) /// /// @see #setBoost(double) double getBoost(); /// Adds a field to a document. Several fields may be added with the same name. In this case, if the fields /// are indexed, their text is treated as though appended for the purposes of search. /// /// Note that add like the removeField(s) methods only makes sense prior to adding a document to an index. /// These methods cannot be used to change the content of an existing index! 
In order to achieve this, a /// document has to be deleted from an index and a new changed version of that document has to be added. void add(FieldablePtr field); /// Removes field with the specified name from the document. If multiple fields exist with this name, this /// method removes the first field that has been added. If there is no field with the specified name, the /// document remains unchanged. /// /// Note that the removeField(s) methods like the add method only make sense prior to adding a document to /// an index. These methods cannot be used to change the content of an existing index! In order to achieve /// this, a document has to be deleted from an index and a new changed version of that document has to be added. void removeField(const String& name); /// Removes all fields with the given name from the document. If there is no field with the specified name, /// the document remains unchanged. /// /// Note that the removeField(s) methods like the add method only make sense prior to adding a document to an /// index. These methods cannot be used to change the content of an existing index! In order to achieve this, /// a document has to be deleted from an index and a new changed version of that document has to be added. void removeFields(const String& name); /// Returns a field with the given name if any exist in this document, or null. If multiple fields exists with /// this name, this method returns the first value added. /// Do not use this method with lazy loaded fields. FieldPtr getField(const String& name); /// Returns a field with the given name if any exist in this document, or null. If multiple fields exists with /// this name, this method returns the first value added. FieldablePtr getFieldable(const String& name); /// Returns the string value of the field with the given name if any exist in this document, or null. If multiple /// fields exist with this name, this method returns the first value added. 
If only binary fields with this name /// exist, returns null. String get(const String& name); /// Returns a List of all the fields in a document. /// /// Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents /// retrieved from the index, eg. {@link Searcher#doc(int)} or {@link IndexReader#document(int)}. Collection getFields(); /// Returns an array of {@link Field}s with the given name. Do not use with lazy loaded fields. This method /// returns an empty array when there are no matching fields. It never returns null. /// @param name the name of the field /// @return a Field[] array Collection getFields(const String& name); /// Returns an array of {@link Fieldable}s with the given name. /// This method returns an empty array when there are no matching fields. It never returns null. /// @param name the name of the field /// @return a Fieldable[] array Collection getFieldables(const String& name); /// Returns an array of values of the field specified as the method parameter. /// This method returns an empty array when there are no matching fields. It never returns null. /// @param name the name of the field /// @return a String[] of field values Collection getValues(const String& name); /// Returns an array of byte arrays for of the fields that have the name specified as the method parameter. /// This method returns an empty array when there are no matching fields. It never returns null. /// @param name the name of the field /// @return a byte[][] of binary field values Collection getBinaryValues(const String& name); /// Returns an array of bytes for the first (or only) field that has the name specified as the method parameter. /// This method will return null if no binary fields with the specified name are available. There may be /// non-binary fields with the same name. /// @param name the name of the field. 
/// @return a byte[] containing the binary field value or null ByteArray getBinaryValue(const String& name); /// Returns a string representation of the object virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocumentsWriter.h000066400000000000000000000520041217574114600222460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCUMENTSWRITER_H #define DOCUMENTSWRITER_H #include "ByteBlockPool.h" #include "RAMFile.h" namespace Lucene { /// This class accepts multiple added documents and directly writes a single segment file. It does this more /// efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on /// those segments. /// /// Each added document is passed to the {@link DocConsumer}, which in turn processes the document and interacts /// with other consumers in the indexing chain. Certain consumers, like {@link StoredFieldsWriter} and {@link /// TermVectorsTermsWriter}, digest a document and immediately write bytes to the "doc store" files (ie, /// they do not consume RAM per document, except while they are processing the document). /// /// Other consumers, eg {@link FreqProxTermsWriter} and {@link NormsWriter}, buffer bytes in RAM and flush only /// when a new segment is produced. /// /// Once we have used our allowed RAM buffer, or the number of added docs is large enough (in the case we are /// flushing by doc count instead of RAM usage), we create a real segment and flush it to the Directory. /// /// Threads: /// Multiple threads are allowed into addDocument at once. 
There is an initial synchronized call to /// getThreadState which allocates a ThreadState for this thread. The same thread will get the same ThreadState /// over time (thread affinity) so that if there are consistent patterns (for example each thread is indexing a /// different content source) then we make better use of RAM. Then processDocument is called on that ThreadState /// without synchronization (most of the "heavy lifting" is in this call). Finally the synchronized /// "finishDocument" is called to flush changes to the directory. /// /// When flush is called by IndexWriter we forcefully idle all threads and flush only once they are all idle. /// This means you can call flush with a given thread even while other threads are actively adding/deleting /// documents. /// /// Exceptions: /// Because this class directly updates in-memory posting lists, and flushes stored fields and term vectors /// directly to files in the directory, there are certain limited times when an exception can corrupt this state. /// For example, a disk full while flushing stored fields leaves this file in a corrupt state. Or, an /// std::bad_alloc exception while appending to the in-memory posting lists can corrupt that posting list. /// We call such exceptions "aborting exceptions". In these cases we must call abort() to discard all docs added /// since the last flush. /// /// All other exceptions ("non-aborting exceptions") can still partially update the index structures. These /// updates are consistent, but, they represent only a part of the document seen up until the exception was hit. /// When this happens, we immediately mark the document as deleted so that the document is always atomically /// ("all or none") added to the index. 
class DocumentsWriter : public LuceneObject { public: DocumentsWriter(DirectoryPtr directory, IndexWriterPtr writer, IndexingChainPtr indexingChain); virtual ~DocumentsWriter(); LUCENE_CLASS(DocumentsWriter); protected: String docStoreSegment; // Current doc-store segment we are writing int32_t docStoreOffset; // Current starting doc-store offset of current segment int32_t nextDocID; // Next docID to be added int32_t numDocsInRAM; // # docs buffered in RAM /// Max # ThreadState instances; if there are more threads than this they share ThreadStates static const int32_t MAX_THREAD_STATE; Collection threadStates; MapThreadDocumentsWriterThreadState threadBindings; int32_t pauseThreads; // Non-zero when we need all threads to pause (eg to flush) bool aborting; // True if an abort is pending DocFieldProcessorPtr docFieldProcessor; /// Deletes done after the last flush; these are discarded on abort BufferedDeletesPtr deletesInRAM; /// Deletes done before the last flush; these are still kept on abort BufferedDeletesPtr deletesFlushed; /// The max number of delete terms that can be buffered before they must be flushed to disk. int32_t maxBufferedDeleteTerms; /// How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead. int64_t ramBufferSize; int64_t waitQueuePauseBytes; int64_t waitQueueResumeBytes; /// If we've allocated 5% over our RAM budget, we then free down to 95% int64_t freeTrigger; int64_t freeLevel; /// Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead. 
int32_t maxBufferedDocs; /// How many docs already flushed to index int32_t flushedDocCount; bool closed; /// List of files that were written before last abort() HashSet _abortedFiles; SegmentWriteStatePtr flushState; Collection freeIntBlocks; Collection freeCharBlocks; public: /// Coarse estimates used to measure RAM usage of buffered deletes static const int32_t OBJECT_HEADER_BYTES; static const int32_t POINTER_NUM_BYTE; static const int32_t INT_NUM_BYTE; static const int32_t CHAR_NUM_BYTE; /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object /// with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is /// object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since /// it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). /// BufferedDeletes.num is OBJ_HEADER + INT. static const int32_t BYTES_PER_DEL_TERM; /// Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). Integer is /// OBJ_HEADER + int static const int32_t BYTES_PER_DEL_DOCID; /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object /// with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount /// (say 24 bytes). Integer is OBJ_HEADER + INT. 
static const int32_t BYTES_PER_DEL_QUERY; /// Initial chunks size of the shared byte[] blocks used to store postings data static const int32_t BYTE_BLOCK_SHIFT; static const int32_t BYTE_BLOCK_SIZE; static const int32_t BYTE_BLOCK_MASK; static const int32_t BYTE_BLOCK_NOT_MASK; /// Initial chunk size of the shared char[] blocks used to store term text static const int32_t CHAR_BLOCK_SHIFT; static const int32_t CHAR_BLOCK_SIZE; static const int32_t CHAR_BLOCK_MASK; static const int32_t MAX_TERM_LENGTH; /// Initial chunks size of the shared int[] blocks used to store postings data static const int32_t INT_BLOCK_SHIFT; static const int32_t INT_BLOCK_SIZE; static const int32_t INT_BLOCK_MASK; static const int32_t PER_DOC_BLOCK_SIZE; INTERNAL: IndexWriterWeakPtr _writer; DirectoryPtr directory; IndexingChainPtr indexingChain; String segment; // Current segment we are working on int32_t numDocsInStore; // # docs written to doc stores bool flushPending; // True when a thread has decided to flush bool bufferIsFull; // True when it's time to write segment InfoStreamPtr infoStream; int32_t maxFieldLength; SimilarityPtr similarity; DocConsumerPtr consumer; HashSet _openFiles; HashSet _closedFiles; WaitQueuePtr waitQueue; SkipDocWriterPtr skipDocWriter; ByteBlockAllocatorPtr byteBlockAllocator; ByteBlockAllocatorPtr perDocAllocator; int64_t numBytesAlloc; int64_t numBytesUsed; // used only by assert TermPtr lastDeleteTerm; public: virtual void initialize(); /// Create and return a new DocWriterBuffer. PerDocBufferPtr newPerDocBuffer(); static IndexingChainPtr getDefaultIndexingChain(); void updateFlushedDocCount(int32_t n); int32_t getFlushedDocCount(); void setFlushedDocCount(int32_t n); /// Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false bool hasProx(); /// If non-null, various details of indexing are printed here. 
void setInfoStream(InfoStreamPtr infoStream); void setMaxFieldLength(int32_t maxFieldLength); void setSimilarity(SimilarityPtr similarity); /// Set how much RAM we can use before flushing. void setRAMBufferSizeMB(double mb); double getRAMBufferSizeMB(); /// Set max buffered docs, which means we will flush by doc count instead of by RAM usage. void setMaxBufferedDocs(int32_t count); int32_t getMaxBufferedDocs(); /// Get current segment name we are writing. String getSegment(); /// Returns how many docs are currently buffered in RAM. int32_t getNumDocsInRAM(); /// Returns the current doc store segment we are writing to. String getDocStoreSegment(); /// Returns the doc offset into the shared doc store for the current buffered docs. int32_t getDocStoreOffset(); /// Closes the current open doc stores an returns the doc store segment name. This returns null if there /// are no buffered documents. String closeDocStore(); HashSet abortedFiles(); void message(const String& message); /// Returns Collection of files in use by this instance, including any flushed segments. HashSet openFiles(); HashSet closedFiles(); void addOpenFile(const String& name); void removeOpenFile(const String& name); void setAborting(); /// Called if we hit an exception at a bad time (when updating the index files) and must discard all /// currently buffered docs. This resets our state, discarding any docs added since last flush. void abort(); /// Returns true if an abort is in progress bool pauseAllThreads(); void resumeAllThreads(); bool anyChanges(); void initFlushState(bool onlyDocStore); /// Flush all pending docs to a new segment int32_t flush(bool _closeDocStore); HashSet getFlushedFiles(); /// Build compound file for the segment we just flushed void createCompoundFile(const String& segment); /// Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter /// to trigger a single flush even when multiple threads are trying to do so. 
bool setFlushPending(); void clearFlushPending(); void pushDeletes(); void close(); void initSegmentName(bool onlyDocStore); /// Returns a free (idle) ThreadState that may be used for indexing this one document. This call also /// pauses if a flush is pending. If delTerm is non-null then we buffer this deleted term after the /// thread state has been acquired. DocumentsWriterThreadStatePtr getThreadState(DocumentPtr doc, TermPtr delTerm); /// Returns true if the caller (IndexWriter) should now flush. bool addDocument(DocumentPtr doc, AnalyzerPtr analyzer); bool updateDocument(TermPtr t, DocumentPtr doc, AnalyzerPtr analyzer); bool updateDocument(DocumentPtr doc, AnalyzerPtr analyzer, TermPtr delTerm); int32_t getNumBufferedDeleteTerms(); // for testing MapTermNum getBufferedDeleteTerms(); // for testing /// Called whenever a merge has completed and the merged segments had deletions void remapDeletes(SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergeDocCount); bool bufferDeleteTerms(Collection terms); bool bufferDeleteTerm(TermPtr term); bool bufferDeleteQueries(Collection queries); bool bufferDeleteQuery(QueryPtr query); bool deletesFull(); bool doApplyDeletes(); void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms); int32_t getMaxBufferedDeleteTerms(); bool hasDeletes(); bool applyDeletes(SegmentInfosPtr infos); bool doBalanceRAM(); void waitForWaitQueue(); int64_t getRAMUsed(); IntArray getIntBlock(bool trackAllocations); void bytesAllocated(int64_t numBytes); void bytesUsed(int64_t numBytes); void recycleIntBlocks(Collection blocks, int32_t start, int32_t end); CharArray getCharBlock(); void recycleCharBlocks(Collection blocks, int32_t numBlocks); String toMB(int64_t v); /// We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds /// characters in the term) and per-doc buffers (stored fields/term vectors). 
Different docs require /// varying amount of storage from these four classes. /// /// For example, docs with many unique single-occurrence short terms will use up the Postings /// RAM and hardly any of the other two. Whereas docs with very large terms will use alot of char blocks /// RAM and relatively less of the other two. This method just frees allocations from the pools once we /// are over-budget, which balances the pools to match the current docs. void balanceRAM(); protected: /// Reset after a flush void doAfterFlush(); bool allThreadsIdle(); void waitReady(DocumentsWriterThreadStatePtr state); bool timeToFlushDeletes(); // used only by assert bool checkDeleteTerm(TermPtr term); bool applyDeletes(IndexReaderPtr reader, int32_t docIDStart); void addDeleteTerm(TermPtr term, int32_t docCount); /// Buffer a specific docID for deletion. Currently only used when we hit a exception when adding a document void addDeleteDocID(int32_t docID); void addDeleteQuery(QueryPtr query, int32_t docID); /// Does the synchronized work to finish/flush the inverted document. void finishDocument(DocumentsWriterThreadStatePtr perThread, DocWriterPtr docWriter); friend class WaitQueue; }; class DocState : public LuceneObject { public: DocState(); virtual ~DocState(); LUCENE_CLASS(DocState); public: DocumentsWriterWeakPtr _docWriter; AnalyzerPtr analyzer; int32_t maxFieldLength; InfoStreamPtr infoStream; SimilarityPtr similarity; int32_t docID; DocumentPtr doc; String maxTermPrefix; public: /// Only called by asserts virtual bool testPoint(const String& name); void clear(); }; /// RAMFile buffer for DocWriters. class PerDocBuffer : public RAMFile { public: PerDocBuffer(DocumentsWriterPtr docWriter); virtual ~PerDocBuffer(); LUCENE_CLASS(PerDocBuffer); protected: DocumentsWriterWeakPtr _docWriter; public: /// Recycle the bytes used. void recycle(); protected: /// Allocate bytes used from shared pool. virtual ByteArray newBuffer(int32_t size); }; /// Consumer returns this on each doc. 
This holds any state that must be flushed synchronized /// "in docID order". We gather these and flush them in order. class DocWriter : public LuceneObject { public: DocWriter(); virtual ~DocWriter(); LUCENE_CLASS(DocWriter); public: DocWriterPtr next; int32_t docID; public: virtual void finish() = 0; virtual void abort() = 0; virtual int64_t sizeInBytes() = 0; virtual void setNext(DocWriterPtr next); }; /// The IndexingChain must define the {@link #getChain(DocumentsWriter)} method which returns the DocConsumer /// that the DocumentsWriter calls to process the documents. class IndexingChain : public LuceneObject { public: virtual ~IndexingChain(); LUCENE_CLASS(IndexingChain); public: virtual DocConsumerPtr getChain(DocumentsWriterPtr documentsWriter) = 0; }; /// This is the current indexing chain: /// DocConsumer / DocConsumerPerThread /// --> code: DocFieldProcessor / DocFieldProcessorPerThread /// --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField /// --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField /// --> code: DocInverter / DocInverterPerThread / DocInverterPerField /// --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField /// --> code: TermsHash / TermsHashPerThread / TermsHashPerField /// --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField /// --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField /// --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField /// --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField /// --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField /// --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField class DefaultIndexingChain : public IndexingChain { public: virtual ~DefaultIndexingChain(); LUCENE_CLASS(DefaultIndexingChain); public: virtual 
DocConsumerPtr getChain(DocumentsWriterPtr documentsWriter); }; class SkipDocWriter : public DocWriter { public: virtual ~SkipDocWriter(); LUCENE_CLASS(SkipDocWriter); public: virtual void finish(); virtual void abort(); virtual int64_t sizeInBytes(); }; class WaitQueue : public LuceneObject { public: WaitQueue(DocumentsWriterPtr docWriter); virtual ~WaitQueue(); LUCENE_CLASS(WaitQueue); protected: DocumentsWriterWeakPtr _docWriter; public: Collection waiting; int32_t nextWriteDocID; int32_t nextWriteLoc; int32_t numWaiting; int64_t waitingBytes; public: void reset(); bool doResume(); bool doPause(); void abort(); bool add(DocWriterPtr doc); protected: void writeDocument(DocWriterPtr doc); }; class ByteBlockAllocator : public ByteBlockPoolAllocatorBase { public: ByteBlockAllocator(DocumentsWriterPtr docWriter, int32_t blockSize); virtual ~ByteBlockAllocator(); LUCENE_CLASS(ByteBlockAllocator); protected: DocumentsWriterWeakPtr _docWriter; public: int32_t blockSize; Collection freeByteBlocks; public: /// Allocate another byte[] from the shared pool virtual ByteArray getByteBlock(bool trackAllocations); /// Return byte[]'s to the pool virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end); virtual void recycleByteBlocks(Collection blocks); }; } #endif LucenePlusPlus-rel_3.0.4/include/DocumentsWriterThreadState.h000066400000000000000000000025571217574114600244070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCUMENTSWRITERTHREADSTATE_H #define DOCUMENTSWRITERTHREADSTATE_H #include "LuceneObject.h" namespace Lucene { /// Used by DocumentsWriter to maintain per-thread state. 
/// We keep a separate Posting hash and other state for each thread and then merge postings /// hashes from all threads when writing the segment. class DocumentsWriterThreadState : public LuceneObject { public: DocumentsWriterThreadState(DocumentsWriterPtr docWriter); virtual ~DocumentsWriterThreadState(); LUCENE_CLASS(DocumentsWriterThreadState); public: bool isIdle; // false if this is currently in use by a thread int32_t numThreads; // Number of threads that share this instance bool doFlushAfter; // true if we should flush after processing current doc DocConsumerPerThreadPtr consumer; DocStatePtr docState; DocumentsWriterWeakPtr _docWriter; public: virtual void initialize(); void doAfterFlush(); }; } #endif LucenePlusPlus-rel_3.0.4/include/DoubleFieldSource.h000066400000000000000000000045741217574114600224600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOUBLEFIELDSOURCE_H #define DOUBLEFIELDSOURCE_H #include "FieldCacheSource.h" #include "DocValues.h" namespace Lucene { /// Obtains double field values from the {@link FieldCache} using getDoubles() and makes those values available /// as other numeric types, casting as needed. /// /// @see FieldCacheSource for requirements on the field. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. 
Alternatively, /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU /// per lookup but will not consume double the FieldCache RAM. class DoubleFieldSource : public FieldCacheSource { public: /// Create a cached double field source with a specific string-to-double parser. DoubleFieldSource(const String& field, DoubleParserPtr parser = DoubleParserPtr()); virtual ~DoubleFieldSource(); LUCENE_CLASS(DoubleFieldSource); protected: DoubleParserPtr parser; public: virtual String description(); virtual DocValuesPtr getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader); virtual bool cachedFieldSourceEquals(FieldCacheSourcePtr other); virtual int32_t cachedFieldSourceHashCode(); }; class DoubleDocValues : public DocValues { public: DoubleDocValues(DoubleFieldSourcePtr source, Collection arr); virtual ~DoubleDocValues(); LUCENE_CLASS(DoubleDocValues); protected: DoubleFieldSourceWeakPtr _source; Collection arr; public: virtual double doubleVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ExactPhraseScorer.h000066400000000000000000000014631217574114600225000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef EXACTPHRASESCORER_H #define EXACTPHRASESCORER_H #include "PhraseScorer.h" namespace Lucene { class ExactPhraseScorer : public PhraseScorer { public: ExactPhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, ByteArray norms); virtual ~ExactPhraseScorer(); LUCENE_CLASS(ExactPhraseScorer); protected: virtual double phraseFreq(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Explanation.h000066400000000000000000000055301217574114600213740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef EXPLANATION_H #define EXPLANATION_H #include "LuceneObject.h" namespace Lucene { /// Describes the score computation for document and query. class LPPAPI Explanation : public LuceneObject { public: Explanation(double value = 0, const String& description = EmptyString); virtual ~Explanation(); LUCENE_CLASS(Explanation); protected: double value; // the value of this node String description; // what it represents Collection details; // sub-explanations public: /// Indicates whether or not this Explanation models a good match. /// /// By default, an Explanation represents a "match" if the value is positive. /// /// @see #getValue virtual bool isMatch(); /// The value assigned to this explanation node. virtual double getValue(); /// Sets the value assigned to this explanation node. virtual void setValue(double value); /// A description of this explanation node. virtual String getDescription(); /// Sets the description of this explanation node. virtual void setDescription(const String& description); /// The sub-nodes of this explanation node. 
virtual Collection getDetails(); /// Adds a sub-node to this explanation node. virtual void addDetail(ExplanationPtr detail); /// Render an explanation as text. virtual String toString(); /// Render an explanation as HTML. virtual String toHtml(); protected: /// A short one line summary which should contain all high level information about this Explanation, /// without the "Details" virtual String getSummary(); virtual String toString(int32_t depth); }; /// Small Util class used to pass both an idf factor as well as an explanation for that factor. /// /// This class will likely be held on a {@link Weight}, so be aware before storing any large fields. class LPPAPI IDFExplanation : public LuceneObject { public: virtual ~IDFExplanation(); LUCENE_CLASS(IDFExplanation); public: /// @return the idf factor virtual double getIdf() = 0; /// This should be calculated lazily if possible. /// @return the explanation for the idf factor. virtual String explain() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FSDirectory.h000066400000000000000000000137611217574114600213140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FSDIRECTORY_H #define FSDIRECTORY_H #include "Directory.h" namespace Lucene { /// Base class for Directory implementations that store index files in the file system. There are currently three /// core subclasses: /// /// {@link SimpleFSDirectory} is a straightforward implementation using std::ofstream and std::ifstream. /// /// {@link MMapDirectory} uses memory-mapped IO when reading. 
This is a good choice if you have plenty of virtual /// memory relative to your index size, eg if you are running on a 64 bit operating system, oryour index sizes are /// small enough to fit into the virtual memory space. /// /// For users who have no reason to prefer a specific implementation, it's best to simply use {@link #open}. For /// all others, you should instantiate the desired implementation directly. /// /// The locking implementation is by default {@link NativeFSLockFactory}, but can be changed by passing in a custom /// {@link LockFactory} instance. /// @see Directory class LPPAPI FSDirectory : public Directory { protected: /// Create a new FSDirectory for the named location (ctor for subclasses). /// @param path the path of the directory. /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) FSDirectory(const String& path, LockFactoryPtr lockFactory); public: virtual ~FSDirectory(); LUCENE_CLASS(FSDirectory); public: /// Default read chunk size. This is a conditional default based on operating system. /// @see #setReadChunkSize static const int32_t DEFAULT_READ_CHUNK_SIZE; protected: bool checked; /// The underlying filesystem directory. String directory; /// @see #DEFAULT_READ_CHUNK_SIZE int32_t chunkSize; public: /// Creates an FSDirectory instance. static FSDirectoryPtr open(const String& path); /// Just like {@link #open(File)}, but allows you to also specify a custom {@link LockFactory}. static FSDirectoryPtr open(const String& path, LockFactoryPtr lockFactory); /// Lists all files (not subdirectories) in the directory. /// @throws NoSuchDirectoryException if the directory does not exist, or does exist but is not a directory. static HashSet listAll(const String& dir); /// Returns the time the named file was last modified. static uint64_t fileModified(const String& directory, const String& name); /// Create file system directory. void createDir(); /// Return file system directory. 
String getFile(); /// Sets the maximum number of bytes read at once from the underlying file during {@link IndexInput#readBytes}. /// The default value is {@link #DEFAULT_READ_CHUNK_SIZE}. Changes to this value will not impact any already-opened /// {@link IndexInput}s. You should call this before attempting to open an index on the directory. This value should /// be as large as possible to reduce any possible performance impact. void setReadChunkSize(int32_t chunkSize); /// The maximum number of bytes to read at once from the underlying file during {@link IndexInput#readBytes}. /// @see #setReadChunkSize int32_t getReadChunkSize(); /// Lists all files (not subdirectories) in the directory. /// @see #listAll(const String&) virtual HashSet listAll(); /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name); /// Returns the time the named file was last modified. virtual uint64_t fileModified(const String& name); /// Set the modified time of an existing file to now. virtual void touchFile(const String& name); /// Removes an existing file in the directory. virtual void deleteFile(const String& name); /// Returns the length in bytes of a file in the directory. virtual int64_t fileLength(const String& name); /// Ensure that any writes to this file are moved to stable storage. Lucene uses this to properly commit changes to /// the index, to prevent a machine/OS crash from corrupting the index. virtual void sync(const String& name); /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory /// implementation may ignore the buffer size. virtual IndexInputPtr openInput(const String& name); /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory /// implementation may ignore the buffer size. Currently the only Directory implementations that respect this parameter /// are {@link FSDirectory} and {@link CompoundFileReader}. 
virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); /// Return a string identifier that uniquely differentiates this Directory instance from other Directory instances. virtual String getLockID(); /// Closes the store to future operations. virtual void close(); /// For debug output. virtual String toString(); protected: /// Initializes the directory to create a new file with the given name. This method should be used in {@link #createOutput}. void initOutput(const String& name); }; } #endif LucenePlusPlus-rel_3.0.4/include/FSLockFactory.h000066400000000000000000000023351217574114600215630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FSLOCKFACTORY_H #define FSLOCKFACTORY_H #include "LockFactory.h" namespace Lucene { /// Base class for file system based locking implementation. class LPPAPI FSLockFactory : public LockFactory { protected: FSLockFactory(); public: virtual ~FSLockFactory(); LUCENE_CLASS(FSLockFactory); protected: /// Directory for the lock files. String lockDir; public: /// Set the lock directory. This method can be only called once to /// initialize the lock directory. It is used by {@link FSDirectory} /// to set the lock directory to itself. Subclasses can also use /// this method to set the directory in the constructor. void setLockDir(const String& lockDir); /// Retrieve the lock directory. String getLockDir(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FastCharStream.h000066400000000000000000000034701217574114600217620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FASTCHARSTREAM_H #define FASTCHARSTREAM_H #include "QueryParserCharStream.h" namespace Lucene { /// An efficient implementation of QueryParserCharStream interface. /// /// Note that this does not do line-number counting, but instead keeps track of the character position of /// the token in the input, as required by Lucene's {@link Token} API. class LPPAPI FastCharStream : public QueryParserCharStream, public LuceneObject { public: /// Constructs from a Reader. FastCharStream(ReaderPtr reader); virtual ~FastCharStream(); LUCENE_CLASS(FastCharStream); public: CharArray buffer; int32_t bufferLength; // end of valid chars int32_t bufferPosition; // next char to read int32_t tokenStart; // offset in buffer int32_t bufferStart; // position in file of buffer ReaderPtr input; // source of chars public: virtual wchar_t readChar(); virtual wchar_t BeginToken(); virtual void backup(int32_t amount); virtual String GetImage(); virtual CharArray GetSuffix(int32_t length); virtual void Done(); virtual int32_t getColumn(); virtual int32_t getLine(); virtual int32_t getEndColumn(); virtual int32_t getEndLine(); virtual int32_t getBeginColumn(); virtual int32_t getBeginLine(); protected: void refill(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Field.h000066400000000000000000000176611217574114600201450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELD_H #define FIELD_H #include "AbstractField.h" namespace Lucene { class LPPAPI Field : public AbstractField { public: /// Create a field by specifying its name, value and how it will be saved in the index. Term vectors /// will not be stored in the index. /// /// @param name The name of the field /// @param value The string to process /// @param store Whether value should be stored in the index /// @param index Whether the field should be indexed, and if so, if it should be tokenized before indexing Field(const String& name, const String& value, Store store, Index index); /// Create a field by specifying its name, value and how it will be saved in the index. /// /// @param name The name of the field /// @param value The string to process /// @param store Whether value should be stored in the index /// @param index Whether the field should be indexed, and if so, if it should be tokenized before indexing /// @param termVector Whether term vector should be stored Field(const String& name, const String& value, Store store, Index index, TermVector termVector); /// Create a tokenized and indexed field that is not stored. Term vectors will not be stored. The Reader is /// read only when the Document is added to the index, ie. you may not close the Reader until {@link /// IndexWriter#addDocument(Document)} has been called. /// /// @param name The name of the field /// @param reader The reader with the content Field(const String& name, ReaderPtr reader); /// Create a tokenized and indexed field that is not stored, optionally with storing term vectors. The /// Reader is read only when the Document is added to the index, ie. you may not close the Reader until /// {@link IndexWriter#addDocument(Document)} has been called. 
/// /// @param name The name of the field /// @param reader The reader with the content /// @param termVector Whether term vector should be stored Field(const String& name, ReaderPtr reader, TermVector termVector); /// Create a tokenized and indexed field that is not stored. Term vectors will not be stored. This is useful /// for pre-analyzed fields. The TokenStream is read only when the Document is added to the index, ie. you /// may not close the TokenStream until {@link IndexWriter#addDocument(Document)} has been called. /// /// @param name The name of the field /// @param tokenStream The TokenStream with the content Field(const String& name, TokenStreamPtr tokenStream); /// Create a tokenized and indexed field that is not stored, optionally with storing term vectors. This is /// useful for pre-analyzed fields. The TokenStream is read only when the Document is added to the index, /// ie. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)} has been called. /// /// @param name The name of the field /// @param tokenStream The TokenStream with the content /// @param termVector Whether term vector should be stored Field(const String& name, TokenStreamPtr tokenStream, TermVector termVector); /// Create a stored field with binary value. Optionally the value may be compressed. /// /// @param name The name of the field /// @param value The binary value /// @param store How value should be stored (compressed or not) Field(const String& name, ByteArray value, Store store); /// Create a stored field with binary value. Optionally the value may be compressed. 
/// /// @param name The name of the field /// @param value The binary value /// @param offset Starting offset in value where this Field's bytes are /// @param length Number of bytes to use for this Field, starting at offset /// @param store How value should be stored (compressed or not) Field(const String& name, ByteArray value, int32_t offset, int32_t length, Store store); virtual ~Field(); LUCENE_CLASS(Field); public: using AbstractField::isStored; using AbstractField::isIndexed; /// Specifies whether and how a field should be stored. static bool isStored(Store store); /// Specifies whether and how a field should be indexed. static bool isIndexed(Index index); static bool isAnalyzed(Index index); static bool omitNorms(Index index); /// Get the best representation of the index given the flags. static Field::Index toIndex(bool indexed, bool analyzed); /// Get the best representation of the index given the flags. static Field::Index toIndex(bool indexed, bool analyzed, bool omitNorms); /// Specifies whether and how a field should have term vectors. static bool isStored(TermVector termVector); static bool withPositions(TermVector termVector); static bool withOffsets(TermVector termVector); /// Get the best representation of the index given the flags. static Field::TermVector toTermVector(bool stored, bool withOffsets, bool withPositions); /// The value of the field as a String, or null. If null, the Reader value or binary value is used. /// Exactly one of stringValue(), readerValue(), and getBinaryValue() must be set. virtual String stringValue(); /// The value of the field as a Reader, or null. If null, the String value or binary value is used. /// Exactly one of stringValue(), readerValue(), and getBinaryValue() must be set. virtual ReaderPtr readerValue(); /// The value of the field as a TokesStream, or null. If null, the Reader value or String value is /// analyzed to produce the indexed tokens. 
virtual TokenStreamPtr tokenStreamValue(); /// Change the value of this field. This can be used during indexing to re-use a single Field instance /// to improve indexing speed. Typically a single {@link Document} instance is re-used as well. This /// helps most on small documents. /// /// Each Field instance should only be used once within a single {@link Document} instance. virtual void setValue(const String& value); /// Change the value of this field. virtual void setValue(ReaderPtr value); /// Change the value of this field. virtual void setValue(ByteArray value); /// Change the value of this field. virtual void setValue(ByteArray value, int32_t offset, int32_t length); /// Sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return /// true. May be combined with stored values from stringValue() or getBinaryValue() virtual void setTokenStream(TokenStreamPtr tokenStream); protected: void ConstructField(const String& name, const String& value, Store store, Index index, TermVector termVector); void ConstructField(const String& name, ReaderPtr reader, TermVector termVector); void ConstructField(const String& name, TokenStreamPtr tokenStream, TermVector termVector); void ConstructField(const String& name, ByteArray value, int32_t offset, int32_t length, Store store); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldCache.h000066400000000000000000000306511217574114600210630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHE_H #define FIELDCACHE_H #include #include "LuceneObject.h" namespace Lucene { /// Maintains caches of term values. 
/// @see FieldCacheSanityChecker class LPPAPI FieldCache { public: virtual ~FieldCache(); LUCENE_INTERFACE(FieldCache); public: /// Specifies whether and how a field should be stored. enum CacheType { CACHE_BYTE = 1, CACHE_INT, CACHE_LONG, CACHE_DOUBLE, CACHE_STRING, CACHE_STRING_INDEX }; /// Indicator for StringIndex values in the cache. /// NOTE: the value assigned to this constant must not be the same as any of those in SortField static const int32_t STRING_INDEX; public: /// The cache used internally by sorting and range query classes. static FieldCachePtr DEFAULT(); /// The default parser for byte values, which are encoded by StringUtils::toInt static ByteParserPtr DEFAULT_BYTE_PARSER(); /// The default parser for int values, which are encoded by StringUtils::toInt static IntParserPtr DEFAULT_INT_PARSER(); /// The default parser for int values, which are encoded by StringUtils::toLong static LongParserPtr DEFAULT_LONG_PARSER(); /// The default parser for double values, which are encoded by StringUtils::toDouble static DoubleParserPtr DEFAULT_DOUBLE_PARSER(); /// A parser instance for int values encoded by {@link NumericUtils#prefixCodedToInt(String)}, /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. static IntParserPtr NUMERIC_UTILS_INT_PARSER(); /// A parser instance for long values encoded by {@link NumericUtils#prefixCodedToLong(String)}, /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. static LongParserPtr NUMERIC_UTILS_LONG_PARSER(); /// A parser instance for double values encoded by {@link NumericUtils}, /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. static DoubleParserPtr NUMERIC_UTILS_DOUBLE_PARSER(); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as a single byte and returns an array of size reader.maxDoc() of the value each document /// has in the given field. /// @param reader Used to get field values. 
/// @param field Which field contains the single byte values. /// @return The values in the given field for each document. virtual Collection getBytes(IndexReaderPtr reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as bytes and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the bytes. /// @param parser Computes byte for string values. /// @return The values in the given field for each document. virtual Collection getBytes(IndexReaderPtr reader, const String& field, ByteParserPtr parser); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as integers and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the integers. /// @return The values in the given field for each document. virtual Collection getInts(IndexReaderPtr reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as integers and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the integers. /// @param parser Computes integer for string values. /// @return The values in the given field for each document. virtual Collection getInts(IndexReaderPtr reader, const String& field, IntParserPtr parser); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as longs and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the longs. 
/// @return The values in the given field for each document. virtual Collection getLongs(IndexReaderPtr reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as longs and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the longs. /// @param parser Computes long for string values. /// @return The values in the given field for each document. virtual Collection getLongs(IndexReaderPtr reader, const String& field, LongParserPtr parser); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as integers and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the doubles. /// @return The values in the given field for each document. virtual Collection getDoubles(IndexReaderPtr reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as doubles and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the doubles. /// @param parser Computes double for string values. /// @return The values in the given field for each document. virtual Collection getDoubles(IndexReaderPtr reader, const String& field, DoubleParserPtr parser); /// Checks the internal cache for an appropriate entry, and if none are found, reads the term values in /// field and returns an array of size reader.maxDoc() containing the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the strings. 
/// @return The values in the given field for each document. virtual Collection getStrings(IndexReaderPtr reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found reads the term values in /// field and returns an array of them in natural order, along with an array telling which element in /// the term array each document uses. /// @param reader Used to get field values. /// @param field Which field contains the strings. /// @return Array of terms and index into the array for each document. virtual StringIndexPtr getStringIndex(IndexReaderPtr reader, const String& field); /// Generates an array of CacheEntry objects representing all items currently in the FieldCache. virtual Collection getCacheEntries() = 0; /// Instructs the FieldCache to forcibly expunge all entries from the underlying caches. This is intended /// only to be used for test methods as a way to ensure a known base state of the Cache. It should not be /// relied on for "Cache maintenance" in general application code. virtual void purgeAllCaches() = 0; /// Drops all cache entries associated with this reader. NOTE: this reader must precisely match the reader /// that the cache entry is keyed on. If you pass a top-level reader, it usually will have no effect as /// Lucene now caches at the segment reader level. virtual void purge(IndexReaderPtr r) = 0; /// If non-null, FieldCacheImpl will warn whenever entries are created that are not sane according to /// {@link FieldCacheSanityChecker}. virtual void setInfoStream(InfoStreamPtr stream); /// @see #setInfoStream virtual InfoStreamPtr getInfoStream(); }; class LPPAPI CreationPlaceholder : public LuceneObject { public: virtual ~CreationPlaceholder(); LUCENE_CLASS(CreationPlaceholder); public: boost::any value; }; /// Stores term text values and document ordering data. 
class LPPAPI StringIndex : public LuceneObject { public: StringIndex(Collection values, Collection lookup); virtual ~StringIndex(); LUCENE_CLASS(StringIndex); public: /// All the term values, in natural order. Collection lookup; /// For each document, an index into the lookup array. Collection order; public: int32_t binarySearchLookup(const String& key); }; /// Marker interface as super-interface to all parsers. It is used to specify a custom parser to {@link /// SortField#SortField(String, Parser)}. class LPPAPI Parser : public LuceneObject { public: virtual ~Parser(); LUCENE_CLASS(Parser); }; /// Interface to parse bytes from document fields. /// @see FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr) class LPPAPI ByteParser : public Parser { public: virtual ~ByteParser(); LUCENE_CLASS(ByteParser); public: /// Return a single Byte representation of this field's value. virtual uint8_t parseByte(const String& string); }; /// Interface to parse ints from document fields. /// @see FieldCache#getInts(IndexReaderPtr, String, IntParserPtr) class LPPAPI IntParser : public Parser { public: virtual ~IntParser(); LUCENE_CLASS(IntParser); public: /// Return a integer representation of this field's value. virtual int32_t parseInt(const String& string); }; /// Interface to parse longs from document fields. /// @see FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr) class LPPAPI LongParser : public Parser { public: virtual ~LongParser(); LUCENE_CLASS(LongParser); public: /// Return a long representation of this field's value. virtual int64_t parseLong(const String& string); }; /// Interface to parse doubles from document fields. /// @see FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr) class LPPAPI DoubleParser : public Parser { public: virtual ~DoubleParser(); LUCENE_CLASS(DoubleParser); public: /// Return a double representation of this field's value. 
virtual double parseDouble(const String& string); }; /// A unique Identifier/Description for each item in the FieldCache. Can be useful for logging/debugging. class LPPAPI FieldCacheEntry : public LuceneObject { public: virtual ~FieldCacheEntry(); LUCENE_CLASS(FieldCacheEntry); public: virtual LuceneObjectPtr getReaderKey() = 0; virtual String getFieldName() = 0; virtual int32_t getCacheType() = 0; virtual boost::any getCustom() = 0; virtual boost::any getValue() = 0; virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldCacheImpl.h000066400000000000000000000136031217574114600217030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHEIMPL_H #define FIELDCACHEIMPL_H #include "FieldCache.h" namespace Lucene { /// The default cache implementation, storing all values in memory. A WeakHashMap is used for storage. 
class FieldCacheImpl : public FieldCache, public LuceneObject { public: FieldCacheImpl(); virtual ~FieldCacheImpl(); LUCENE_CLASS(FieldCacheImpl); protected: MapStringCache caches; InfoStreamPtr infoStream; public: virtual void initialize(); virtual void purgeAllCaches(); virtual void purge(IndexReaderPtr r); virtual Collection getCacheEntries(); virtual Collection getBytes(IndexReaderPtr reader, const String& field); virtual Collection getBytes(IndexReaderPtr reader, const String& field, ByteParserPtr parser); virtual Collection getInts(IndexReaderPtr reader, const String& field); virtual Collection getInts(IndexReaderPtr reader, const String& field, IntParserPtr parser); virtual Collection getLongs(IndexReaderPtr reader, const String& field); virtual Collection getLongs(IndexReaderPtr reader, const String& field, LongParserPtr parser); virtual Collection getDoubles(IndexReaderPtr reader, const String& field); virtual Collection getDoubles(IndexReaderPtr reader, const String& field, DoubleParserPtr parser); virtual Collection getStrings(IndexReaderPtr reader, const String& field); virtual StringIndexPtr getStringIndex(IndexReaderPtr reader, const String& field); virtual void setInfoStream(InfoStreamPtr stream); virtual InfoStreamPtr getInfoStream(); }; class Entry : public LuceneObject { public: /// Creates one of these objects for a custom comparator/parser. Entry(const String& field, boost::any custom); virtual ~Entry(); LUCENE_CLASS(Entry); public: String field; // which Fieldable boost::any custom; // which custom comparator or parser public: /// Two of these are equal if they reference the same field and type. virtual bool equals(LuceneObjectPtr other); /// Composes a hashcode based on the field and type. virtual int32_t hashCode(); }; /// Internal cache. 
class Cache : public LuceneObject { public: Cache(FieldCachePtr wrapper = FieldCachePtr()); virtual ~Cache(); LUCENE_CLASS(Cache); public: FieldCacheWeakPtr _wrapper; WeakMapLuceneObjectMapEntryAny readerCache; protected: virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key) = 0; public: /// Remove this reader from the cache, if present. virtual void purge(IndexReaderPtr r); virtual boost::any get(IndexReaderPtr reader, EntryPtr key); virtual void printNewInsanity(InfoStreamPtr infoStream, boost::any value); }; class ByteCache : public Cache { public: ByteCache(FieldCachePtr wrapper = FieldCachePtr()); virtual ~ByteCache(); LUCENE_CLASS(ByteCache); protected: virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); }; class IntCache : public Cache { public: IntCache(FieldCachePtr wrapper = FieldCachePtr()); virtual ~IntCache(); LUCENE_CLASS(IntCache); protected: virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); }; class LongCache : public Cache { public: LongCache(FieldCachePtr wrapper = FieldCachePtr()); virtual ~LongCache(); LUCENE_CLASS(LongCache); protected: virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); }; class DoubleCache : public Cache { public: DoubleCache(FieldCachePtr wrapper = FieldCachePtr()); virtual ~DoubleCache(); LUCENE_CLASS(DoubleCache); protected: virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); }; class StringCache : public Cache { public: StringCache(FieldCachePtr wrapper = FieldCachePtr()); virtual ~StringCache(); LUCENE_CLASS(StringCache); protected: virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); }; class StringIndexCache : public Cache { public: StringIndexCache(FieldCachePtr wrapper = FieldCachePtr()); virtual ~StringIndexCache(); LUCENE_CLASS(StringIndexCache); protected: virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); }; class FieldCacheEntryImpl : public FieldCacheEntry { public: 
FieldCacheEntryImpl(LuceneObjectPtr readerKey, const String& fieldName, int32_t cacheType, boost::any custom, boost::any value); virtual ~FieldCacheEntryImpl(); LUCENE_CLASS(FieldCacheEntryImpl); protected: LuceneObjectPtr readerKey; String fieldName; int32_t cacheType; boost::any custom; boost::any value; public: virtual LuceneObjectPtr getReaderKey(); virtual String getFieldName(); virtual int32_t getCacheType(); virtual boost::any getCustom(); virtual boost::any getValue(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldCacheRangeFilter.h000066400000000000000000000160531217574114600232060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHERANGEFILTER_H #define FIELDCACHERANGEFILTER_H #include "Filter.h" #include "FieldCache.h" namespace Lucene { /// A range filter built on top of a cached single term field (in {@link FieldCache}). /// /// FieldCacheRangeFilter builds a single cache for the field the first time it is used. Each subsequent /// FieldCacheRangeFilter on the same field then reuses this cache, even if the range itself changes. /// /// This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast) as building a /// {@link TermRangeFilter}, if using a {@link #newStringRange}. However, if the range never changes it is /// slower (around 2x as slow) than building a CachingWrapperFilter on top of a single {@link TermRangeFilter}. /// /// For numeric data types, this filter may be significantly faster than {@link NumericRangeFilter}. /// Furthermore, it does not need the numeric values encoded by {@link NumericField}. 
But it has the problem /// that it only works with exact one value/document (see below). /// /// As with all {@link FieldCache} based functionality, FieldCacheRangeFilter is only valid for fields which /// exact one term for each document (except for {@link #newStringRange} where 0 terms are also allowed). Due /// to a restriction of {@link FieldCache}, for numeric ranges all terms that do not have a numeric value, 0 /// is assumed. /// /// Thus it works on dates, prices and other single value fields but will not work on regular text fields. It /// is preferable to use a NOT_ANALYZED field to ensure that there is only a single term. /// /// Do not instantiate this template directly, use one of the static factory methods available, that create a /// correct instance for different data types supported by {@link FieldCache}. class LPPAPI FieldCacheRangeFilter : public Filter { public: FieldCacheRangeFilter(const String& field, ParserPtr parser, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilter(); LUCENE_CLASS(FieldCacheRangeFilter); INTERNAL: String field; ParserPtr parser; bool includeLower; bool includeUpper; public: /// Creates a string range filter using {@link FieldCache#getStringIndex}. This works with all fields containing /// zero or one term in the field. The range can be half-open by setting one of the values to null. static FieldCacheRangeFilterPtr newStringRange(const String& field, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getBytes(IndexReaderPtr, String)}. This works with all /// byte fields containing exactly one numeric term in the field. The range can be half-open by setting one of the /// values to null. 
static FieldCacheRangeFilterPtr newByteRange(const String& field, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr)}. This /// works with all byte fields containing exactly one numeric term in the field. The range can be half-open by /// setting one of the values to null. static FieldCacheRangeFilterPtr newByteRange(const String& field, ByteParserPtr parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getInts(IndexReaderPtr, String)}. This works with all /// int fields containing exactly one numeric term in the field. The range can be half-open by setting one of the /// values to null. static FieldCacheRangeFilterPtr newIntRange(const String& field, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getInts(IndexReaderPtr, String, IntParserPtr)}. This /// works with all int fields containing exactly one numeric term in the field. The range can be half-open by /// setting one of the values to null. static FieldCacheRangeFilterPtr newIntRange(const String& field, IntParserPtr parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getLongs(IndexReaderPtr, String)}. This works with all /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one of the /// values to null. static FieldCacheRangeFilterPtr newLongRange(const String& field, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr)}. This /// works with all long fields containing exactly one numeric term in the field. 
The range can be half-open by /// setting one of the values to null. static FieldCacheRangeFilterPtr newLongRange(const String& field, LongParserPtr parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getDoubles(IndexReaderPtr, String)}. This works with all /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one of the /// values to null. static FieldCacheRangeFilterPtr newDoubleRange(const String& field, double lowerVal, double upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr)}. This /// works with all long fields containing exactly one numeric term in the field. The range can be half-open by /// setting one of the values to null. static FieldCacheRangeFilterPtr newDoubleRange(const String& field, DoubleParserPtr parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper); virtual String toString() = 0; virtual bool equals(LuceneObjectPtr other) = 0; virtual int32_t hashCode() = 0; /// Returns the field name for this filter virtual String getField(); /// Returns true if the lower endpoint is inclusive virtual bool includesLower(); /// Returns true if the upper endpoint is inclusive virtual bool includesUpper(); /// Returns the current numeric parser virtual ParserPtr getParser(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldCacheSanityChecker.h000066400000000000000000000150541217574114600235400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHESANITYCHECKER_H #define FIELDCACHESANITYCHECKER_H #include "LuceneObject.h" #include "MapOfSets.h" namespace Lucene { /// Provides methods for sanity checking that entries in the FieldCache are not wasteful or inconsistent. /// /// Lucene 2.9 Introduced numerous enhancements into how the FieldCache is used by the low levels of Lucene /// searching (for Sorting and ValueSourceQueries) to improve both the speed for Sorting, as well as reopening /// of IndexReaders. But these changes have shifted the usage of FieldCache from "top level" IndexReaders /// (frequently a MultiReader or DirectoryReader) down to the leaf level SegmentReaders. As a result, /// existing applications that directly access the FieldCache may find RAM usage increase significantly when /// upgrading to 2.9 or later. This class provides an API for these applications (or their Unit tests) to /// check at run time if the FieldCache contains "insane" usages of the FieldCache. /// /// @see FieldCache /// @see FieldCacheSanityChecker.Insanity /// @see FieldCacheSanityChecker.InsanityType class LPPAPI FieldCacheSanityChecker : public LuceneObject { public: FieldCacheSanityChecker(); virtual ~FieldCacheSanityChecker(); LUCENE_CLASS(FieldCacheSanityChecker); public: typedef MapOfSets< int32_t, boost::hash, std::equal_to, FieldCacheEntryPtr, luceneHash, luceneEquals > MapSetIntFieldCacheEntry; typedef MapOfSets< ReaderFieldPtr, luceneHash, luceneEquals, int32_t, boost::hash, std::equal_to > MapSetReaderFieldInt; typedef MapOfSets< ReaderFieldPtr, luceneHash, luceneEquals, ReaderFieldPtr, luceneHash, luceneEquals > MapSetReaderFieldReaderField; /// An Enumeration of the different types of "insane" behaviour that may be detected in a FieldCache. enum InsanityType { /// Indicates an overlap in cache usage on a given field in sub/super readers. 
SUBREADER, /// Indicates entries have the same reader+fieldname but different cached values. This can happen /// if different datatypes, or parsers are used -- and while it's not necessarily a bug it's /// typically an indication of a possible problem. /// /// NOTE: Only the reader, fieldname, and cached value are actually tested -- if two cache entries /// have different parsers or datatypes but the cached values are the same Object (== not just equal()) /// this method does not consider that a red flag. This allows for subtle variations in the way a /// Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) VALUEMISMATCH, /// Indicates an expected bit of "insanity". This may be useful for clients that wish to preserve/log /// information about insane usage but indicate that it was expected. EXPECTED }; /// Quick and dirty convenience method /// @see #check static Collection checkSanity(FieldCachePtr cache); /// Quick and dirty convenience method that instantiates an instance with "good defaults" and uses it to /// test the CacheEntrys. /// @see #check static Collection checkSanity(Collection cacheEntries); /// Tests a CacheEntry[] for indication of "insane" cache usage. /// NOTE: FieldCache CreationPlaceholder objects are ignored. Collection check(Collection cacheEntries); protected: /// Internal helper method used by check that iterates over valMismatchKeys and generates a Collection of /// Insanity instances accordingly. The MapOfSets are used to populate the Insanity objects. /// @see InsanityType#VALUEMISMATCH Collection checkValueMismatch(MapSetIntFieldCacheEntry valIdToItems, MapSetReaderFieldInt readerFieldToValIds, SetReaderField valMismatchKeys); /// Internal helper method used by check that iterates over the keys of readerFieldToValIds and generates a /// Collection of Insanity instances whenever two (or more) ReaderField instances are found that have an /// ancestry relationships. 
/// @see InsanityType#SUBREADER Collection checkSubreaders(MapSetIntFieldCacheEntry valIdToItems, MapSetReaderFieldInt readerFieldToValIds); /// Checks if the seed is an IndexReader, and if so will walk the hierarchy of subReaders building up a /// list of the objects returned by obj.getFieldCacheKey() Collection getAllDecendentReaderKeys(LuceneObjectPtr seed); }; /// Simple container for a collection of related CacheEntry objects that in conjunction with each other /// represent some "insane" usage of the FieldCache. class LPPAPI Insanity : public LuceneObject { public: Insanity(FieldCacheSanityChecker::InsanityType type, const String& msg, Collection entries); virtual ~Insanity(); LUCENE_CLASS(Insanity); protected: FieldCacheSanityChecker::InsanityType type; String msg; Collection entries; public: /// Type of insane behavior this object represents FieldCacheSanityChecker::InsanityType getType(); /// Description of the insane behaviour String getMsg(); /// CacheEntry objects which suggest a problem Collection getCacheEntries(); /// Multi-Line representation of this Insanity object, starting with the Type and Msg, followed by each /// CacheEntry.toString() on it's own line prefaced by a tab character virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldCacheSource.h000066400000000000000000000053211217574114600222400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHESOURCE_H #define FIELDCACHESOURCE_H #include "ValueSource.h" namespace Lucene { /// A base class for ValueSource implementations that retrieve values for a single field from the /// {@link FieldCache}. 
/// /// Fields used herein must be indexed (doesn't matter if these fields are stored or not). /// /// It is assumed that each such indexed field is untokenized, or at least has a single token in a document. /// For documents with multiple tokens of the same field, behavior is undefined (It is likely that current /// code would use the value of one of these tokens, but this is not guaranteed). /// /// Document with no tokens in this field are assigned the Zero value. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. class LPPAPI FieldCacheSource : public ValueSource { public: /// Create a cached field source for the input field. FieldCacheSource(const String& field); virtual ~FieldCacheSource(); LUCENE_CLASS(FieldCacheSource); protected: String field; public: virtual DocValuesPtr getValues(IndexReaderPtr reader); virtual String description(); /// Return cached DocValues for input field and reader. /// @param cache FieldCache so that values of a field are loaded once per reader (RAM allowing) /// @param field Field for which values are required. /// @see ValueSource virtual DocValuesPtr getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader) = 0; virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); /// Check if equals to another {@link FieldCacheSource}, already knowing that cache and field are equal. virtual bool cachedFieldSourceEquals(FieldCacheSourcePtr other) = 0; /// Return a hash code of a {@link FieldCacheSource}, without the hash-codes of the field and the cache /// (those are taken care of elsewhere). 
virtual int32_t cachedFieldSourceHashCode() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldCacheTermsFilter.h000066400000000000000000000071001217574114600232350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHETERMSFILTER_H #define FIELDCACHETERMSFILTER_H #include "Filter.h" namespace Lucene { /// A {@link Filter} that only accepts documents whose single term value in the specified field is contained /// in the provided set of allowed terms. /// /// This is the same functionality as TermsFilter (from contrib/queries), except this filter requires that the /// field contains only a single term for all documents. Because of drastically different implementations, /// they also have different performance characteristics, as described below. /// /// The first invocation of this filter on a given field will be slower, since a {@link StringIndex} must be /// created. Subsequent invocations using the same field will re-use this cache. However, as with all /// functionality based on {@link FieldCache}, persistent RAM is consumed to hold the cache, and is not freed /// until the {@link IndexReader} is closed. In contrast, TermsFilter has no persistent RAM consumption. /// /// With each search, this filter translates the specified set of Terms into a private {@link OpenBitSet} keyed /// by term number per unique {@link IndexReader} (normally one reader per segment). Then, during matching, /// the term number for each docID is retrieved from the cache and then checked for inclusion using the {@link /// OpenBitSet}. 
Since all testing is done using RAM resident data structures, performance should be very fast, /// most likely fast enough to not require further caching of the DocIdSet for each possible combination of /// terms. However, because docIDs are simply scanned linearly, an index with a great many small documents may /// find this linear scan too costly. /// /// In contrast, TermsFilter builds up an {@link OpenBitSet}, keyed by docID, every time it's created, by /// enumerating through all matching docs using {@link TermDocs} to seek and scan through each term's docID list. /// While there is no linear scan of all docIDs, besides the allocation of the underlying array in the {@link /// OpenBitSet}, this approach requires a number of "disk seeks" in proportion to the number of terms, which can /// be exceptionally costly when there are cache misses in the OS's IO cache. /// /// Generally, this filter will be slower on the first invocation for a given field, but subsequent invocations, /// even if you change the allowed set of Terms, should be faster than TermsFilter, especially as the number of /// Terms being matched increases. If you are matching only a very small number of terms, and those terms in /// turn match a very small number of documents, TermsFilter may perform faster. /// /// Which filter is best is very application dependent. class LPPAPI FieldCacheTermsFilter : public Filter { public: FieldCacheTermsFilter(const String& field, Collection terms); virtual ~FieldCacheTermsFilter(); LUCENE_CLASS(FieldCacheTermsFilter); protected: String field; Collection terms; public: FieldCachePtr getFieldCache(); virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldComparator.h000066400000000000000000000331641217574114600221710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCOMPARATOR_H #define FIELDCOMPARATOR_H #include "LuceneObject.h" namespace Lucene { /// A FieldComparator compares hits so as to determine their sort order when collecting the top results with /// {@link TopFieldCollector}. The concrete public FieldComparator classes here correspond to the SortField types. /// /// This API is designed to achieve high performance sorting, by exposing a tight interaction with {@link /// FieldValueHitQueue} as it visits hits. Whenever a hit is competitive, it's enrolled into a virtual slot, /// which is an int ranging from 0 to numHits-1. The {@link FieldComparator} is made aware of segment transitions /// during searching in case any internal state it's tracking needs to be recomputed during these transitions. /// /// A comparator must define these functions: ///
    ///
  • {@link #compare} Compare a hit at 'slot a' with hit 'slot b'. /// ///
  • {@link #setBottom} This method is called by {@link FieldValueHitQueue} to notify the FieldComparator of /// the current weakest ("bottom") slot. Note that this slot may not hold the weakest value according to your /// comparator, in cases where your comparator is not the primary one (ie, is only used to break ties from the /// comparators before it). /// ///
  • {@link #compareBottom} Compare a new hit (docID) against the "weakest" (bottom) entry in the queue. /// ///
  • {@link #copy} Installs a new hit into the priority queue. The {@link FieldValueHitQueue} calls this /// method when a new hit is competitive. /// ///
  • {@link #setNextReader} Invoked when the search is switching to the next segment. You may need to update /// internal state of the comparator, for example retrieving new values from the {@link FieldCache}. /// ///
  • {@link #value} Return the sort value stored in the specified slot. This is only called at the end of /// the search, in order to populate {@link FieldDoc#fields} when returning the top results. ///
class LPPAPI FieldComparator : public LuceneObject { public: virtual ~FieldComparator(); LUCENE_CLASS(FieldComparator); public: /// Compare hit at slot1 with hit at slot2. /// @param slot1 first slot to compare /// @param slot2 second slot to compare /// @return any N < 0 if slot2's value is sorted after slot1, any N > 0 if the slot2's value is sorted /// before slot1 and 0 if they are equal virtual int32_t compare(int32_t slot1, int32_t slot2) = 0; /// Set the bottom slot, ie the "weakest" (sorted last) entry in the queue. When {@link #compareBottom} /// is called, you should compare against this slot. This will always be called before {@link #compareBottom}. /// @param slot the currently weakest (sorted last) slot in the queue virtual void setBottom(int32_t slot) = 0; /// Compare the bottom of the queue with doc. This will only invoked after setBottom has been called. /// This should return the same result as {@link #compare(int,int)}} as if bottom were slot1 and the new /// document were slot 2. /// /// For a search that hits many results, this method will be the hotspot (invoked by far the most frequently). /// /// @param doc that was hit /// @return any N < 0 if the doc's value is sorted after the bottom entry (not competitive), any N > 0 if /// the doc's value is sorted before the bottom entry and 0 if they are equal. virtual int32_t compareBottom(int32_t doc) = 0; /// This method is called when a new hit is competitive. You should copy any state associated with this /// document that will be required for future comparisons, into the specified slot. /// @param slot which slot to copy the hit to /// @param doc docID relative to current reader virtual void copy(int32_t slot, int32_t doc) = 0; /// Set a new Reader. All doc correspond to the current Reader. 
/// /// @param reader current reader /// @param docBase docBase of this reader virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) = 0; /// Sets the Scorer to use in case a document's score is needed. /// @param scorer Scorer instance that you should use to obtain the current hit's score, if necessary. virtual void setScorer(ScorerPtr scorer); /// Return the actual value in the slot. /// @param slot the value /// @return value in this slot upgraded to ComparableValue virtual ComparableValue value(int32_t slot) = 0; }; template class NumericComparator : public FieldComparator { public: NumericComparator(int32_t numHits, const String& field = EmptyString) { this->values = Collection::newInstance(numHits); this->field = field; this->bottom = 0; } virtual ~NumericComparator() { } protected: Collection values; Collection currentReaderValues; String field; TYPE bottom; public: virtual int32_t compare(int32_t slot1, int32_t slot2) { return (int32_t)(values[slot1] - values[slot2]); } virtual int32_t compareBottom(int32_t doc) { return (int32_t)(bottom - currentReaderValues[doc]); } virtual void copy(int32_t slot, int32_t doc) { values[slot] = currentReaderValues[doc]; } virtual void setBottom(int32_t slot) { bottom = values[slot]; } virtual ComparableValue value(int32_t slot) { return ComparableValue(values[slot]); } }; /// Parses field's values as byte (using {@link FieldCache#getBytes} and sorts by ascending value. 
class LPPAPI ByteComparator : public NumericComparator { public: ByteComparator(int32_t numHits, const String& field, ParserPtr parser); virtual ~ByteComparator(); LUCENE_CLASS(ByteComparator); protected: ByteParserPtr parser; public: virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); }; /// Sorts by ascending docID class LPPAPI DocComparator : public NumericComparator { public: DocComparator(int32_t numHits); virtual ~DocComparator(); LUCENE_CLASS(DocComparator); protected: int32_t docBase; public: virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); }; /// Parses field's values as double (using {@link FieldCache#getDoubles} and sorts by ascending value class LPPAPI DoubleComparator : public NumericComparator { public: DoubleComparator(int32_t numHits, const String& field, ParserPtr parser); virtual ~DoubleComparator(); LUCENE_CLASS(DoubleComparator); protected: DoubleParserPtr parser; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); }; /// Parses field's values as int (using {@link FieldCache#getInts} and sorts by ascending value class LPPAPI IntComparator : public NumericComparator { public: IntComparator(int32_t numHits, const String& field, ParserPtr parser); virtual ~IntComparator(); LUCENE_CLASS(IntComparator); protected: IntParserPtr parser; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); }; /// Parses field's values as long (using {@link FieldCache#getLongs} and sorts by ascending value class LPPAPI LongComparator : public NumericComparator { public: LongComparator(int32_t numHits, const String& field, ParserPtr parser); virtual ~LongComparator(); LUCENE_CLASS(LongComparator); protected: 
LongParserPtr parser; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); }; /// Sorts by descending relevance. NOTE: if you are sorting only by descending relevance and then secondarily /// by ascending docID, performance is faster using {@link TopScoreDocCollector} directly (which {@link /// IndexSearcher#search} uses when no {@link Sort} is specified). class LPPAPI RelevanceComparator : public NumericComparator { public: RelevanceComparator(int32_t numHits); virtual ~RelevanceComparator(); LUCENE_CLASS(RelevanceComparator); protected: ScorerPtr scorer; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setScorer(ScorerPtr scorer); }; /// Sorts by a field's value using the Collator for a given Locale. class LPPAPI StringComparatorLocale : public FieldComparator { public: StringComparatorLocale(int32_t numHits, const String& field, const std::locale& locale); virtual ~StringComparatorLocale(); LUCENE_CLASS(StringComparatorLocale); protected: Collection values; Collection currentReaderValues; String field; CollatorPtr collator; String bottom; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setBottom(int32_t slot); virtual ComparableValue value(int32_t slot); }; /// Sorts by field's natural String sort order, using ordinals. This is functionally equivalent to {@link /// StringValComparator}, but it first resolves the string to their relative ordinal positions (using the /// index returned by {@link FieldCache#getStringIndex}), and does most comparisons using the ordinals. 
/// For medium to large results, this comparator will be much faster than {@link StringValComparator}. For /// very small result sets it may be slower. class LPPAPI StringOrdValComparator : public FieldComparator { public: StringOrdValComparator(int32_t numHits, const String& field, int32_t sortPos, bool reversed); virtual ~StringOrdValComparator(); LUCENE_CLASS(StringOrdValComparator); protected: Collection ords; Collection values; Collection readerGen; int32_t currentReaderGen; Collection lookup; Collection order; String field; int32_t bottomSlot; int32_t bottomOrd; String bottomValue; bool reversed; int32_t sortPos; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setBottom(int32_t slot); virtual ComparableValue value(int32_t slot); virtual Collection getValues(); virtual int32_t getBottomSlot(); virtual String getField(); protected: void convert(int32_t slot); int32_t binarySearch(Collection lookup, const String& key, int32_t low, int32_t high); }; /// Sorts by field's natural String sort order. All comparisons are done using String.compare, which is /// slow for medium to large result sets but possibly very fast for very small results sets. 
class LPPAPI StringValComparator : public FieldComparator { public: StringValComparator(int32_t numHits, const String& field); virtual ~StringValComparator(); LUCENE_CLASS(StringOrdValComparator); protected: Collection values; Collection currentReaderValues; String field; String bottom; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setBottom(int32_t slot); virtual ComparableValue value(int32_t slot); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldComparatorSource.h000066400000000000000000000020001217574114600233330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCOMPARATORSOURCE_H #define FIELDCOMPARATORSOURCE_H #include "LuceneObject.h" namespace Lucene { /// Provides a {@link FieldComparator} for custom field sorting. class LPPAPI FieldComparatorSource : public LuceneObject { public: virtual ~FieldComparatorSource(); LUCENE_CLASS(FieldComparatorSource); public: /// Creates a comparator for the field in the given index. /// @param fieldname Name of the field to create comparator for. /// @return FieldComparator. virtual FieldComparatorPtr newComparator(const String& fieldname, int32_t numHits, int32_t sortPos, bool reversed) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldDoc.h000066400000000000000000000034351217574114600205650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDDOC_H #define FIELDDOC_H #include "ScoreDoc.h" namespace Lucene { /// A ScoreDoc which also contains information about how to sort the referenced document. In addition to the /// document number and score, this object contains an array of values for the document from the field(s) used /// to sort. For example, if the sort criteria was to sort by fields "a", "b" then "c", the fields object array /// will have three elements, corresponding respectively to the term values for the document in fields "a", "b" /// and "c". The class of each element in the array will be either Integer, Double or String depending on the /// type of values in the terms of each field. class LPPAPI FieldDoc : public ScoreDoc { public: FieldDoc(int32_t doc, double score, Collection fields = Collection()); virtual ~FieldDoc(); LUCENE_CLASS(FieldDoc); public: /// The values which are used to sort the referenced document. The order of these will match the original /// sort criteria given by a Sort object. Each Object will be either an Integer, Double or String, depending /// on the type of values in the terms of the original field. /// @see Sort /// @see Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr) Collection fields; public: virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldDocSortedHitQueue.h000066400000000000000000000040351217574114600234150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDDOCSORTEDHITQUEUE_H #define FIELDDOCSORTEDHITQUEUE_H #include "PriorityQueue.h" namespace Lucene { /// Collects sorted results from Searchable's and collates them. /// The elements put into this queue must be of type FieldDoc. class FieldDocSortedHitQueue : public PriorityQueue { public: FieldDocSortedHitQueue(int32_t size); virtual ~FieldDocSortedHitQueue(); LUCENE_CLASS(FieldDocSortedHitQueue); public: Collection fields; // used in the case where the fields are sorted by locale based strings Collection collators; public: /// Allows redefinition of sort fields if they are null. This is to handle the case using /// ParallelMultiSearcher where the original list contains AUTO and we don't know the actual sort /// type until the values come back. The fields can only be set once. This method should be /// synchronized external like all other PQ methods. void setFields(Collection fields); /// Returns the fields being used to sort. Collection getFields(); protected: /// Returns an array of collators, possibly null. The collators correspond to any SortFields which /// were given a specific locale. /// @param fields Array of sort fields. /// @return Array, possibly null. Collection hasCollators(Collection fields); /// Returns whether first is less relevant than second. virtual bool lessThan(const FieldDocPtr& first, const FieldDocPtr& second); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldInfo.h000066400000000000000000000031411217574114600207450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDINFO_H #define FIELDINFO_H #include "LuceneObject.h" namespace Lucene { class FieldInfo : public LuceneObject { public: FieldInfo(const String& na, bool tk, int32_t nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); virtual ~FieldInfo(); LUCENE_CLASS(FieldInfo); public: String name; bool isIndexed; int32_t number; // true if term vector for this field should be stored bool storeTermVector; bool storeOffsetWithTermVector; bool storePositionWithTermVector; bool omitNorms; // omit norms associated with indexed fields bool omitTermFreqAndPositions; bool storePayloads; // whether this field stores payloads together with term positions public: virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); void update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldInfos.h000066400000000000000000000173221217574114600211360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDINFOS_H #define FIELDINFOS_H #include "LuceneObject.h" namespace Lucene { /// Access to the Fieldable Info file that describes document fields and whether or not they are indexed. /// Each segment has a separate Fieldable Info file. 
Objects of this class are thread-safe for multiple /// readers, but only one thread can be adding documents at a time, with no other reader or writer threads /// accessing this object. class FieldInfos : public LuceneObject { public: FieldInfos(); /// Construct a FieldInfos object using the directory and the name of the file IndexInput /// @param d The directory to open the IndexInput from /// @param name The name of the file to open the IndexInput from in the Directory FieldInfos(DirectoryPtr d, const String& name); virtual ~FieldInfos(); LUCENE_CLASS(FieldInfos); public: // Used internally (ie not written to *.fnm files) for pre-2.9 files static const int32_t FORMAT_PRE; // First used in 2.9; prior to 2.9 there was no format header static const int32_t FORMAT_START; static const int32_t CURRENT_FORMAT; static const uint8_t IS_INDEXED; static const uint8_t STORE_TERMVECTOR; static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR; static const uint8_t STORE_OFFSET_WITH_TERMVECTOR; static const uint8_t OMIT_NORMS; static const uint8_t STORE_PAYLOADS; static const uint8_t OMIT_TERM_FREQ_AND_POSITIONS; protected: Collection byNumber; MapStringFieldInfo byName; int32_t format; public: /// Returns a deep clone of this FieldInfos instance. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Adds field info for a Document. void add(DocumentPtr doc); /// Returns true if any fields do not omitTermFreqAndPositions bool hasProx(); /// Add fields that are indexed. Whether they have termvectors has to be specified. /// @param names The names of the fields /// @param storeTermVectors Whether the fields store term vectors or not /// @param storePositionWithTermVector true if positions should be stored. /// @param storeOffsetWithTermVector true if offsets should be stored void addIndexed(HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector); /// Assumes the fields are not storing term vectors. 
/// @param names The names of the fields /// @param isIndexed Whether the fields are indexed or not /// @see #add(const String&, bool) void add(HashSet names, bool isIndexed); /// Calls 5 parameter add with false for all TermVector parameters. /// @param name The name of the Fieldable /// @param isIndexed true if the field is indexed /// @see #add(const String&, bool, bool, bool, bool) void add(const String& name, bool isIndexed); /// Calls 5 parameter add with false for term vector positions and offsets. /// @param name The name of the field /// @param isIndexed true if the field is indexed /// @param storeTermVector true if the term vector should be stored void add(const String& name, bool isIndexed, bool storeTermVector); /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed flag /// is the same as was given previously for this field. If not - marks it as being indexed. Same goes /// for the TermVector parameters. /// @param name The name of the field /// @param isIndexed true if the field is indexed /// @param storeTermVector true if the term vector should be stored /// @param storePositionWithTermVector true if the term vector with positions should be stored /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored void add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector); /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed flag /// is the same as was given previously for this field. If not - marks it as being indexed. Same goes /// for the TermVector parameters. 
/// @param name The name of the field /// @param isIndexed true if the field is indexed /// @param storeTermVector true if the term vector should be stored /// @param storePositionWithTermVector true if the term vector with positions should be stored /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored /// @param omitNorms true if the norms for the indexed field should be omitted void add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms); /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed /// flag is the same as was given previously for this field. If not - marks it as being indexed. /// Same goes for the TermVector parameters. /// @param name The name of the field /// @param isIndexed true if the field is indexed /// @param storeTermVector true if the term vector should be stored /// @param storePositionWithTermVector true if the term vector with positions should be stored /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored /// @param omitNorms true if the norms for the indexed field should be omitted /// @param storePayloads true if payloads should be stored for this field /// @param omitTermFreqAndPositions true if term freqs should be omitted for this field FieldInfoPtr add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); int32_t fieldNumber(const String& fieldName); FieldInfoPtr fieldInfo(const String& fieldName); /// Return the fieldName identified by its number. /// @return the fieldName or an empty string when the field with the given number doesn't exist. String fieldName(int32_t fieldNumber); /// Return the fieldinfo object referenced by the fieldNumber. 
/// @return the FieldInfo object or null when the given fieldNumber doesn't exist. FieldInfoPtr fieldInfo(int32_t fieldNumber); int32_t size(); bool hasVectors(); void write(DirectoryPtr d, const String& name); void write(IndexOutputPtr output); protected: FieldInfoPtr addInternal(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); void read(IndexInputPtr input, const String& fileName); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldInvertState.h000066400000000000000000000041061217574114600223240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDINVERTSTATE_H #define FIELDINVERTSTATE_H #include "LuceneObject.h" namespace Lucene { /// This class tracks the number and position / offset parameters of terms being added to the index. /// The information collected in this class is also used to calculate the normalization factor for a field. class LPPAPI FieldInvertState : public LuceneObject { public: FieldInvertState(int32_t position = 0, int32_t length = 0, int32_t numOverlap = 0, int32_t offset = 0, double boost = 0); virtual ~FieldInvertState(); LUCENE_CLASS(FieldInvertState); INTERNAL: int32_t position; int32_t length; int32_t numOverlap; int32_t offset; double boost; AttributeSourcePtr attributeSource; public: /// Re-initialize the state, using this boost value. /// @param docBoost boost value to use. void reset(double docBoost); /// Get the last processed term position. /// @return the position int32_t getPosition(); /// Get total number of terms in this field. 
/// @return the length int32_t getLength(); /// Get the number of terms with positionIncrement == 0. /// @return the numOverlap int32_t getNumOverlap(); /// Get end offset of the last processed term. /// @return the offset int32_t getOffset(); /// Get boost value. This is the cumulative product of document boost and field boost for all field /// instances sharing the same field name. /// @return the boost double getBoost(); AttributeSourcePtr getAttributeSource(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldMaskingSpanQuery.h000066400000000000000000000065001217574114600233150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDMASKINGSPANQUERY_H #define FIELDMASKINGSPANQUERY_H #include "SpanQuery.h" namespace Lucene { /// Wrapper to allow {@link SpanQuery} objects participate in composite single-field SpanQueries by /// 'lying' about their search field. That is, the masked SpanQuery will function as normal, but /// {@link SpanQuery#getField()} simply hands back the value supplied in this class's constructor. /// /// This can be used to support Queries like {@link SpanNearQuery} or {@link SpanOrQuery} across /// different fields, which is not ordinarily permitted. /// /// This can be useful for denormalized relational data: for example, when indexing a document with /// conceptually many 'children': /// ///
    /// teacherid: 1
    /// studentfirstname: james
    /// studentsurname: jones
    ///
    /// teacherid: 2
    /// studenfirstname: james
    /// studentsurname: smith
    /// studentfirstname: sally
    /// studentsurname: jones
    /// 
/// /// A SpanNearQuery with a slop of 0 can be applied across two {@link SpanTermQuery} objects as follows: /// ///
    /// SpanQueryPtr q1 = newLucene(newLucene(L"studentfirstname", L"james"));
    /// SpanQueryPtr q2 = newLucene(newLucene(L"studentsurname", L"jones"));
    /// SpanQueryPtr q2m = newLucene(q2, L"studentfirstname");
    ///
    /// Collection span = newCollection(q1, q1);
    ///
    /// QueryPtr q = newLucene(span, -1, false);
    /// 
/// to search for 'studentfirstname:james studentsurname:jones' and find teacherid 1 without matching /// teacherid 2 (which has a 'james' in position 0 and 'jones' in position 1). /// /// Note: as {@link #getField()} returns the masked field, scoring will be done using the norms of the /// field name supplied. This may lead to unexpected scoring behaviour. class LPPAPI FieldMaskingSpanQuery : public SpanQuery { public: FieldMaskingSpanQuery(SpanQueryPtr maskedQuery, const String& maskedField); virtual ~FieldMaskingSpanQuery(); LUCENE_CLASS(FieldMaskingSpanQuery); protected: SpanQueryPtr maskedQuery; String field; public: using SpanQuery::toString; virtual String getField(); SpanQueryPtr getMaskedQuery(); virtual SpansPtr getSpans(IndexReaderPtr reader); virtual void extractTerms(SetTerm terms); virtual WeightPtr createWeight(SearcherPtr searcher); virtual SimilarityPtr getSimilarity(SearcherPtr searcher); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); /// Returns a clone of this query. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldScoreQuery.h000066400000000000000000000063531217574114600221630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSCOREQUERY_H #define FIELDSCOREQUERY_H #include "ValueSourceQuery.h" namespace Lucene { /// A query that scores each document as the value of the numeric input field. /// /// The query matches all documents, and scores each document according to the numeric value of that field. 
/// /// It is assumed, and expected, that: ///
    ///
  • The field used here is indexed, and has exactly one token in every scored document. ///
  • Best if this field is un_tokenized. ///
  • That token is parseable to the selected type. ///
/// /// Combining this query in a FunctionQuery allows much freedom in affecting document scores. Note, that /// with this freedom comes responsibility: it is more than likely that the default Lucene scoring is superior /// in quality to scoring modified as explained here. However, in some cases, and certainly for research /// experiments, this capability may turn useful. /// /// When constructing this query, select the appropriate type. That type should match the data stored in the /// field. So in fact the "right" type should be selected before indexing. Type selection has effect on the /// RAM usage: ///
    ///
  • Byte consumes 1 * maxDocs bytes. ///
  • Int consumes 4 * maxDocs bytes. ///
  • Double consumes 8 * maxDocs bytes. ///
/// /// Caching: Values for the numeric field are loaded once and cached in memory for further use with the same /// IndexReader. To take advantage of this, it is extremely important to reuse index-readers or index- /// searchers, otherwise, for instance if for each query a new index reader is opened, large penalties would /// be paid for loading the field values into memory over and over again. class LPPAPI FieldScoreQuery : public ValueSourceQuery { public: /// Type of score field, indicating how field values are interpreted/parsed. enum Type { /// Field values are interpreted as numeric byte values. BYTE, /// Field values are interpreted as numeric integer values. INT, /// Field values are interpreted as numeric double values. DOUBLE }; /// Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field. /// The type param tells how to parse the field string values into a numeric score value. /// @param field the numeric field to be used. /// @param type the type of the field. FieldScoreQuery(const String& field, Type type); virtual ~FieldScoreQuery(); LUCENE_CLASS(FieldScoreQuery); public: /// Create the appropriate (cached) field value source. static ValueSourcePtr getValueSource(const String& field, Type type); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldSelector.h000066400000000000000000000065651217574114600216470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSELECTOR_H #define FIELDSELECTOR_H #include "LuceneObject.h" namespace Lucene { /// The FieldSelector allows one to make decisions about what Fields get loaded on a {@link Document} by /// {@link IndexReader#document(int32_t, FieldSelector)} class LPPAPI FieldSelector : public LuceneObject { protected: FieldSelector(); public: virtual ~FieldSelector(); LUCENE_CLASS(FieldSelector); public: /// Provides information about what should be done with this Field enum FieldSelectorResult { /// Null value SELECTOR_NULL, /// Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is /// encountered. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should /// not return null. /// {@link Document#add(Fieldable)} should be called by the Reader. SELECTOR_LOAD, /// Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually /// contain its data until invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link /// Document#getFieldable(String)} is safe to use and should return a valid instance of a {@link /// Fieldable}. /// {@link Document#add(Fieldable)} should be called by the Reader. SELECTOR_LAZY_LOAD, /// Do not load the {@link Field}. {@link Document#getField(String)} and {@link /// Document#getFieldable(String)} should return null. {@link Document#add(Fieldable)} is not called. /// {@link Document#add(Fieldable)} should not be called by the Reader. SELECTOR_NO_LOAD, /// Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading /// for the {@link Document}. Thus, the Document may not have its complete set of Fields. {@link /// Document#getField(String)} and {@link Document#getFieldable(String)} should both be valid for /// this {@link Field} /// {@link Document#add(Fieldable)} should be called by the Reader. 
SELECTOR_LOAD_AND_BREAK, /// Load the size of this {@link Field} rather than its value. Size is measured as number of bytes /// required to store the field == bytes for a binary or any compressed value, and 2*chars for a String /// value. The size is stored as a binary value, represented as an int in a byte[], with the higher /// order byte first in [0] SELECTOR_SIZE, /// Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further /// fields, after the size is loaded SELECTOR_SIZE_AND_BREAK }; public: virtual FieldSelectorResult accept(const String& fieldName) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldSortedTermVectorMapper.h000066400000000000000000000041401217574114600244720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSORTEDTERMVECTORMAPPER_H #define FIELDSORTEDTERMVECTORMAPPER_H #include #include "TermVectorMapper.h" namespace Lucene { /// For each Field, store a sorted collection of {@link TermVectorEntry}s /// This is not thread-safe. 
class LPPAPI FieldSortedTermVectorMapper : public TermVectorMapper { public: /// @param comparator A Comparator for sorting {@link TermVectorEntry}s FieldSortedTermVectorMapper(TermVectorEntryComparator comparator); FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator); virtual ~FieldSortedTermVectorMapper(); LUCENE_CLASS(FieldSortedTermVectorMapper); protected: MapStringCollectionTermVectorEntry fieldToTerms; Collection currentSet; String currentField; TermVectorEntryComparator comparator; public: /// Map the Term Vector information into your own structure virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); /// Get the mapping between fields and terms, sorted by the comparator /// @return A map between field names and {@link java.util.SortedSet}s per field. SortedSet entries are /// {@link TermVectorEntry} MapStringCollectionTermVectorEntry getFieldToTerms(); TermVectorEntryComparator getComparator(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldValueHitQueue.h000066400000000000000000000053131217574114600226030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDVALUEHITQUEUE_H #define FIELDVALUEHITQUEUE_H #include "HitQueueBase.h" #include "ScoreDoc.h" namespace Lucene { /// A hit queue for sorting by hits by terms in more than one field. Uses FieldCache::DEFAULT for maintaining /// internal term lookup tables. 
/// @see Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr) /// @see FieldCache class LPPAPI FieldValueHitQueue : public HitQueueBase { protected: FieldValueHitQueue(Collection fields, int32_t size); public: virtual ~FieldValueHitQueue(); LUCENE_CLASS(FieldValueHitQueue); protected: /// Stores the sort criteria being used. Collection fields; Collection comparators; Collection reverseMul; public: /// Creates a hit queue sorted by the given list of fields. /// @param fields SortField array we are sorting by in priority order (highest priority first); cannot /// be null or empty. /// @param size The number of hits to retain. Must be greater than zero. static FieldValueHitQueuePtr create(Collection fields, int32_t size); Collection getComparators(); Collection getReverseMul(); /// Given a queue Entry, creates a corresponding FieldDoc that contains the values used to sort the given /// document. These values are not the raw values out of the index, but the internal representation of /// them. This is so the given search hit can be collated by a MultiSearcher with other search hits. /// @param entry The Entry used to create a FieldDoc /// @return The newly created FieldDoc /// @see Searchable#search(WeightPtr, FilterPtr, int32_t, SortPtr) FieldDocPtr fillFields(FieldValueHitQueueEntryPtr entry); /// Returns the SortFields being used by this hit queue. Collection getFields(); }; class LPPAPI FieldValueHitQueueEntry : public ScoreDoc { public: FieldValueHitQueueEntry(int32_t slot, int32_t doc, double score); virtual ~FieldValueHitQueueEntry(); LUCENE_CLASS(FieldValueHitQueueEntry); public: int32_t slot; public: virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Fieldable.h000066400000000000000000000177541217574114600207740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDABLE_H #define FIELDABLE_H #include "LuceneObject.h" namespace Lucene { /// Synonymous with {@link Field}. /// /// WARNING: This interface may change within minor versions, despite Lucene's backward compatibility /// requirements. This means new methods may be added from version to version. This change only /// affects the Fieldable API; other backwards compatibility promises remain intact. For example, Lucene /// can still read and write indices created within the same major version. class LPPAPI Fieldable { public: LUCENE_INTERFACE(Fieldable); public: /// Sets the boost factor hits on this field. This value will be multiplied into the score of all /// hits on this this field of this document. /// /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field. /// If a document has multiple fields with the same name, all such values are multiplied together. /// This product is then used to compute the norm factor for the field. By default, in the {@link /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)} /// before it is stored in the index. One should attempt to ensure that this product does not overflow /// the range of that encoding. /// /// @see Document#setBoost(double) /// @see Similarity#computeNorm(String, FieldInvertState) /// @see Similarity#encodeNorm(double) virtual void setBoost(double boost) = 0; /// Returns the boost factor for hits for this field. /// /// The default value is 1.0. /// /// Note: this value is not stored directly with the document in the index. 
Documents returned from /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value /// present as when this field was indexed. virtual double getBoost() = 0; /// Returns the name of the field as an interned string. For example "date", "title", "body", ... virtual String name() = 0; /// The value of the field as a String, or empty. /// /// For indexing, if isStored()==true, the stringValue() will be used as the stored field value /// unless isBinary()==true, in which case getBinaryValue() will be used. /// /// If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. /// If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate /// indexed tokens if not null, else readerValue() will be used to generate indexed tokens if not null, /// else stringValue() will be used to generate tokens. virtual String stringValue() = 0; /// The value of the field as a Reader, which can be used at index time to generate indexed tokens. /// @see #stringValue() virtual ReaderPtr readerValue() = 0; /// The TokenStream for this field to be used when indexing, or null. /// @see #stringValue() virtual TokenStreamPtr tokenStreamValue() = 0; /// True if the value of the field is to be stored in the index for return with search hits. virtual bool isStored() = 0; /// True if the value of the field is to be indexed, so that it may be searched on. virtual bool isIndexed() = 0; /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields /// are indexed as a single word and may not be Reader-valued. virtual bool isTokenized() = 0; /// True if the term or terms used to index this field are stored as a term vector, available from /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the /// original content of the field, only to terms used to index it. 
If the original content must be /// preserved, use the stored attribute instead. virtual bool isTermVectorStored() = 0; /// True if terms are stored as term vector together with their offsets (start and end position in /// source text). virtual bool isStoreOffsetWithTermVector() = 0; /// True if terms are stored as term vector together with their token positions. virtual bool isStorePositionWithTermVector() = 0; /// True if the value of the field is stored as binary. virtual bool isBinary() = 0; /// True if norms are omitted for this indexed field. virtual bool getOmitNorms() = 0; /// If set, omit normalization factors associated with this indexed field. /// This effectively disables indexing boosts and length normalization for this field. virtual void setOmitNorms(bool omitNorms) = 0; /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field /// is lazily loaded, retrieving it's values via {@link #stringValue()} or {@link #getBinaryValue()} /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open. /// /// @return true if this field can be loaded lazily virtual bool isLazy() = 0; /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is /// undefined. /// @return index of the first character in byte[] segment that represents this Field value. virtual int32_t getBinaryOffset() = 0; /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is /// undefined. /// @return length of byte[] segment that represents this Field value. virtual int32_t getBinaryLength() = 0; /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. /// @return reference to the Field value as byte[]. 
virtual ByteArray getBinaryValue() = 0; /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. /// /// About reuse: if you pass in the result byte[] and it is used, likely the underlying implementation will /// hold onto this byte[] and return it in future calls to {@link #getBinaryValue()}. So if you subsequently /// re-use the same byte[] elsewhere it will alter this Fieldable's value. /// @param result User defined buffer that will be used if possible. If this is null or not large enough, /// a new buffer is allocated /// @return reference to the Field value as byte[]. virtual ByteArray getBinaryValue(ByteArray result) = 0; /// @see #setOmitTermFreqAndPositions virtual bool getOmitTermFreqAndPositions() = 0; /// If set, omit term freq, positions and payloads from postings for this field. /// /// NOTE: While this option reduces storage space required in the index, it also means any query requiring /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail /// to find results. virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldsReader.h000066400000000000000000000144051217574114600214440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSREADER_H #define FIELDSREADER_H #include "AbstractField.h" #include "CloseableThreadLocal.h" namespace Lucene { /// Class responsible for access to stored document fields. It uses .fdt and .fdx; files. 
class FieldsReader : public LuceneObject { public: /// Used only by clone FieldsReader(FieldInfosPtr fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, int32_t formatSize, int32_t docStoreOffset, IndexInputPtr cloneableFieldsStream, IndexInputPtr cloneableIndexStream); FieldsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn); FieldsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn, int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); virtual ~FieldsReader(); LUCENE_CLASS(FieldsReader); protected: FieldInfosPtr fieldInfos; // The main fieldStream, used only for cloning. IndexInputPtr cloneableFieldsStream; // This is a clone of cloneableFieldsStream used for reading documents. It should not be cloned outside of a // synchronized context. IndexInputPtr fieldsStream; IndexInputPtr cloneableIndexStream; IndexInputPtr indexStream; int32_t numTotalDocs; int32_t _size; bool closed; int32_t format; int32_t formatSize; // The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. int32_t docStoreOffset; CloseableThreadLocal fieldsStreamTL; bool isOriginal; public: /// Returns a cloned FieldsReader that shares open IndexInputs with the original one. It is the caller's job not to /// close the original FieldsReader until all clones are called (eg, currently SegmentReader manages this logic). virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Closes the underlying {@link IndexInput} streams, including any ones associated with a lazy implementation of a /// Field. This means that the Fields values will not be accessible. void close(); int32_t size(); bool canReadRawDocs(); DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector); /// Returns the length in bytes of each raw document in a contiguous range of length numDocs starting with startDocID. /// Returns the IndexInput (the fieldStream), already seeked to the starting point for startDocID. 
IndexInputPtr rawDocs(Collection lengths, int32_t startDocID, int32_t numDocs); protected: void ConstructReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); void ensureOpen(); void seekIndex(int32_t docID); /// Skip the field. We still have to read some of the information about the field, but can skip past the actual content. /// This will have the most payoff on large fields. void skipField(bool binary, bool compressed); void skipField(bool binary, bool compressed, int32_t toRead); void addFieldLazy(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed, bool tokenize); void addField(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed, bool tokenize); /// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes). /// Read just the size - caller must skip the field content to continue reading fields. Return the size in bytes or chars, /// depending on field type. int32_t addFieldSize(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed); ByteArray uncompress(ByteArray b); String uncompressString(ByteArray b); friend class LazyField; }; class LazyField : public AbstractField { public: LazyField(FieldsReaderPtr reader, const String& name, Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); LazyField(FieldsReaderPtr reader, const String& name, Store store, Index index, TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); virtual ~LazyField(); LUCENE_CLASS(LazyField); protected: FieldsReaderWeakPtr _reader; int32_t toRead; int64_t pointer; /// @deprecated Only kept for backward-compatibility with <3.0 indexes. bool isCompressed; public: /// The value of the field as a Reader, or null. If null, the String value, binary value, or TokenStream value is used. 
/// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. ReaderPtr readerValue(); /// The value of the field as a TokenStream, or null. If null, the Reader value, String value, or binary value is used. /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. TokenStreamPtr tokenStreamValue(); /// The value of the field as a String, or null. If null, the Reader value, binary value, or TokenStream value is used. /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. String stringValue(); int64_t getPointer(); void setPointer(int64_t pointer); int32_t getToRead(); void setToRead(int32_t toRead); /// Return the raw byte[] for the binary field. virtual ByteArray getBinaryValue(ByteArray result); protected: IndexInputPtr getFieldStream(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FieldsWriter.h000066400000000000000000000046131217574114600215160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSWRITER_H #define FIELDSWRITER_H #include "LuceneObject.h" namespace Lucene { class FieldsWriter : public LuceneObject { public: FieldsWriter(DirectoryPtr d, const String& segment, FieldInfosPtr fn); FieldsWriter(IndexOutputPtr fdx, IndexOutputPtr fdt, FieldInfosPtr fn); virtual ~FieldsWriter(); LUCENE_CLASS(FieldsWriter); protected: FieldInfosPtr fieldInfos; IndexOutputPtr fieldsStream; IndexOutputPtr indexStream; bool doClose; public: static const uint8_t FIELD_IS_TOKENIZED; static const uint8_t FIELD_IS_BINARY; static const uint8_t FIELD_IS_COMPRESSED; static const int32_t FORMAT; // Original format static const int32_t FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; // Changed strings to UTF8 static const int32_t FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; // Lucene 3.0: Removal of compressed fields // NOTE: if you introduce a new format, make it 1 higher than the current one, and always change this // if you switch to a new format! static const int32_t FORMAT_CURRENT; public: void setFieldsStream(IndexOutputPtr stream); /// Writes the contents of buffer into the fields stream and adds a new entry for this document into the index /// stream. This assumes the buffer was already written in the correct fields format. void flushDocument(int32_t numStoredFields, RAMOutputStreamPtr buffer); void skipDocument(); void flush(); void close(); void writeField(FieldInfoPtr fi, FieldablePtr field); /// Bulk write a contiguous series of documents. The lengths array is the length (in bytes) of each raw document. /// The stream IndexInput is the fieldsStream from which we should bulk-copy all bytes. 
void addRawDocuments(IndexInputPtr stream, Collection lengths, int32_t numDocs); void addDocument(DocumentPtr doc); }; } #endif LucenePlusPlus-rel_3.0.4/include/FileReader.h000066400000000000000000000027421217574114600211160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILEREADER_H #define FILEREADER_H #include "Reader.h" namespace Lucene { /// Convenience class for reading character files. class LPPAPI FileReader : public Reader { public: /// Creates a new FileReader, given the file name to read from. FileReader(const String& fileName); virtual ~FileReader(); LUCENE_CLASS(FileReader); protected: ifstreamPtr file; int64_t _length; ByteArray fileBuffer; public: static const int32_t FILE_EOF; static const int32_t FILE_ERROR; public: /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); /// Close the stream. virtual void close(); /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Reset the stream. virtual void reset(); /// The number of bytes in the file. virtual int64_t length(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FileSwitchDirectory.h000066400000000000000000000062441217574114600230430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FILESWITCHDIRECTORY_H #define FILESWITCHDIRECTORY_H #include "Directory.h" namespace Lucene { /// A Directory instance that switches files between two other /// Directory instances. /// /// Files with the specified extensions are placed in the primary /// directory; others are placed in the secondary directory. The /// provided Set must not change once passed to this class, and /// must allow multiple threads to call contains at once. class LPPAPI FileSwitchDirectory : public Directory { public: FileSwitchDirectory(HashSet primaryExtensions, DirectoryPtr primaryDir, DirectoryPtr secondaryDir, bool doClose); virtual ~FileSwitchDirectory(); LUCENE_CLASS(FileSwitchDirectory); protected: HashSet primaryExtensions; DirectoryPtr primaryDir; DirectoryPtr secondaryDir; bool doClose; public: /// Return the primary directory. DirectoryPtr getPrimaryDir(); /// Return the secondary directory. DirectoryPtr getSecondaryDir(); /// Closes the store. virtual void close(); /// Returns an array of strings, one for each file in the directory. virtual HashSet listAll(); /// Utility method to return a file's extension. static String getExtension(const String& name); /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name); /// Returns the time the named file was last modified. virtual uint64_t fileModified(const String& name); /// Set the modified time of an existing file to now. virtual void touchFile(const String& name); /// Removes an existing file in the directory. virtual void deleteFile(const String& name); /// Returns the length of a file in the directory. virtual int64_t fileLength(const String& name); /// Creates a new, empty file in the directory with the given name. /// Returns a stream writing this file. virtual IndexOutputPtr createOutput(const String& name); /// Ensure that any writes to this file are moved to stable storage. 
/// Lucene uses this to properly commit changes to the index, to /// prevent a machine/OS crash from corrupting the index. virtual void sync(const String& name); /// Returns a stream reading an existing file, with the specified /// read buffer size. The particular Directory implementation may /// ignore the buffer size. virtual IndexInputPtr openInput(const String& name); protected: DirectoryPtr getDirectory(const String& name); }; } #endif LucenePlusPlus-rel_3.0.4/include/FileUtils.h000066400000000000000000000047321217574114600210150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILEUTILS_H #define FILEUTILS_H #include "Lucene.h" namespace Lucene { namespace FileUtils { /// Return true if given file or directory exists. LPPAPI bool fileExists(const String& path); /// Return file last modified date and time. LPPAPI uint64_t fileModified(const String& path); /// Set file last modified date and time to now. LPPAPI bool touchFile(const String& path); /// Return file length in bytes. LPPAPI int64_t fileLength(const String& path); /// Set new file length, truncating or expanding as required. LPPAPI bool setFileLength(const String& path, int64_t length); /// Delete file from file system. LPPAPI bool removeFile(const String& path); /// Copy a file to/from file system. LPPAPI bool copyFile(const String& source, const String& dest); /// Create new directory under given location. LPPAPI bool createDirectory(const String& path); /// Delete directory from file system. LPPAPI bool removeDirectory(const String& path); /// Return true if given path points to a directory. 
LPPAPI bool isDirectory(const String& path); /// Return list of files (and/or directories) under given directory. /// @param path path to list directory. /// @param filesOnly if true the exclude sub-directories. /// @param dirList list of files to return. LPPAPI bool listDirectory(const String& path, bool filesOnly, HashSet dirList); /// Copy a directory to/from file system. LPPAPI bool copyDirectory(const String& source, const String& dest); /// Return complete path after joining given directory and file name. LPPAPI String joinPath(const String& path, const String& file); /// Extract parent path from given path. LPPAPI String extractPath(const String& path); /// Extract file name from given path. LPPAPI String extractFile(const String& path); } } #endif LucenePlusPlus-rel_3.0.4/include/Filter.h000066400000000000000000000033151217574114600203360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTER_H #define FILTER_H #include "LuceneObject.h" namespace Lucene { /// Abstract base class for restricting which documents may be returned during searching. class LPPAPI Filter : public LuceneObject { public: virtual ~Filter(); LUCENE_CLASS(Filter); public: /// Creates a {@link DocIdSet} enumerating the documents that should be permitted in search results. /// /// Note: null can be returned if no documents are accepted by this Filter. /// /// Note: This method will be called once per segment in the index during searching. The returned /// {@link DocIdSet} must refer to document IDs for that segment, not for the top-level reader. /// /// @param reader a {@link IndexReader} instance opened on the index currently searched on. 
Note, /// it is likely that the provided reader does not represent the whole underlying index ie. if the /// index has more than one segment the given reader only represents a single segment. /// @return a DocIdSet that provides the documents which should be permitted or prohibited in search /// results. NOTE: null can be returned if no documents will be accepted by this Filter. /// /// @see DocIdBitSet virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FilterIndexReader.h000066400000000000000000000123521217574114600224520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTERINDEXREADER_H #define FILTERINDEXREADER_H #include "IndexReader.h" #include "TermPositions.h" #include "TermEnum.h" namespace Lucene { /// A FilterIndexReader contains another IndexReader, which it uses as its basic source of data, possibly /// transforming the data along the way or providing additional functionality. The class FilterIndexReader /// itself simply implements all abstract methods of IndexReader with versions that pass all requests to /// the contained index reader. Subclasses of FilterIndexReader may further override some of these methods /// and may also provide additional methods and fields. class LPPAPI FilterIndexReader : public IndexReader { public: /// Construct a FilterIndexReader based on the specified base reader. Directory locking for delete, /// undeleteAll, and setNorm operations is left to the base reader. /// /// Note that base reader is closed if this FilterIndexReader is closed. /// @param in specified base reader. 
FilterIndexReader(IndexReaderPtr in); virtual ~FilterIndexReader(); LUCENE_CLASS(FilterIndexReader); protected: IndexReaderPtr in; public: virtual DirectoryPtr directory(); virtual Collection getTermFreqVectors(int32_t docNumber); virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); virtual int32_t numDocs(); virtual int32_t maxDoc(); virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); virtual bool isDeleted(int32_t n); virtual bool hasDeletions(); virtual bool hasNorms(const String& field); virtual ByteArray norms(const String& field); virtual void norms(const String& field, ByteArray norms, int32_t offset); virtual TermEnumPtr terms(); virtual TermEnumPtr terms(TermPtr t); virtual int32_t docFreq(TermPtr t); virtual TermDocsPtr termDocs(); virtual TermDocsPtr termDocs(TermPtr term); virtual TermPositionsPtr termPositions(); virtual HashSet getFieldNames(FieldOption fieldOption); virtual int64_t getVersion(); virtual bool isCurrent(); virtual bool isOptimized(); virtual Collection getSequentialSubReaders(); /// If the subclass of FilteredIndexReader modifies the contents of the FieldCache, you must /// override this method to provide a different key virtual LuceneObjectPtr getFieldCacheKey(); /// If the subclass of FilteredIndexReader modifies the deleted docs, you must override this /// method to provide a different key virtual LuceneObjectPtr getDeletesCacheKey(); protected: virtual void doUndeleteAll(); virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); virtual void doDelete(int32_t docNum); virtual void doCommit(MapStringString commitUserData); virtual void doClose(); }; /// Base class for filtering {@link TermDocs} implementations. 
class LPPAPI FilterTermDocs : public TermPositions, public LuceneObject { public: FilterTermDocs(TermDocsPtr in); virtual ~FilterTermDocs(); LUCENE_CLASS(FilterTermDocs); protected: TermDocsPtr in; public: virtual void seek(TermPtr term); virtual void seek(TermEnumPtr termEnum); virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); virtual int32_t read(Collection docs, Collection freqs); virtual bool skipTo(int32_t target); virtual void close(); }; /// Base class for filtering {@link TermPositions} implementations. class LPPAPI FilterTermPositions : public FilterTermDocs { public: FilterTermPositions(TermPositionsPtr in); virtual ~FilterTermPositions(); LUCENE_CLASS(FilterTermPositions); public: virtual int32_t nextPosition(); virtual int32_t getPayloadLength(); virtual ByteArray getPayload(ByteArray data, int32_t offset); virtual bool isPayloadAvailable(); }; /// Base class for filtering {@link TermEnum} implementations. class LPPAPI FilterTermEnum : public TermEnum { public: FilterTermEnum(TermEnumPtr in); virtual ~FilterTermEnum(); LUCENE_CLASS(FilterTermEnum); protected: TermEnumPtr in; public: virtual bool next(); virtual TermPtr term(); virtual int32_t docFreq(); virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FilterManager.h000066400000000000000000000047761217574114600216450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTERMANAGER_H #define FILTERMANAGER_H #include "LuceneObject.h" namespace Lucene { /// Filter caching singleton. It can be used to save filters locally for reuse. 
Also could be used as a /// persistent storage for any filter as long as the filter provides a proper hashCode(), as that is used /// as the key in the cache. /// /// The cache is periodically cleaned up from a separate thread to ensure the cache doesn't exceed the /// maximum size. class LPPAPI FilterManager : public LuceneObject { public: /// Sets up the FilterManager singleton. FilterManager(); virtual ~FilterManager(); LUCENE_CLASS(FilterManager); protected: /// The default maximum number of Filters in the cache static const int32_t DEFAULT_CACHE_CLEAN_SIZE; /// The default frequency of cache cleanup static const int64_t DEFAULT_CACHE_SLEEP_TIME; /// The cache itself MapIntFilterItem cache; /// Maximum allowed cache size int32_t cacheCleanSize; /// Cache cleaning frequency int64_t cleanSleepTime; /// Cache cleaner that runs in a separate thread FilterCleanerPtr filterCleaner; public: virtual void initialize(); static FilterManagerPtr getInstance(); /// Sets the max size that cache should reach before it is cleaned up /// @param cacheCleanSize maximum allowed cache size void setCacheSize(int32_t cacheCleanSize); /// Sets the cache cleaning frequency in milliseconds. /// @param cleanSleepTime cleaning frequency in milliseconds void setCleanThreadSleepTime(int64_t cleanSleepTime); /// Returns the cached version of the filter. Allows the caller to pass up a small filter but this will /// keep a persistent version around and allow the caching filter to do its job. /// @param filter The input filter /// @return The cached version of the filter FilterPtr getFilter(FilterPtr filter); friend class FilterCleaner; }; } #endif LucenePlusPlus-rel_3.0.4/include/FilteredDocIdSet.h000066400000000000000000000040661217574114600222320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTEREDDOCIDSET_H #define FILTEREDDOCIDSET_H #include "DocIdSet.h" namespace Lucene { /// Abstract decorator class for a DocIdSet implementation that provides on-demand filtering/validation /// mechanism on a given DocIdSet. /// /// Technically, this same functionality could be achieved with ChainedFilter (under contrib/misc), however /// the benefit of this class is it never materializes the full bitset for the filter. Instead, the {@link /// #match} method is invoked on-demand, per docID visited during searching. If you know few docIDs will /// be visited, and the logic behind {@link #match} is relatively costly, this may be a better way to filter /// than ChainedFilter. /// @see DocIdSet class LPPAPI FilteredDocIdSet : public DocIdSet { public: /// @param innerSet Underlying DocIdSet FilteredDocIdSet(DocIdSetPtr innerSet); virtual ~FilteredDocIdSet(); LUCENE_CLASS(FilteredDocIdSet); protected: DocIdSetPtr innerSet; public: /// This DocIdSet implementation is cacheable if the inner set is cacheable. virtual bool isCacheable(); /// Implementation of the contract to build a DocIdSetIterator. /// @see DocIdSetIterator /// @see FilteredDocIdSetIterator virtual DocIdSetIteratorPtr iterator(); protected: /// Validation method to determine whether a docid should be in the result set. /// @param docid docid to be tested /// @return true if input docid should be in the result set, false otherwise. virtual bool match(int32_t docid) = 0; friend class DefaultFilteredDocIdSetIterator; }; } #endif LucenePlusPlus-rel_3.0.4/include/FilteredDocIdSetIterator.h000066400000000000000000000030031217574114600237320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTEREDDOCIDSETITERATOR_H #define FILTEREDDOCIDSETITERATOR_H #include "DocIdSetIterator.h" namespace Lucene { /// Abstract decorator class of a DocIdSetIterator implementation that provides on-demand filter/validation /// mechanism on an underlying DocIdSetIterator. See {@link FilteredDocIdSet}. class LPPAPI FilteredDocIdSetIterator : public DocIdSetIterator { public: /// @param innerIter Underlying DocIdSetIterator. FilteredDocIdSetIterator(DocIdSetIteratorPtr innerIter); virtual ~FilteredDocIdSetIterator(); LUCENE_CLASS(FilteredDocIdSetIterator); protected: DocIdSetIteratorPtr innerIter; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); protected: /// Validation method to determine whether a docid should be in the result set. /// @param doc docid to be tested /// @return true if input docid should be in the result set, false otherwise. /// @see #FilteredDocIdSetIterator(DocIdSetIterator). virtual bool match(int32_t docid) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FilteredQuery.h000066400000000000000000000042451217574114600217000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTEREDQUERY_H #define FILTEREDQUERY_H #include "Query.h" namespace Lucene { /// A query that applies a filter to the results of another query. 
/// /// Note: the bits are retrieved from the filter each time this query is used in a search - use a /// CachingWrapperFilter to avoid regenerating the bits every time. /// /// @see CachingWrapperFilter class LPPAPI FilteredQuery : public Query { public: /// Constructs a new query which applies a filter to the results of the original query. /// Filter::getDocIdSet() will be called every time this query is used in a search. /// @param query Query to be filtered, cannot be null. /// @param filter Filter to apply to query results, cannot be null. FilteredQuery(QueryPtr query, FilterPtr filter); virtual ~FilteredQuery(); LUCENE_CLASS(FilteredQuery); private: QueryPtr query; FilterPtr filter; public: using Query::toString; /// Returns a Weight that applies the filter to the enclosed query's Weight. /// This is accomplished by overriding the Scorer returned by the Weight. virtual WeightPtr createWeight(SearcherPtr searcher); /// Rewrites the wrapped query. virtual QueryPtr rewrite(IndexReaderPtr reader); QueryPtr getQuery(); FilterPtr getFilter(); virtual void extractTerms(SetTerm terms); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); friend class FilteredQueryWeight; }; } #endif LucenePlusPlus-rel_3.0.4/include/FilteredTermEnum.h000066400000000000000000000041401217574114600223210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FILTEREDTERMENUM_H #define FILTEREDTERMENUM_H #include "TermEnum.h" namespace Lucene { /// Abstract class for enumerating a subset of all terms. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than /// all that precede it. class LPPAPI FilteredTermEnum : public TermEnum { public: virtual ~FilteredTermEnum(); LUCENE_CLASS(FilteredTermEnum); protected: /// The current term TermPtr currentTerm; /// The delegate enum - to set this member use {@link #setEnum} TermEnumPtr actualEnum; public: /// Equality measure on the term virtual double difference() = 0; /// Returns the docFreq of the current Term in the enumeration. /// Returns -1 if no Term matches or all terms have been enumerated. virtual int32_t docFreq(); /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Returns the current Term in the enumeration. /// Returns null if no Term matches or all terms have been enumerated. virtual TermPtr term(); /// Closes the enumeration to further activity, freeing resources. virtual void close(); protected: /// Equality compare on the term virtual bool termCompare(TermPtr term) = 0; /// Indicates the end of the enumeration has been reached virtual bool endEnum() = 0; /// Use this method to set the actual TermEnum (eg. in ctor), it will be automatically positioned /// on the first matching term. virtual void setEnum(TermEnumPtr actualEnum); }; } #endif LucenePlusPlus-rel_3.0.4/include/FlagsAttribute.h000066400000000000000000000030151217574114600220260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FLAGSATTRIBUTE_H #define FLAGSATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// This attribute can be used to pass different flags down the tokenizer chain, eg from one TokenFilter /// to another one. class LPPAPI FlagsAttribute : public Attribute { public: FlagsAttribute(); virtual ~FlagsAttribute(); LUCENE_CLASS(FlagsAttribute); protected: int32_t flags; public: virtual String toString(); /// Get the bitset for any bits that have been set. This is completely distinct from {@link /// TypeAttribute#type()}, although they do share similar purposes. The flags can be used to encode /// information about the token for use by other {@link TokenFilter}s. virtual int32_t getFlags(); /// @see #getFlags() virtual void setFlags(int32_t flags); virtual void clear(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual void copyTo(AttributePtr target); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/FormatPostingsDocsConsumer.h000066400000000000000000000017401217574114600244150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSDOCSCONSUMER_H #define FORMATPOSTINGSDOCSCONSUMER_H #include "LuceneObject.h" namespace Lucene { class FormatPostingsDocsConsumer : public LuceneObject { public: virtual ~FormatPostingsDocsConsumer(); LUCENE_CLASS(FormatPostingsDocsConsumer); public: /// Adds a new doc in this term. If this returns null then we just skip consuming positions/payloads. 
virtual FormatPostingsPositionsConsumerPtr addDoc(int32_t docID, int32_t termDocFreq) = 0; /// Called when we are done adding docs to this term virtual void finish() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FormatPostingsDocsWriter.h000066400000000000000000000035401217574114600240760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSDOCSWRITER_H #define FORMATPOSTINGSDOCSWRITER_H #include "FormatPostingsDocsConsumer.h" namespace Lucene { /// Consumes doc & freq, writing them using the current index file format class FormatPostingsDocsWriter : public FormatPostingsDocsConsumer { public: FormatPostingsDocsWriter(SegmentWriteStatePtr state, FormatPostingsTermsWriterPtr parent); virtual ~FormatPostingsDocsWriter(); LUCENE_CLASS(FormatPostingsDocsWriter); public: IndexOutputPtr out; FormatPostingsTermsWriterWeakPtr _parent; SegmentWriteStatePtr state; FormatPostingsPositionsWriterPtr posWriter; DefaultSkipListWriterPtr skipListWriter; int32_t skipInterval; int32_t totalNumDocs; bool omitTermFreqAndPositions; bool storePayloads; int64_t freqStart; FieldInfoPtr fieldInfo; int32_t lastDocID; int32_t df; TermInfoPtr termInfo; // minimize consing UTF8ResultPtr utf8; public: virtual void initialize(); void setField(FieldInfoPtr fieldInfo); /// Adds a new doc in this term. If this returns null then we just skip consuming positions/payloads. 
virtual FormatPostingsPositionsConsumerPtr addDoc(int32_t docID, int32_t termDocFreq); /// Called when we are done adding docs to this term virtual void finish(); void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FormatPostingsFieldsConsumer.h000066400000000000000000000021251217574114600247310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSFIELDSCONSUMER_H #define FORMATPOSTINGSFIELDSCONSUMER_H #include "LuceneObject.h" namespace Lucene { /// Abstract API that consumes terms, doc, freq, prox and payloads postings. Concrete implementations of this /// actually do "something" with the postings (write it into the index in a specific format). class FormatPostingsFieldsConsumer : public LuceneObject { public: virtual ~FormatPostingsFieldsConsumer(); LUCENE_CLASS(FormatPostingsFieldsConsumer); public: /// Add a new field. virtual FormatPostingsTermsConsumerPtr addField(FieldInfoPtr field) = 0; /// Called when we are done adding everything. virtual void finish() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FormatPostingsFieldsWriter.h000066400000000000000000000025131217574114600244130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSFIELDSWRITER_H #define FORMATPOSTINGSFIELDSWRITER_H #include "FormatPostingsFieldsConsumer.h" namespace Lucene { class FormatPostingsFieldsWriter : public FormatPostingsFieldsConsumer { public: FormatPostingsFieldsWriter(SegmentWriteStatePtr state, FieldInfosPtr fieldInfos); virtual ~FormatPostingsFieldsWriter(); LUCENE_CLASS(FormatPostingsFieldsWriter); public: DirectoryPtr dir; String segment; TermInfosWriterPtr termsOut; SegmentWriteStatePtr state; FieldInfosPtr fieldInfos; FormatPostingsTermsWriterPtr termsWriter; DefaultSkipListWriterPtr skipListWriter; int32_t totalNumDocs; public: virtual void initialize(); /// Add a new field. virtual FormatPostingsTermsConsumerPtr addField(FieldInfoPtr field); /// Called when we are done adding everything. virtual void finish(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FormatPostingsPositionsConsumer.h000066400000000000000000000020231217574114600255070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSPOSITIONSCONSUMER_H #define FORMATPOSTINGSPOSITIONSCONSUMER_H #include "LuceneObject.h" namespace Lucene { class FormatPostingsPositionsConsumer : public LuceneObject { public: virtual ~FormatPostingsPositionsConsumer(); LUCENE_CLASS(FormatPostingsPositionsConsumer); public: /// Add a new position & payload. If payloadLength > 0 you must read those bytes from the IndexInput. virtual void addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength) = 0; /// Called when we are done adding positions & payloads. 
virtual void finish() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FormatPostingsPositionsWriter.h000066400000000000000000000026321217574114600251760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSPOSITIONSWRITER_H #define FORMATPOSTINGSPOSITIONSWRITER_H #include "FormatPostingsPositionsConsumer.h" namespace Lucene { class FormatPostingsPositionsWriter : public FormatPostingsPositionsConsumer { public: FormatPostingsPositionsWriter(SegmentWriteStatePtr state, FormatPostingsDocsWriterPtr parent); virtual ~FormatPostingsPositionsWriter(); LUCENE_CLASS(FormatPostingsPositionsWriter); public: FormatPostingsDocsWriterWeakPtr _parent; IndexOutputPtr out; bool omitTermFreqAndPositions; bool storePayloads; int32_t lastPayloadLength; int32_t lastPosition; public: /// Add a new position & payload virtual void addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength); void setField(FieldInfoPtr fieldInfo); /// Called when we are done adding positions & payloads virtual void finish(); void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FormatPostingsTermsConsumer.h000066400000000000000000000020231217574114600246120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSTERMSCONSUMER_H #define FORMATPOSTINGSTERMSCONSUMER_H #include "LuceneObject.h" namespace Lucene { class FormatPostingsTermsConsumer : public LuceneObject { public: virtual ~FormatPostingsTermsConsumer(); LUCENE_CLASS(FormatPostingsTermsConsumer); public: CharArray termBuffer; public: /// Adds a new term in this field virtual FormatPostingsDocsConsumerPtr addTerm(CharArray text, int32_t start) = 0; virtual FormatPostingsDocsConsumerPtr addTerm(const String& text); /// Called when we are done adding terms to this field virtual void finish() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/FormatPostingsTermsWriter.h000066400000000000000000000027631217574114600243060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSTERMSWRITER_H #define FORMATPOSTINGSTERMSWRITER_H #include "FormatPostingsTermsConsumer.h" namespace Lucene { class FormatPostingsTermsWriter : public FormatPostingsTermsConsumer { public: FormatPostingsTermsWriter(SegmentWriteStatePtr state, FormatPostingsFieldsWriterPtr parent); virtual ~FormatPostingsTermsWriter(); LUCENE_CLASS(FormatPostingsTermsWriter); public: FormatPostingsFieldsWriterWeakPtr _parent; SegmentWriteStatePtr state; FormatPostingsDocsWriterPtr docsWriter; TermInfosWriterPtr termsOut; FieldInfoPtr fieldInfo; CharArray currentTerm; int32_t currentTermStart; int64_t freqStart; int64_t proxStart; public: virtual void initialize(); void setField(FieldInfoPtr fieldInfo); /// Adds a new term in this field virtual FormatPostingsDocsConsumerPtr addTerm(CharArray text, int32_t start); /// Called when we are done adding terms to this field virtual void finish(); void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FreqProxFieldMergeState.h000066400000000000000000000025241217574114600236050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FREQPROXFIELDMERGESTATE_H #define FREQPROXFIELDMERGESTATE_H #include "LuceneObject.h" namespace Lucene { /// Used by DocumentsWriter to merge the postings from multiple ThreadStates when creating a segment class FreqProxFieldMergeState : public LuceneObject { public: FreqProxFieldMergeState(FreqProxTermsWriterPerFieldPtr field); virtual ~FreqProxFieldMergeState(); LUCENE_CLASS(FreqProxFieldMergeState); public: FreqProxTermsWriterPerFieldPtr field; int32_t numPostings; CharBlockPoolPtr charPool; Collection postings; FreqProxTermsWriterPostingListPtr p; CharArray text; int32_t textOffset; ByteSliceReaderPtr freq; ByteSliceReaderPtr prox; int32_t docID; int32_t termFreq; protected: int32_t postingUpto; public: bool nextTerm(); bool nextDoc(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FreqProxTermsWriter.h000066400000000000000000000041651217574114600230730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FREQPROXTERMSWRITER_H #define FREQPROXTERMSWRITER_H #include "TermsHashConsumer.h" #include "RawPostingList.h" namespace Lucene { class FreqProxTermsWriter : public TermsHashConsumer { public: virtual ~FreqProxTermsWriter(); LUCENE_CLASS(FreqProxTermsWriter); protected: ByteArray payloadBuffer; public: virtual TermsHashConsumerPerThreadPtr addThread(TermsHashPerThreadPtr perThread); virtual void createPostings(Collection postings, int32_t start, int32_t count); virtual void closeDocStore(SegmentWriteStatePtr state); virtual void abort(); virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state); /// Walk through all unique text tokens (Posting instances) found in this field and serialize them /// into a single RAM segment. void appendPostings(Collection fields, FormatPostingsFieldsConsumerPtr consumer); virtual int32_t bytesPerPosting(); protected: static int32_t compareText(const wchar_t* text1, int32_t pos1, const wchar_t* text2, int32_t pos2); }; class FreqProxTermsWriterPostingList : public RawPostingList { public: FreqProxTermsWriterPostingList(); virtual ~FreqProxTermsWriterPostingList(); LUCENE_CLASS(FreqProxTermsWriterPostingList); public: int32_t docFreq; // # times this term occurs in the current doc int32_t lastDocID; // Last docID where this term occurred int32_t lastDocCode; // Code for prior doc int32_t lastPosition; // Last position where this term occurred }; } #endif LucenePlusPlus-rel_3.0.4/include/FreqProxTermsWriterPerField.h000066400000000000000000000032701217574114600245020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FREQPROXTERMSWRITERPERFIELD_H #define FREQPROXTERMSWRITERPERFIELD_H #include "TermsHashConsumerPerField.h" namespace Lucene { class FreqProxTermsWriterPerField : public TermsHashConsumerPerField { public: FreqProxTermsWriterPerField(TermsHashPerFieldPtr termsHashPerField, FreqProxTermsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo); virtual ~FreqProxTermsWriterPerField(); LUCENE_CLASS(FreqProxTermsWriterPerField); public: FreqProxTermsWriterPerThreadWeakPtr _perThread; TermsHashPerFieldWeakPtr _termsHashPerField; FieldInfoPtr fieldInfo; DocStatePtr docState; FieldInvertStatePtr fieldState; bool omitTermFreqAndPositions; PayloadAttributePtr payloadAttribute; bool hasPayloads; public: virtual int32_t getStreamCount(); virtual void finish(); virtual void skippingLongTerm(); virtual int32_t compareTo(LuceneObjectPtr other); void reset(); virtual bool start(Collection fields, int32_t count); virtual void start(FieldablePtr field); void writeProx(FreqProxTermsWriterPostingListPtr p, int32_t proxCode); virtual void newTerm(RawPostingListPtr p); virtual void addTerm(RawPostingListPtr p); void abort(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FreqProxTermsWriterPerThread.h000066400000000000000000000021521217574114600246640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FREQPROXTERMSWRITERPERTHREAD_H #define FREQPROXTERMSWRITERPERTHREAD_H #include "TermsHashConsumerPerThread.h" namespace Lucene { class FreqProxTermsWriterPerThread : public TermsHashConsumerPerThread { public: FreqProxTermsWriterPerThread(TermsHashPerThreadPtr perThread); virtual ~FreqProxTermsWriterPerThread(); LUCENE_CLASS(FreqProxTermsWriterPerThread); public: TermsHashPerThreadWeakPtr _termsHashPerThread; DocStatePtr docState; public: virtual TermsHashConsumerPerFieldPtr addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo); virtual void startDocument(); virtual DocWriterPtr finishDocument(); virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.4/include/FuzzyQuery.h000066400000000000000000000061461217574114600212730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FUZZYQUERY_H #define FUZZYQUERY_H #include "MultiTermQuery.h" namespace Lucene { /// Implements the fuzzy search query. The similarity measurement is based on the Levenshtein (edit /// distance) algorithm. /// /// Warning: this query is not very scalable with its default prefix length of 0 - in this case, *every* /// term will be enumerated and cause an edit score calculation. class LPPAPI FuzzyQuery : public MultiTermQuery { public: /// Create a new FuzzyQuery that will match terms with a similarity of at least minimumSimilarity /// to term. If a prefixLength > 0 is specified, a common prefix of that length is also required. 
/// @param term The term to search for /// @param minimumSimilarity A value between 0 and 1 to set the required similarity between the query /// term and the matching terms. For example, for a minimumSimilarity of 0.5 a term of the same /// length as the query term is considered similar to the query term if the edit distance between /// both terms is less than length(term) * 0.5 /// @param prefixLength Length of common (non-fuzzy) prefix FuzzyQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength); FuzzyQuery(TermPtr term, double minimumSimilarity); FuzzyQuery(TermPtr term); virtual ~FuzzyQuery(); LUCENE_CLASS(FuzzyQuery); protected: double minimumSimilarity; int32_t prefixLength; bool termLongEnough; TermPtr term; public: static double defaultMinSimilarity(); static const int32_t defaultPrefixLength; public: using MultiTermQuery::toString; /// Returns the minimum similarity that is required for this query to match. /// @return float value between 0.0 and 1.0 double getMinSimilarity(); /// Returns the non-fuzzy prefix length. This is the number of characters at the start of a term that /// must be identical (not fuzzy) to the query term if the query is to match that term. int32_t getPrefixLength(); /// Returns the pattern term. TermPtr getTerm(); virtual void setRewriteMethod(RewriteMethodPtr method); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual String toString(const String& field); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); protected: void ConstructQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength); virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/FuzzyTermEnum.h000066400000000000000000000122361217574114600217170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FUZZYTERMENUM_H #define FUZZYTERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that are similar to the specified filter term. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater /// than all that precede it. class LPPAPI FuzzyTermEnum : public FilteredTermEnum { public: /// Constructor for enumeration of all terms from specified reader which share a prefix of length /// prefixLength with term and which have a fuzzy similarity > minSimilarity. /// /// After calling the constructor the enumeration is already pointing to the first valid term if /// such a term exists. /// @param reader Delivers terms. /// @param term Pattern term. /// @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5. /// @param prefixLength Length of required common prefix. Default value is 0. FuzzyTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity, int32_t prefixLength); FuzzyTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity); FuzzyTermEnum(IndexReaderPtr reader, TermPtr term); virtual ~FuzzyTermEnum(); LUCENE_CLASS(FuzzyTermEnum); protected: /// Allows us save time required to create a new array every time similarity is called. 
Collection p; Collection d; double _similarity; bool _endEnum; TermPtr searchTerm; String field; String text; String prefix; double minimumSimilarity; double scale_factor; public: virtual double difference(); virtual bool endEnum(); virtual void close(); protected: void ConstructTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity, int32_t prefixLength); /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to calculate the distance between /// the given term and the comparing term. virtual bool termCompare(TermPtr term); /// /// Compute Levenshtein distance /// /// Similarity returns a number that is 1.0f or less (including negative numbers) based on how similar the /// Term is compared to a target term. It returns exactly 0.0 when ///
        /// editDistance > maximumEditDistance
        /// 
/// /// Otherwise it returns: ///
        /// 1 - (editDistance / length)
        /// 
/// where length is the length of the shortest term (text or target) including a prefix that are identical /// and editDistance is the Levenshtein distance for the two words. /// /// Embedded within this algorithm is a fail-fast Levenshtein distance algorithm. The fail-fast algorithm /// differs from the standard Levenshtein distance algorithm in that it is aborted if it is discovered that /// the minimum distance between the words is greater than some threshold. /// /// To calculate the maximum distance threshold we use the following formula: ///
        /// (1 - minimumSimilarity) * length
        /// 
/// where length is the shortest term including any prefix that is not part of the similarity comparison. /// This formula was derived by solving for what maximum value of distance returns false for the following /// statements: ///
        /// similarity = 1 - ((double)distance / (double)(prefixLength + std::min(textlen, targetlen)));
        /// return (similarity > minimumSimilarity);
        /// 
/// where distance is the Levenshtein distance for the two words. /// /// Levenshtein distance (also known as edit distance) is a measure of similarity between two strings where /// the distance is measured as the number of character deletions, insertions or substitutions required to /// transform one string to the other string. /// /// @param target The target word or phrase. /// @return the similarity, 0.0 or less indicates that it matches less than the required threshold and 1.0 /// indicates that the text and target are identical. double similarity(const String& target); /// The max Distance is the maximum Levenshtein distance for the text compared to some other value that /// results in score that is better than the minimum similarity. /// @param m The length of the "other value" /// @return The maximum Levenshtein distance that we care about int32_t calculateMaxDistance(int32_t m); }; } #endif LucenePlusPlus-rel_3.0.4/include/HashMap.h000066400000000000000000000120421217574114600204270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef HASHMAP_H #define HASHMAP_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle hash maps that can be safely copied and shared template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > class HashMap : public LuceneSync { public: typedef HashMap this_type; typedef std::pair key_value; typedef boost::unordered_map< KEY, VALUE, HASH, EQUAL, LuceneAllocator > map_type; typedef typename map_type::iterator iterator; typedef typename map_type::const_iterator const_iterator; typedef KEY key_type; typedef VALUE value_type; virtual ~HashMap() { } protected: boost::shared_ptr mapContainer; public: static this_type newInstance() { this_type instance; instance.mapContainer = Lucene::newInstance(); return instance; } void reset() { mapContainer.reset(); } int32_t size() const { return (int32_t)mapContainer->size(); } bool empty() const { return mapContainer->empty(); } void clear() { mapContainer->clear(); } iterator begin() { return mapContainer->begin(); } iterator end() { return mapContainer->end(); } const_iterator begin() const { return mapContainer->begin(); } const_iterator end() const { return mapContainer->end(); } operator bool() const { return mapContainer; } bool operator! 
() const { return !mapContainer; } map_type& operator= (const map_type& other) { mapContainer = other.mapContainer; return *this; } void put(const KEY& key, const VALUE& value) { (*mapContainer)[key] = value; } template void putAll(ITER first, ITER last) { for (iterator current = first; current != last; ++current) (*mapContainer)[current->first] = current->second; } template void remove(ITER pos) { mapContainer->erase(pos); } template ITER remove(ITER first, ITER last) { return mapContainer->erase(first, last); } bool remove(const KEY& key) { return (mapContainer->erase(key) > 0); } iterator find(const KEY& key) { return mapContainer->find(key); } VALUE get(const KEY& key) const { iterator findValue = mapContainer->find(key); return findValue == mapContainer->end() ? VALUE() : findValue->second; } bool contains(const KEY& key) const { return (mapContainer->find(key) != mapContainer->end()); } VALUE& operator[] (const KEY& key) { return (*mapContainer)[key]; } }; /// Utility template class to handle weak keyed maps template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > class WeakHashMap : public HashMap { public: typedef WeakHashMap this_type; typedef std::pair key_value; typedef typename boost::unordered_map< KEY, VALUE, HASH, EQUAL, LuceneAllocator > map_type; typedef typename map_type::iterator iterator; static this_type newInstance() { this_type instance; instance.mapContainer = Lucene::newInstance(); return instance; } void removeWeak() { if (!this->mapContainer || this->mapContainer->empty()) return; map_type clearCopy; for (iterator key = this->mapContainer->begin(); key != this->mapContainer->end(); ++key) { if (!key->first.expired()) clearCopy.insert(*key); } this->mapContainer->swap(clearCopy); } VALUE get(const KEY& key) { iterator findValue = this->mapContainer->find(key); if (findValue != this->mapContainer->end()) return findValue->second; removeWeak(); return VALUE(); } }; } #endif 
LucenePlusPlus-rel_3.0.4/include/HashSet.h000066400000000000000000000062251217574114600204530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef HASHSET_H #define HASHSET_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle hash set collections that can be safely copied and shared template < class TYPE, class HASH = boost::hash, class EQUAL = std::equal_to > class HashSet : public LuceneSync { public: typedef HashSet this_type; typedef boost::unordered_set< TYPE, HASH, EQUAL, LuceneAllocator > set_type; typedef typename set_type::iterator iterator; typedef typename set_type::const_iterator const_iterator; typedef TYPE value_type; virtual ~HashSet() { } protected: boost::shared_ptr setContainer; public: static this_type newInstance() { this_type instance; instance.setContainer = Lucene::newInstance(); return instance; } template static this_type newInstance(ITER first, ITER last) { this_type instance; instance.setContainer = Lucene::newInstance(first, last); return instance; } void reset() { setContainer.reset(); } int32_t size() const { return (int32_t)setContainer->size(); } bool empty() const { return setContainer->empty(); } void clear() { setContainer->clear(); } iterator begin() { return setContainer->begin(); } iterator end() { return setContainer->end(); } const_iterator begin() const { return setContainer->begin(); } const_iterator end() const { return setContainer->end(); } operator bool() const { return setContainer; } bool operator! 
() const { return !setContainer; } set_type& operator= (const set_type& other) { setContainer = other.setContainer; return *this; } bool add(const TYPE& type) { return setContainer->insert(type).second; } template void addAll(ITER first, ITER last) { setContainer->insert(first, last); } bool remove(const TYPE& type) { return (setContainer->erase(type) > 0); } iterator find(const TYPE& type) { return setContainer->find(type); } bool contains(const TYPE& type) const { return (setContainer->find(type) != setContainer->end()); } }; } #endif LucenePlusPlus-rel_3.0.4/include/HitQueue.h000066400000000000000000000016411217574114600206420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef HITQUEUE_H #define HITQUEUE_H #include "HitQueueBase.h" namespace Lucene { class HitQueue : public HitQueueBase { public: /// Creates a new instance with size elements. HitQueue(int32_t size, bool prePopulate); virtual ~HitQueue(); LUCENE_CLASS(HitQueue); protected: bool prePopulate; protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); /// Returns null if prePopulate is false. virtual ScoreDocPtr getSentinelObject(); }; } #endif LucenePlusPlus-rel_3.0.4/include/HitQueueBase.h000066400000000000000000000034271217574114600214410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef HITQUEUEBASE_H #define HITQUEUEBASE_H #include "PriorityQueue.h" namespace Lucene { class LPPAPI HitQueueBase : public LuceneObject { public: HitQueueBase(int32_t size); virtual ~HitQueueBase(); LUCENE_CLASS(HitQueueBase); public: virtual ScoreDocPtr add(ScoreDocPtr scoreDoc); virtual ScoreDocPtr addOverflow(ScoreDocPtr scoreDoc); virtual ScoreDocPtr top(); virtual ScoreDocPtr pop(); virtual ScoreDocPtr updateTop(); virtual int32_t size(); virtual bool empty(); virtual void clear(); protected: PriorityQueueScoreDocsPtr queue; int32_t queueSize; public: virtual void initialize(); protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) = 0; virtual ScoreDocPtr getSentinelObject(); friend class PriorityQueueScoreDocs; }; class LPPAPI PriorityQueueScoreDocs : public PriorityQueue { public: PriorityQueueScoreDocs(HitQueueBasePtr hitQueue, int32_t size); virtual ~PriorityQueueScoreDocs(); LUCENE_CLASS(PriorityQueueScoreDocs); protected: HitQueueBaseWeakPtr _hitQueue; protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); virtual ScoreDocPtr getSentinelObject(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ISOLatin1AccentFilter.h000066400000000000000000000027241217574114600231030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ISOLATIN1ACCENTFILTER_H #define ISOLATIN1ACCENTFILTER_H #include "TokenFilter.h" namespace Lucene { /// A filter that replaces accented characters in the ISO Latin 1 character set (ISO-8859-1) by their unaccented /// equivalent. The case will not be altered. 
/// /// For instance, 'à' will be replaced by 'a'. /// /// @deprecated If you build a new index, use {@link ASCIIFoldingFilter} which covers a superset of Latin 1. /// This class is included for use with existing indexes and will be removed in a future release (possibly Lucene 4.0). class LPPAPI ISOLatin1AccentFilter : public TokenFilter { public: ISOLatin1AccentFilter(TokenStreamPtr input); virtual ~ISOLatin1AccentFilter(); LUCENE_CLASS(ISOLatin1AccentFilter); protected: CharArray output; int32_t outputPos; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// To replace accented characters in a String by unaccented equivalents. void removeAccents(const wchar_t* input, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexCommit.h000066400000000000000000000064371217574114600213410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXCOMMIT_H #define INDEXCOMMIT_H #include "LuceneObject.h" namespace Lucene { /// Represents a single commit into an index as seen by the {@link IndexDeletionPolicy} or {@link IndexReader}. /// /// Changes to the content of an index are made visible only after the writer who made that change commits by /// writing a new segments file (segments_N). This point in time, when the action of writing of a new segments /// file to the directory is completed, is an index commit. /// /// Each index commit point has a unique segments file associated with it. The segments file associated with a /// later index commit point would have a larger N. 
class LPPAPI IndexCommit : public LuceneObject { public: virtual ~IndexCommit(); LUCENE_CLASS(IndexCommit); public: /// Get the segments file (segments_N) associated with this commit point. virtual String getSegmentsFileName() = 0; /// Returns all index files referenced by this commit point. virtual HashSet getFileNames() = 0; /// Returns the {@link Directory} for the index. virtual DirectoryPtr getDirectory() = 0; /// Delete this commit point. This only applies when using the commit point in the context of IndexWriter's /// IndexDeletionPolicy. /// /// Upon calling this, the writer is notified that this commit point should be deleted. /// /// Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect /// and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or /// {@link IndexDeletionPolicy#onCommit onCommit()} methods. virtual void deleteCommit() = 0; virtual bool isDeleted() = 0; /// Returns true if this commit is an optimized index. virtual bool isOptimized() = 0; /// Two IndexCommits are equal if both their Directory and versions are equal. virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); /// Returns the version for this IndexCommit. This is the same value that {@link IndexReader#getVersion} /// would return if it were opened on this commit. virtual int64_t getVersion() = 0; /// Returns the generation (the _N in segments_N) for this IndexCommit. virtual int64_t getGeneration() = 0; /// Convenience method that returns the last modified time of the segments_N file corresponding to this /// index commit, equivalent to getDirectory()->fileModified(getSegmentsFileName()). virtual int64_t getTimestamp(); /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. Map is /// String -> String. 
virtual MapStringString getUserData() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexDeletionPolicy.h000066400000000000000000000073201217574114600230240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXDELETIONPOLICY_H #define INDEXDELETIONPOLICY_H #include "LuceneObject.h" namespace Lucene { /// Policy for deletion of stale {@link IndexCommit index commits}. /// Implement this interface, and pass /// it to one of the {@link IndexWriter} or {@link IndexReader} constructors, to customize when older /// {@link IndexCommit point-in-time commits} are deleted from the index directory. The default deletion /// policy is {@link KeepOnlyLastCommitDeletionPolicy}, which always removes old commits as soon as a new /// commit is done (this matches the behavior before 2.2). /// /// One expected use case for this (and the reason why it was first created) is to work around problems /// with an index directory accessed via filesystems like NFS because NFS does not provide the "delete on /// last close" semantics that Lucene's "point in time" search normally relies on. By implementing a /// custom deletion policy, such as "a commit is only removed once it has been stale for more than X /// minutes", you can give your readers time to refresh to the new commit before {@link IndexWriter} /// removes the old commits. Note that doing so will increase the storage requirements of the index. 
class LPPAPI IndexDeletionPolicy : public LuceneObject { protected: IndexDeletionPolicy(); public: virtual ~IndexDeletionPolicy(); LUCENE_CLASS(IndexDeletionPolicy); public: /// This is called once when a writer is first instantiated to give the policy a chance to remove old /// commit points. /// /// The writer locates all index commits present in the index directory and calls this method. The /// policy may choose to delete some of the commit points, doing so by calling method {@link /// IndexCommit#delete delete()} of {@link IndexCommit}. /// /// Note: the last CommitPoint is the most recent one, ie. the "front index state". Be careful not to /// delete it, unless you know for sure what you are doing, and unless you can afford to lose the /// index content while doing that. /// /// @param commits List of current {@link IndexCommit point-in-time commits}, sorted by age (the 0th /// one is the oldest commit). virtual void onInit(Collection commits) = 0; /// This is called each time the writer completed a commit. This gives the policy a chance to remove /// old commit points with each commit. /// /// The policy may now choose to delete old commit points by calling method {@link /// IndexCommit#delete delete()} of {@link IndexCommit}. /// /// This method is only called when {@link IndexWriter#commit} or {@link IndexWriter#close} is called, /// or possibly not at all if the {@link IndexWriter#rollback} is called. /// /// Note: the last CommitPoint is the most recent one, ie. the "front index state". Be careful not to /// delete it, unless you know for sure what you are doing, and unless you can afford to lose the /// index content while doing that. /// /// @param commits List of {@link IndexCommit}, sorted by age (the 0th one is the oldest commit). 
virtual void onCommit(Collection commits) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexFileDeleter.h000066400000000000000000000211741217574114600222700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXFILEDELETER_H #define INDEXFILEDELETER_H #include "IndexCommit.h" namespace Lucene { /// This class keeps track of each SegmentInfos instance that is still "live", either because it corresponds to a /// segments_N file in the Directory (a "commit", ie. a committed SegmentInfos) or because it's an in-memory /// SegmentInfos that a writer is actively updating but has not yet committed. This class uses simple reference /// counting to map the live SegmentInfos instances to individual files in the Directory. /// /// The same directory file may be referenced by more than one IndexCommit, i.e. more than one SegmentInfos. /// Therefore we count how many commits reference each file. When all the commits referencing a certain file have /// been deleted, the refcount for that file becomes zero, and the file is deleted. /// /// A separate deletion policy interface (IndexDeletionPolicy) is consulted on creation (onInit) and once per /// commit (onCommit), to decide when a commit should be removed. /// /// It is the business of the IndexDeletionPolicy to choose when to delete commit points. The actual mechanics of /// file deletion, retrying, etc, derived from the deletion of commit points is the business of the IndexFileDeleter. /// /// The current default deletion policy is {@link KeepOnlyLastCommitDeletionPolicy}, which removes all prior commits /// when a new commit has completed. This matches the behavior before 2.2. 
/// /// Note that you must hold the write.lock before instantiating this class. It opens segments_N file(s) directly /// with no retry logic. class IndexFileDeleter : public LuceneObject { public: /// Initialize the deleter: find all previous commits in the Directory, incref the files they reference, call /// the policy to let it delete commits. This will remove any files not referenced by any of the commits. IndexFileDeleter(DirectoryPtr directory, IndexDeletionPolicyPtr policy, SegmentInfosPtr segmentInfos, InfoStreamPtr infoStream, DocumentsWriterPtr docWriter, HashSet synced); virtual ~IndexFileDeleter(); LUCENE_CLASS(IndexFileDeleter); protected: /// Files that we tried to delete but failed (likely because they are open and we are running on Windows), /// so we will retry them again later HashSet deletable; /// Reference count for all files in the index. Counts how many existing commits reference a file. MapStringRefCount refCounts; /// Holds all commits (segments_N) currently in the index. This will have just 1 commit if you are using the /// default delete policy (KeepOnlyLastCommitDeletionPolicy). Other policies may leave commit points live for /// longer in which case this list would be longer than 1 Collection commits; /// Holds files we had incref'd from the previous non-commit checkpoint Collection< HashSet > lastFiles; /// Commits that the IndexDeletionPolicy have decided to delete Collection commitsToDelete; InfoStreamPtr infoStream; DirectoryPtr directory; IndexDeletionPolicyPtr policy; DocumentsWriterPtr docWriter; SegmentInfosPtr lastSegmentInfos; HashSet synced; /// Change to true to see details of reference counts when infoStream != null static bool VERBOSE_REF_COUNTS; public: bool startingCommitDeleted; protected: void message(const String& message); /// Remove the CommitPoints in the commitsToDelete List by DecRef'ing all files from each SegmentInfos. 
void deleteCommits(); void deletePendingFiles(); RefCountPtr getRefCount(const String& fileName); public: void setInfoStream(InfoStreamPtr infoStream); SegmentInfosPtr getLastSegmentInfos(); /// Writer calls this when it has hit an error and had to roll back, to tell us that there may now be /// unreferenced files in the filesystem. So we re-list the filesystem and delete such files. If /// segmentName is non-null, we will only delete files corresponding to that segment. void refresh(const String& segmentName); void refresh(); void close(); /// For definition of "check point" see IndexWriter comments: "Clarification: Check Points (and commits)". /// Writer calls this when it has made a "consistent change" to the index, meaning new files are written to /// the index and the in-memory SegmentInfos have been modified to point to those files. /// /// This may or may not be a commit (segments_N may or may not have been written). /// /// We simply incref the files referenced by the new SegmentInfos and decref the files we had previously /// seen (if any). /// /// If this is a commit, we also call the policy to give it a chance to remove other commits. If any /// commits are removed, we decref their files as well. void checkpoint(SegmentInfosPtr segmentInfos, bool isCommit); void incRef(SegmentInfosPtr segmentInfos, bool isCommit); void incRef(HashSet files); void incRef(const String& fileName); void decRef(HashSet files); void decRef(const String& fileName); void decRef(SegmentInfosPtr segmentInfos); bool exists(const String& fileName); void deleteFiles(HashSet files); /// Deletes the specified files, but only if they are new (have not yet been incref'd). 
void deleteNewFiles(HashSet files); void deleteFile(const String& fileName); }; /// Tracks the reference count for a single index file class RefCount : public LuceneObject { public: RefCount(const String& fileName); virtual ~RefCount(); LUCENE_CLASS(RefCount); public: String fileName; // fileName used only for better assert error messages bool initDone; int32_t count; public: int32_t IncRef(); int32_t DecRef(); }; /// Holds details for each commit point. This class is also passed to the deletion policy. Note: this class /// has a natural ordering that is inconsistent with equals. class CommitPoint : public IndexCommit { public: CommitPoint(Collection commitsToDelete, DirectoryPtr directory, SegmentInfosPtr segmentInfos); virtual ~CommitPoint(); LUCENE_CLASS(CommitPoint); public: int64_t gen; HashSet files; String segmentsFileName; bool deleted; DirectoryPtr directory; Collection commitsToDelete; int64_t version; int64_t generation; bool _isOptimized; MapStringString userData; public: virtual String toString(); /// Returns true if this commit is an optimized index. virtual bool isOptimized(); /// Get the segments file (segments_N) associated with this commit point. virtual String getSegmentsFileName(); /// Returns all index files referenced by this commit point. virtual HashSet getFileNames(); /// Returns the {@link Directory} for the index. virtual DirectoryPtr getDirectory(); /// Returns the version for this IndexCommit. virtual int64_t getVersion(); /// Returns the generation (the _N in segments_N) for this IndexCommit. virtual int64_t getGeneration(); /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. virtual MapStringString getUserData(); /// Called only be the deletion policy, to remove this commit point from the index. 
virtual void deleteCommit(); virtual bool isDeleted(); virtual int32_t compareTo(LuceneObjectPtr other); }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexFileNameFilter.h000066400000000000000000000022321217574114600227240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXFILENAMEFILTER_H #define INDEXFILENAMEFILTER_H #include "LuceneObject.h" namespace Lucene { /// Filename filter that accept filenames and extensions only created by Lucene. class LPPAPI IndexFileNameFilter : public LuceneObject { public: /// Returns true if this is a file known to be a Lucene index file. static bool accept(const String& directory, const String& name); /// Returns true if this is a file that would be contained in a CFS file. /// This function should only be called on files that pass the /// {@link #accept} (ie, are already known to be a Lucene index file). static bool isCFSFile(const String& name); /// Return singleton IndexFileNameFilter static IndexFileNameFilterPtr getFilter(); }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexFileNames.h000066400000000000000000000106751217574114600217530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXFILENAMES_H #define INDEXFILENAMES_H #include "LuceneObject.h" namespace Lucene { /// Constants representing filenames and extensions used by Lucene. 
class IndexFileNames : public LuceneObject { public: virtual ~IndexFileNames(); LUCENE_CLASS(IndexFileNames); public: /// Name of the index segment file. static const String& SEGMENTS(); /// Name of the generation reference file name. static const String& SEGMENTS_GEN(); /// Name of the index deletable file (only used in pre-lockless indices). static const String& DELETABLE(); /// Extension of norms file. static const String& NORMS_EXTENSION(); /// Extension of freq postings file. static const String& FREQ_EXTENSION(); /// Extension of prox postings file. static const String& PROX_EXTENSION(); /// Extension of terms file. static const String& TERMS_EXTENSION(); /// Extension of terms index file. static const String& TERMS_INDEX_EXTENSION(); /// Extension of stored fields index file. static const String& FIELDS_INDEX_EXTENSION(); /// Extension of stored fields file. static const String& FIELDS_EXTENSION(); /// Extension of vectors fields file. static const String& VECTORS_FIELDS_EXTENSION(); /// Extension of vectors documents file. static const String& VECTORS_DOCUMENTS_EXTENSION(); /// Extension of vectors index file. static const String& VECTORS_INDEX_EXTENSION(); /// Extension of compound file. static const String& COMPOUND_FILE_EXTENSION(); /// Extension of compound file for doc store files. static const String& COMPOUND_FILE_STORE_EXTENSION(); /// Extension of deletes. static const String& DELETES_EXTENSION(); /// Extension of field infos. static const String& FIELD_INFOS_EXTENSION(); /// Extension of plain norms. static const String& PLAIN_NORMS_EXTENSION(); /// Extension of separate norms. static const String& SEPARATE_NORMS_EXTENSION(); /// Extension of gen file. static const String& GEN_EXTENSION(); /// This array contains all filename extensions used by Lucene's index /// files, with two exceptions, namely the extension made up from /// ".f" + number and from ".s" + number. Also note that Lucene's /// "segments_N" files do not have any filename extension. 
static const HashSet INDEX_EXTENSIONS(); /// File extensions that are added to a compound file (same as /// {@link #INDEX_EXTENSIONS}, minus "del", "gen", "cfs"). static const HashSet INDEX_EXTENSIONS_IN_COMPOUND_FILE(); static const HashSet STORE_INDEX_EXTENSIONS(); static const HashSet NON_STORE_INDEX_EXTENSIONS(); /// File extensions of old-style index files. static const HashSet COMPOUND_EXTENSIONS(); /// File extensions for term vector support. static const HashSet VECTOR_EXTENSIONS(); /// Computes the full file name from base, extension and generation. /// If the generation is {@link SegmentInfo#NO}, the file name is null. /// If it's {@link SegmentInfo#WITHOUT_GEN} the file name is base+extension. /// If it's > 0, the file name is base_generation+extension. static String fileNameFromGeneration(const String& base, const String& extension, int64_t gen); /// Returns true if the provided filename is one of the doc store files /// (ends with an extension in STORE_INDEX_EXTENSIONS). static bool isDocStoreFile(const String& fileName); /// Return segment file name. static String segmentFileName(const String& segmentName, const String& ext); }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexInput.h000066400000000000000000000131311217574114600211750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXINPUT_H #define INDEXINPUT_H #include "LuceneObject.h" namespace Lucene { /// Abstract base class for input from a file in a {@link Directory}. /// A random-access input stream. Used for all Lucene index input operations. 
/// @see Directory class LPPAPI IndexInput : public LuceneObject { public: IndexInput(); virtual ~IndexInput(); LUCENE_CLASS(IndexInput); protected: bool preUTF8Strings; // true if we are reading old (modified UTF8) string format public: /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte() = 0; /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*, int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) = 0; /// Reads a specified number of bytes into an array at the specified offset /// with control over whether the read should be buffered (callers who have /// their own buffer should pass in "false" for useBuffer). Currently only /// {@link BufferedIndexInput} respects this parameter. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @param useBuffer set to false if the caller will handle buffering. /// @see IndexOutput#writeBytes(const uint8_t*,int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer); /// Reads four bytes and returns an int. /// @see IndexOutput#writeInt(int32_t) virtual int32_t readInt(); /// Reads an int stored in variable-length format. Reads between one and five /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. /// @see IndexOutput#writeVInt(int32_t) virtual int32_t readVInt(); /// Reads eight bytes and returns a int64. /// @see IndexOutput#writeLong(int64_t) virtual int64_t readLong(); /// Reads a int64 stored in variable-length format. Reads between one and nine /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. 
virtual int64_t readVLong(); /// Call this if readString should read characters stored in the old modified /// UTF8 format. This is used for indices written pre-2.4. virtual void setModifiedUTF8StringsMode(); /// Reads a string. /// @see IndexOutput#writeString(const String&) virtual String readString(); /// Reads a modified UTF8 format string. virtual String readModifiedUTF8String(); /// Reads Lucene's old "modified UTF-8" encoded characters into an array. /// @param buffer the array to read characters into. /// @param start the offset in the array to start storing characters. /// @param length the number of characters to read. /// @see IndexOutput#writeChars(const String& s, int32_t, int32_t) virtual int32_t readChars(wchar_t* buffer, int32_t start, int32_t length); /// Similar to {@link #readChars(wchar_t*, int32_t, int32_t)} but does not /// do any conversion operations on the bytes it is reading in. It still /// has to invoke {@link #readByte()} just as {@link #readChars(wchar_t*, int32_t, int32_t)} /// does, but it does not need a buffer to store anything and it does not have /// to do any of the bitwise operations, since we don't actually care what is /// in the byte except to determine how many more bytes to read. /// @param length The number of chars to read. /// @deprecated this method operates on old "modified utf8" encoded strings. virtual void skipChars(int32_t length); /// Closes the stream to further operations. virtual void close() = 0; /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer() = 0; /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() virtual void seek(int64_t pos) = 0; /// The number of bytes in the file. virtual int64_t length() = 0; /// Returns a clone of this stream. /// /// Clones of a stream access the same data, and are positioned at the same /// point as the stream they were cloned from. 
/// /// Subclasses must ensure that clones may be positioned at different points /// in the input from each other and from the stream they were cloned from. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Read string map as a series of key/value pairs. virtual MapStringString readStringStringMap(); }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexOutput.h000066400000000000000000000104301217574114600213750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXOUTPUT_H #define INDEXOUTPUT_H #include "LuceneObject.h" namespace Lucene { /// Abstract base class for output to a file in a Directory. A random-access output stream. Used for all /// Lucene index output operations. /// @see Directory /// @see IndexInput class LPPAPI IndexOutput : public LuceneObject { public: virtual ~IndexOutput(); LUCENE_CLASS(IndexOutput); protected: static const int32_t COPY_BUFFER_SIZE; ByteArray copyBuffer; public: /// Writes a single byte. /// @see IndexInput#readByte() virtual void writeByte(uint8_t b) = 0; /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length) = 0; /// Forces any buffered output to be written. virtual void flush() = 0; /// Closes this stream to further operations. virtual void close() = 0; /// Returns the current position in this file, where the next write will occur. virtual int64_t getFilePointer() = 0; /// Sets current position in this file, where the next write will occur. 
/// @see #getFilePointer() virtual void seek(int64_t pos) = 0; /// The number of bytes in the file. virtual int64_t length() = 0; public: /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) void writeBytes(const uint8_t* b, int32_t length); /// Writes an int as four bytes. /// @see IndexInput#readInt() void writeInt(int32_t i); /// Writes an int in a variable-length format. Writes between one and five bytes. Smaller values take fewer bytes. /// Negative numbers are not supported. /// @see IndexInput#readVInt() void writeVInt(int32_t i); /// Writes a int64 as eight bytes. /// @see IndexInput#readLong() void writeLong(int64_t i); /// Writes an int64 in a variable-length format. Writes between one and five bytes. Smaller values take fewer bytes. /// Negative numbers are not supported. /// @see IndexInput#readVLong() void writeVLong(int64_t i); /// Writes a string. /// @see IndexInput#readString() void writeString(const String& s); /// Writes a sub sequence of characters from s as the old format (modified UTF-8 encoded bytes). /// @param s the source of the characters. /// @param start the first character in the sequence. /// @param length the number of characters in the sequence. /// @deprecated -- please use {@link #writeString} void writeChars(const String& s, int32_t start, int32_t length); /// Copy numBytes bytes from input to ourself. void copyBytes(IndexInputPtr input, int64_t numBytes); /// Set the file length. By default, this method does nothing (it's optional for a Directory to implement it). /// But, certain Directory implementations (for example @see FSDirectory) can use this to inform the underlying IO /// system to pre-allocate the file to the specified size. If the length is longer than the current file length, /// the bytes added to the file are undefined. Otherwise the file is truncated. /// @param length file length. 
void setLength(int64_t length); /// Write string map as a series of key/value pairs. /// @param map map of string-string key-values. void writeStringStringMap(MapStringString map); }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexReader.h000066400000000000000000001044671217574114600213150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXREADER_H #define INDEXREADER_H #include "SegmentInfos.h" namespace Lucene { /// IndexReader is an abstract class, providing an interface for accessing an index. Search of an index is done /// entirely through this abstract interface, so that any subclass which implements it is searchable. /// /// Concrete subclasses of IndexReader are usually constructed with a call to one of the static open methods, /// eg. {@link #open(DirectoryPtr, bool)}. /// /// For efficiency, in this API documents are often referred to via document numbers, non-negative integers which /// each name a unique document in the index. These document numbers are ephemeral -they may change as documents /// are added to and deleted from an index. Clients should thus not rely on a given document having the same number /// between sessions. /// /// An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used /// to delete documents from the index then. /// /// NOTE: for backwards API compatibility, several methods are not listed as abstract, but have no useful implementations /// in this base class and instead always throw UnsupportedOperation exception. Subclasses are strongly encouraged to /// override these methods, but in many cases may not need to. 
/// /// NOTE: as of 2.4, it's possible to open a read-only IndexReader using the static open methods that accept the bool /// readOnly parameter. Such a reader has better concurrency as it's not necessary to synchronize on the isDeleted /// method. You must specify false if you want to make changes with the resulting IndexReader. /// /// NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, /// concurrently. If your application requires external synchronization, you should not synchronize on the IndexReader /// instance; use your own (non-Lucene) objects instead. class LPPAPI IndexReader : public LuceneObject { public: IndexReader(); virtual ~IndexReader(); LUCENE_CLASS(IndexReader); public: /// Constants describing field properties, for example used for {@link IndexReader#getFieldNames(FieldOption)}. enum FieldOption { /// All fields FIELD_OPTION_ALL, /// All indexed fields FIELD_OPTION_INDEXED, /// All fields that store payloads FIELD_OPTION_STORES_PAYLOADS, /// All fields that omit tf FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS, /// All fields which are not indexed FIELD_OPTION_UNINDEXED, /// All fields which are indexed with termvectors enabled FIELD_OPTION_INDEXED_WITH_TERMVECTOR, /// All fields which are indexed but don't have termvectors enabled FIELD_OPTION_INDEXED_NO_TERMVECTOR, /// All fields with termvectors enabled. 
Please note that only standard termvector fields are returned FIELD_OPTION_TERMVECTOR, /// All fields with termvectors with position values enabled FIELD_OPTION_TERMVECTOR_WITH_POSITION, /// All fields with termvectors with offset values enabled FIELD_OPTION_TERMVECTOR_WITH_OFFSET, /// All fields with termvectors with offset values and position values enabled FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET }; static const int32_t DEFAULT_TERMS_INDEX_DIVISOR; protected: bool closed; bool _hasChanges; int32_t refCount; public: /// Returns the current refCount for this reader int32_t getRefCount(); /// Increments the refCount of this IndexReader instance. RefCounts are used to determine when a reader can be /// closed safely, i.e. as soon as there are no more references. Be sure to always call a corresponding {@link /// #decRef}, in a finally clause; otherwise the reader may never be closed. Note that {@link #close} simply /// calls decRef(), which means that the IndexReader will not really be closed until {@link #decRef} has been /// called for all outstanding references. /// @see #decRef void incRef(); /// Decreases the refCount of this IndexReader instance. If the refCount drops to 0, then pending changes /// (if any) are committed to the index and this reader is closed. /// @see #incRef void decRef(); /// Returns a IndexReader reading the index in the given Directory, with readOnly = true. /// @param directory the index directory static IndexReaderPtr open(DirectoryPtr directory); /// Returns an IndexReader reading the index in the given Directory. You should pass readOnly = true, since it /// gives much better concurrent performance, unless you intend to do write operations (delete documents or change /// norms) with the reader. 
/// @param directory the index directory /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader static IndexReaderPtr open(DirectoryPtr directory, bool readOnly); /// Returns an IndexReader reading the index in the given {@link IndexCommit}. You should pass readOnly = true, /// since it gives much better concurrent performance, unless you intend to do write operations (delete documents /// or change norms) with the reader. /// @param commit the commit point to open /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader static IndexReaderPtr open(IndexCommitPtr commit, bool readOnly); /// Returns an IndexReader reading the index in the given Directory, with a custom {@link IndexDeletionPolicy}. /// You should pass readOnly=true, since it gives much better concurrent performance, unless you intend to do write /// operations (delete documents or change norms) with the reader. /// @param directory the index directory /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform /// deletes or to set norms); see {@link IndexWriter} for details. /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader static IndexReaderPtr open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, bool readOnly); /// Returns an IndexReader reading the index in the given Directory, with a custom {@link IndexDeletionPolicy}. /// You should pass readOnly=true, since it gives much better concurrent performance, unless you intend to do write /// operations (delete documents or change norms) with the reader. /// @param directory the index directory /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform /// deletes or to set norms); see {@link IndexWriter} for details. 
/// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the /// same effect as {@link IndexWriter#setTermIndexInterval} except that setting must be done at /// indexing time while this setting can be set per reader. When set to N, then one in every /// N*termIndexInterval terms in the index is loaded into memory. By setting this to a value > 1 /// you can reduce memory usage, at the expense of higher latency when loading a TermInfo. The /// default value is 1. Set this to -1 to skip loading the terms index entirely. static IndexReaderPtr open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); /// Returns an IndexReader reading the index in the given Directory, using a specific commit and with a custom /// {@link IndexDeletionPolicy}. You should pass readOnly=true, since it gives much better concurrent performance, /// unless you intend to do write operations (delete documents or change norms) with the reader. /// @param commit the specific {@link IndexCommit} to open; see {@link IndexReader#listCommits} to list all /// commits in a directory /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform /// deletes or to set norms); see {@link IndexWriter} for details. /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader static IndexReaderPtr open(IndexCommitPtr commit, IndexDeletionPolicyPtr deletionPolicy, bool readOnly); /// Returns an IndexReader reading the index in the given Directory, using a specific commit and with a custom {@link /// IndexDeletionPolicy}. You should pass readOnly=true, since it gives much better concurrent performance, unless /// you intend to do write operations (delete documents or change norms) with the reader. 
/// @param commit the specific {@link IndexCommit} to open; see {@link IndexReader#listCommits} to /// list all commits in a directory /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform deletes /// or to set norms); see {@link IndexWriter} for details. /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the same effect as /// {@link IndexWriter#setTermIndexInterval} except that setting must be done at indexing time while this setting can /// be set per reader. When set to N, then one in every N * termIndexInterval terms in the index is loaded into /// memory. By setting this to a value > 1 you can reduce memory usage, at the expense of higher latency when loading /// a TermInfo. The default value is 1. Set this to -1 to skip loading the terms index entirely. static IndexReaderPtr open(IndexCommitPtr commit, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); /// Refreshes an IndexReader if the index has changed since this instance was (re)opened. /// /// Opening an IndexReader is an expensive operation. This method can be used to refresh an existing IndexReader to /// reduce these costs. This method tries to only load segments that have changed or were created after the /// IndexReader was (re)opened. /// /// If the index has not changed since this instance was (re)opened, then this call is a NOOP and returns this /// instance. Otherwise, a new instance is returned. The old instance is not closed and remains usable. /// /// If the reader is reopened, even though they share resources internally, it's safe to make changes (deletions, /// norms) with the new reader. All shared mutable state obeys "copy on write" semantics to ensure the changes are /// not seen by other readers. 
/// /// You can determine whether a reader was actually reopened by comparing the old instance with the /// instance returned by this method: /// ///
        /// IndexReaderPtr reader = ... 
        /// ...
        /// IndexReaderPtr newReader = r.reopen();
        /// if (newReader != reader)
        /// {
        ///     ... // reader was reopened
        ///     reader->close();
        /// }
        /// reader = newReader;
        /// ...
        /// 
/// /// Be sure to synchronize that code so that other threads, if present, can never use reader after it has been /// closed and before it's switched to newReader. If this reader is a near real-time reader (obtained from /// {@link IndexWriter#getReader()}, reopen() will simply call writer.getReader() again for you, though this /// may change in the future. virtual IndexReaderPtr reopen(); /// Just like {@link #reopen()}, except you can change the readOnly of the original reader. If the index is /// unchanged but readOnly is different then a new reader will be returned. virtual IndexReaderPtr reopen(bool openReadOnly); /// Reopen this reader on a specific commit point. This always returns a readOnly reader. If the specified commit /// point matches what this reader is already on, and this reader is already readOnly, then this same instance is /// returned; if it is not already readOnly, a readOnly clone is returned. virtual IndexReaderPtr reopen(IndexCommitPtr commit); /// Efficiently clones the IndexReader (sharing most internal state). /// /// On cloning a reader with pending changes (deletions, norms), the original reader transfers its write lock to the /// cloned reader. This means only the cloned reader may make further changes to the index, and commit the changes /// to the index on close, but the old reader still reflects all changes made up until it was cloned. /// /// Like {@link #reopen()}, it's safe to make changes to either the original or the cloned reader: all shared mutable /// state obeys "copy on write" semantics to ensure the changes are not seen by other readers. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Clones the IndexReader and optionally changes readOnly. A readOnly reader cannot open a writable reader. virtual LuceneObjectPtr clone(bool openReadOnly, LuceneObjectPtr other = LuceneObjectPtr()); /// Returns the directory associated with this index. 
The default implementation returns the directory specified by /// subclasses when delegating to the IndexReader(Directory) constructor, or throws an UnsupportedOperation exception /// if one was not specified. virtual DirectoryPtr directory(); /// Returns the time the index in the named directory was last modified. Do not use this to check /// whether the reader is still up-to-date, use {@link #isCurrent()} instead. static int64_t lastModified(DirectoryPtr directory2); /// Reads version number from segments files. The version number is initialized with a timestamp /// and then increased by one for each change of the index. /// @param directory where the index resides. /// @return version number. static int64_t getCurrentVersion(DirectoryPtr directory); /// Reads commitUserData, previously passed to {@link IndexWriter#commit(MapStringString)}, from /// current index segments file. This will return null if {@link IndexWriter#commit(MapStringString)} /// has never been called for this index. static MapStringString getCommitUserData(DirectoryPtr directory); /// Version number when this IndexReader was opened. Not implemented in the IndexReader base class. /// /// If this reader is based on a Directory (ie, was created by calling {@link #open}, or {@link /// #reopen} on a reader based on a Directory), then this method returns the version recorded in the /// commit that the reader opened. This version is advanced every time {@link IndexWriter#commit} /// is called. /// /// If instead this reader is a near real-time reader (ie, obtained by a call to {@link /// IndexWriter#getReader}, or by calling {@link #reopen} on a near real-time reader), then this /// method returns the version of the last commit done by the writer. Note that even as further /// changes are made with the writer, the version will not changed until a commit is completed. /// Thus, you should not rely on this method to determine when a near real-time reader should be /// opened. 
Use {@link #isCurrent} instead. virtual int64_t getVersion(); /// Retrieve the String userData optionally passed to IndexWriter#commit. This will return null if /// {@link IndexWriter#commit(MapStringString)} has never been called for this index. virtual MapStringString getCommitUserData(); /// Check whether any new changes have occurred to the index since this reader was opened. /// /// If this reader is based on a Directory (ie, was created by calling {@link #open}, or {@link /// #reopen} on a reader based on a Directory), then this method checks if any further commits (see /// {@link IndexWriter#commit} have occurred in that directory). /// /// If instead this reader is a near real-time reader (ie, obtained by a call to {@link /// IndexWriter#getReader}, or by calling {@link #reopen} on a near real-time reader), then this /// method checks if either a new commit has occurred, or any new uncommitted changes have taken /// place via the writer. Note that even if the writer has only performed merging, this method /// will still return false. /// /// In any event, if this returns false, you should call {@link #reopen} to get a new reader that /// sees the changes. virtual bool isCurrent(); /// Checks is the index is optimized (if it has a single segment and no deletions). Not implemented /// in the IndexReader base class. /// @return true if the index is optimized; false otherwise virtual bool isOptimized(); /// Return an array of term frequency vectors for the specified document. The array contains a /// vector for each vectorized field in the document. Each vector contains terms and frequencies /// for all terms in a given vectorized field. If no such fields existed, the method returns null. /// The term vectors that are returned may either be of type {@link TermFreqVector} or of type /// {@link TermPositionVector} if positions or offsets have been stored. 
/// /// @param docNumber document for which term frequency vectors are returned /// @return array of term frequency vectors. May be null if no term vectors have been stored for the /// specified document. virtual Collection getTermFreqVectors(int32_t docNumber) = 0; /// Return a term frequency vector for the specified document and field. The returned vector contains /// terms and frequencies for the terms in the specified field of this document, if the field had the /// storeTermVector flag set. If termvectors had been stored with positions or offsets, a /// {@link TermPositionVector} is returned. /// /// @param docNumber document for which the term frequency vector is returned. /// @param field field for which the term frequency vector is returned. /// @return term frequency vector May be null if field does not exist in the specified document or /// term vector was not stored. virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field) = 0; /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays /// of the {@link TermFreqVector}. /// @param docNumber The number of the document to load the vector for /// @param field The name of the field to load /// @param mapper The {@link TermVectorMapper} to process the vector. Must not be null. virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) = 0; /// Map all the term vectors for all fields in a Document /// @param docNumber The number of the document to load the vector for /// @param mapper The {@link TermVectorMapper} to process the vector. Must not be null. virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) = 0; /// Returns true if an index exists at the specified directory. If the directory does not exist or /// if there is no index in it. 
/// @param directory the directory to check for an index /// @return true if an index exists; false otherwise static bool indexExists(DirectoryPtr directory); /// Returns the number of documents in this index. virtual int32_t numDocs() = 0; /// Returns one greater than the largest possible document number. This may be used to, eg., determine /// how big to allocate an array which will have an element for every document number in an index. virtual int32_t maxDoc() = 0; /// Returns the number of deleted documents. int32_t numDeletedDocs(); /// Returns the stored fields of the n'th Document in this index. /// /// NOTE: for performance reasons, this method does not check if the requested document is deleted, and /// therefore asking for a deleted document may yield unspecified results. Usually this is not required, /// however you can call {@link #isDeleted(int)} with the requested document ID to verify the document /// is not deleted. virtual DocumentPtr document(int32_t n); /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine /// what {@link Field}s to load and how they should be loaded. /// NOTE: If this Reader (more specifically, the underlying FieldsReader) is closed before the lazy /// {@link Field} is loaded an exception may be thrown. If you want the value of a lazy {@link Field} /// to be available after closing you must explicitly load it or fetch the Document again with a new /// loader. /// /// NOTE: for performance reasons, this method does not check if the requested document is deleted, /// and therefore asking for a deleted document may yield unspecified results. Usually this is not /// required, however you can call {@link #isDeleted(int32_t)} with the requested document ID to verify /// the document is not deleted. /// /// @param n Get the document at the n'th position /// @param fieldSelector The {@link FieldSelector} to use to determine what Fields should be loaded on /// the Document. 
May be null, in which case all Fields will be loaded. /// @return The stored fields of the {@link Document} at the n'th position /// @see Fieldable /// @see FieldSelector /// @see SetBasedFieldSelector /// @see LoadFirstFieldSelector virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector) = 0; /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n) = 0; /// Returns true if any documents have been deleted virtual bool hasDeletions() = 0; /// Used for testing virtual bool hasChanges(); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. This is used /// by the search code to score documents. /// @see Field#setBoost(double) virtual ByteArray norms(const String& field) = 0; /// Reads the byte-encoded normalization factor for the named field of every document. This is used /// by the search code to score documents. /// @see Field#setBoost(double) virtual void norms(const String& field, ByteArray norms, int32_t offset) = 0; /// Resets the normalization factor for the named field of the named document. The norm represents /// the product of the field's {@link Fieldable#setBoost(double) boost} and its {@link /// Similarity#lengthNorm(String, int) length normalization}. Thus, to preserve the length normalization /// values when resetting this, one should base the new value upon the old. /// /// NOTE: If this field does not store norms, then this method call will silently do nothing. /// /// @see #norms(String) /// @see Similarity#decodeNorm(byte) virtual void setNorm(int32_t doc, const String& field, uint8_t value); /// Resets the normalization factor for the named field of the named document. /// /// @see #norms(String) /// @see Similarity#decodeNorm(byte) virtual void setNorm(int32_t doc, const String& field, double value); /// Returns an enumeration of all the terms in the index. 
The enumeration is ordered by /// Term::compareTo(). Each term is greater than all that precede it in the enumeration. /// Note that after calling terms(), {@link TermEnum#next()} must be called on the resulting /// enumeration before calling other methods such as {@link TermEnum#term()}. virtual TermEnumPtr terms() = 0; /// Returns an enumeration of all terms starting at a given term. If the given term does not /// exist, the enumeration is positioned at the first term greater than the supplied term. /// The enumeration is ordered by Term::compareTo(). Each term is greater than all that precede /// it in the enumeration. virtual TermEnumPtr terms(TermPtr t) = 0; /// Returns the number of documents containing the term t. virtual int32_t docFreq(TermPtr t) = 0; /// Returns an enumeration of all the documents which contain term. For each document, the /// document number, the frequency of the term in that document is also provided, for use in /// search scoring. If term is null, then all non-deleted docs are returned with freq=1. /// The enumeration is ordered by document number. Each document number is greater than all /// that precede it in the enumeration. virtual TermDocsPtr termDocs(TermPtr term); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs() = 0; /// Returns an enumeration of all the documents which contain term. For each document, in /// addition to the document number and frequency of the term in that document, a list of all /// of the ordinal positions of the term in the document is available. Thus, this method /// positions of the term in the document is available. /// This positional information facilitates phrase and proximity searching. /// The enumeration is ordered by document number. Each document number is greater than all /// that precede it in the enumeration. virtual TermPositionsPtr termPositions(TermPtr term); /// Returns an unpositioned {@link TermPositions} enumerator. 
virtual TermPositionsPtr termPositions() = 0; /// Deletes the document numbered docNum. Once a document is deleted it will not appear in /// TermDocs or TermPostitions enumerations. Attempts to read its field with the {@link /// #document} method will result in an error. The presence of this document may still be /// reflected in the {@link #docFreq} statistic, though this will be corrected eventually as /// the index is further modified. virtual void deleteDocument(int32_t docNum); /// Deletes all documents that have a given term indexed. This is useful if one uses a /// document field to hold a unique ID string for the document. Then to delete such a /// document, one merely constructs a term with the appropriate field and the unique ID string /// as its text and passes it to this method. See {@link #deleteDocument(int)} for information /// about when this deletion will become effective. /// @return the number of documents deleted virtual int32_t deleteDocuments(TermPtr term); /// Undeletes all documents currently marked as deleted in this index. virtual void undeleteAll(); void flush(); /// @param commitUserData Opaque Map (String -> String) that's recorded into the segments file /// in the index, and retrievable by {@link IndexReader#getCommitUserData}. void flush(MapStringString commitUserData); /// Commit changes resulting from delete, undeleteAll, or setNorm operations. /// If an exception is hit, then either no changes or all changes will have been committed to /// the index (transactional semantics). void commit(MapStringString commitUserData); /// Closes files associated with this index. Also saves any new deletions to disk. /// No other methods should be called after this has been called. void close(); /// Get a list of unique field names that exist in this index and have the specified field option information. 
/// @param fieldOption specifies which field option should be available for the returned fields /// @return Collection of Strings indicating the names of the fields. virtual HashSet getFieldNames(FieldOption fieldOption) = 0; /// Return the IndexCommit that this reader has opened. This method is only implemented by those /// readers that correspond to a Directory with its own segments_N file. virtual IndexCommitPtr getIndexCommit(); /// Prints the filename and size of each file within a given compound file. Add the -extract flag /// to extract files to the current working directory. In order to make the extracted version of /// the index work, you have to copy the segments file from the compound index into the directory /// where the extracted files are stored. /// @param args Usage: IndexReader [-extract] static void main(Collection args); /// Returns all commit points that exist in the Directory. Normally, because the default is {@link /// KeepOnlyLastCommitDeletionPolicy}, there would be only one commit point. But if you're using a /// custom {@link IndexDeletionPolicy} then there could be many commits. Once you have a given /// commit, you can open a reader on it by calling {@link IndexReader#open(IndexCommit,bool)}. /// There must be at least one commit in the Directory, else this method throws an exception. /// Note that if a commit is in progress while this method is running, that commit may or may not /// be returned array. static Collection listCommits(DirectoryPtr dir); /// Returns the sequential sub readers that this reader is logically composed of. For example, /// IndexSearcher uses this API to drive searching by one sub reader at a time. If this reader is /// not composed of sequential child readers, it should return null. If this method returns an empty /// array, that means this reader is a null reader (for example a MultiReader that has no sub readers). 
/// /// NOTE: You should not try using sub-readers returned by this method to make any changes (setNorm, /// deleteDocument, etc.). While this might succeed for one composite reader (like MultiReader), it /// will most likely lead to index corruption for other readers (like DirectoryReader obtained /// through {@link #open}. Use the parent reader directly. virtual Collection getSequentialSubReaders(); virtual LuceneObjectPtr getFieldCacheKey(); /// This returns null if the reader has no deletions. virtual LuceneObjectPtr getDeletesCacheKey(); /// Returns the number of unique terms (across all fields) in this reader. /// /// This method returns int64_t, even though internally Lucene cannot handle more than 2^31 unique /// terms, for a possible future when this limitation is removed. virtual int64_t getUniqueTermCount(); /// For IndexReader implementations that use TermInfosReader to read terms, this returns the current /// indexDivisor as specified when the reader was opened. virtual int32_t getTermInfosIndexDivisor(); protected: void ensureOpen(); static IndexReaderPtr open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, IndexCommitPtr commit, bool readOnly, int32_t termInfosIndexDivisor); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value) = 0; /// Implements deletion of the document numbered docNum. /// Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. virtual void doDelete(int32_t docNum) = 0; /// Implements actual undeleteAll() in subclass. virtual void doUndeleteAll() = 0; /// Does nothing by default. Subclasses that require a write lock for index modifications must /// implement this method. virtual void acquireWriteLock(); /// Commit changes resulting from delete, undeleteAll, or setNorm operations. /// If an exception is hit, then either no changes or all changes will have been committed to /// the index (transactional semantics). 
void commit(); /// Implements commit. virtual void doCommit(MapStringString commitUserData) = 0; /// Implements close. virtual void doClose() = 0; friend class DirectoryReader; friend class ParallelReader; }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexSearcher.h000066400000000000000000000116071217574114600216400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXSEARCHER_H #define INDEXSEARCHER_H #include "Searcher.h" namespace Lucene { /// Implements search over a single IndexReader. /// /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or {@link /// #search(QueryPtr, FilterPtr, int32_t)} methods. For performance reasons it is recommended to open only /// one IndexSearcher and use it for all of your searches. /// /// NOTE: {@link IndexSearcher} instances are completely thread safe, meaning multiple threads can call any /// of its methods, concurrently. If your application requires external synchronization, you should not /// synchronize on the IndexSearcher instance; use your own (non-Lucene) objects instead. class LPPAPI IndexSearcher : public Searcher { public: /// Creates a searcher searching the index in the named directory. You should pass readOnly = true, /// since it gives much better concurrent performance, unless you intend to do write operations (delete /// documents or change norms) with the underlying IndexReader. /// @param path Directory where IndexReader will be opened /// @param readOnly If true, the underlying IndexReader will be opened readOnly IndexSearcher(DirectoryPtr path, bool readOnly = true); /// Creates a searcher searching the provided index. 
IndexSearcher(IndexReaderPtr reader); /// Directly specify the reader, subReaders and their docID starts. IndexSearcher(IndexReaderPtr reader, Collection subReaders, Collection docStarts); virtual ~IndexSearcher(); LUCENE_CLASS(IndexSearcher); public: IndexReaderPtr reader; protected: bool closeReader; Collection subReaders; Collection docStarts; bool fieldSortDoTrackScores; bool fieldSortDoMaxScore; public: /// Return the {@link IndexReader} this searches. IndexReaderPtr getIndexReader(); /// Note that the underlying IndexReader is not closed, if IndexSearcher was constructed with /// IndexSearcher(IndexReaderPtr reader). If the IndexReader was supplied implicitly by specifying a /// directory, then the IndexReader gets closed. virtual void close(); virtual int32_t docFreq(TermPtr term); virtual DocumentPtr doc(int32_t n); virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector); virtual int32_t maxDoc(); using Searcher::search; using Searcher::explain; virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n); virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort); /// Just like {@link #search(WeightPtr, FilterPtr, int32_t, SortPtr)}, but you choose whether or not the /// fields in the returned {@link FieldDoc} instances should be set by specifying fillFields. /// /// NOTE: this does not compute scores by default. If you need scores, create a {@link TopFieldCollector} /// instance by calling {@link TopFieldCollector#create} and then pass that to {@link #search(WeightPtr, /// FilterPtr, CollectorPtr)}. 
virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort, bool fillFields); virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr results); virtual QueryPtr rewrite(QueryPtr query); virtual ExplanationPtr explain(WeightPtr weight, int32_t doc); /// By default, no scores are computed when sorting by field (using {@link #search(QueryPtr, FilterPtr, /// int32_t, SortPtr)}). You can change that, per IndexSearcher instance, by calling this method. Note /// that this will incur a CPU cost. /// /// @param doTrackScores If true, then scores are returned for every matching document in {@link TopFieldDocs}. /// @param doMaxScore If true, then the max score for all matching docs is computed. virtual void setDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore); protected: void ConstructSearcher(IndexReaderPtr reader, bool closeReader); void gatherSubReaders(Collection allSubReaders, IndexReaderPtr reader); void searchWithFilter(IndexReaderPtr reader, WeightPtr weight, FilterPtr filter, CollectorPtr collector); }; } #endif LucenePlusPlus-rel_3.0.4/include/IndexWriter.h000066400000000000000000001762661217574114600213750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXWRITER_H #define INDEXWRITER_H #include "MergePolicy.h" namespace Lucene { /// An IndexWriter creates and maintains an index. /// /// The create argument to the {@link #IndexWriter(DirectoryPtr, AnalyzerPtr, bool, int32_t) constructor} /// determines whether a new index is created, or whether an existing index is opened. Note that you can /// open an index with create=true even while readers are using the index. 
The old readers will continue /// to search the "point in time" snapshot they had opened, and won't see the newly created index until /// they re-open. There are also {@link #IndexWriter(DirectoryPtr, AnalyzerPtr, int32_t) constructors} /// with no create argument which will create a new index if there is not already an index at the provided /// path and otherwise open the existing index. /// /// In either case, documents are added with {@link #addDocument(DocumentPtr) addDocument} and removed /// with {@link #deleteDocuments(TermPtr)} or {@link #deleteDocuments(QueryPtr)}. A document can be updated /// with {@link #updateDocument(TermPtr, DocumentPtr) updateDocument} (which just deletes and then adds /// the entire document). When finished adding, deleting and updating documents, {@link #close() close} /// should be called. /// /// These changes are buffered in memory and periodically flushed to the {@link Directory} (during the /// above method calls). A flush is triggered when there are enough buffered deletes (see /// {@link #setMaxBufferedDeleteTerms}) or enough added documents since the last flush, whichever is /// sooner. For the added documents, flushing is triggered either by RAM usage of the documents (see /// {@link #setRAMBufferSizeMB}) or the number of added documents. The default is to flush when RAM usage /// hits 16 MB. For best indexing speed you should flush by RAM usage with a large RAM buffer. Note that /// flushing just moves the internal buffered state in IndexWriter into the index, but these changes are /// not visible to IndexReader until either {@link #commit()} or {@link #close} is called. A flush may /// also trigger one or more segment merges which by default run with a background thread so as not to /// block the addDocument calls (see mergePolicy below for changing the {@link MergeScheduler}). 
/// /// If an index will not have more documents added for a while and optimal search performance is desired, /// then either the full {@link #optimize() optimize} method or partial {@link #optimize(int32_t)} method /// should be called before the index is closed. /// /// Opening an IndexWriter creates a lock file for the directory in use. Trying to open another IndexWriter /// on the same directory will lead to a LockObtainFailed exception. The LockObtainFailed exception is also /// thrown if an IndexReader on the same directory is used to delete documents from the index. /// /// IndexWriter allows an optional {@link IndexDeletionPolicy} implementation to be specified. You can use /// this to control when prior commits are deleted from the index. The default policy is {@link /// KeepOnlyLastCommitDeletionPolicy} which removes all prior commits as soon as a new commit is done (this /// matches behavior before 2.2). Creating your own policy can allow you to explicitly keep previous /// "point in time" commits alive in the index for some time, to allow readers to refresh to the new commit /// without having the old commit deleted out from under them. This is necessary on file systems like NFS /// that do not support "delete on last close" semantics, which Lucene's "point in time" search normally /// relies on. /// /// IndexWriter allows you to separately change the {@link MergePolicy} and the {@link MergeScheduler}. /// The {@link MergePolicy} is invoked whenever there are changes to the segments in the index. Its role /// is to select which merges to do, if any, and return a {@link MergePolicy.MergeSpecification} describing /// the merges. It also selects merges to do for optimize(). (The default is {@link LogByteSizeMergePolicy}. /// Then, the {@link MergeScheduler} is invoked with the requested merges and it decides when and how to run /// the merges. The default is {@link ConcurrentMergeScheduler}. 
/// /// NOTE: if you hit an std::bad_alloc then IndexWriter will quietly record this fact and block all future /// segment commits. This is a defensive measure in case any internal state (buffered documents and /// deletions) were corrupted. Any subsequent calls to {@link #commit()} will throw an IllegalState /// exception. The only course of action is to call {@link #close()}, which internally will call {@link /// #rollback()}, to undo any changes to the index since the last commit. You can also just call {@link /// #rollback()} directly. /// /// NOTE: {@link IndexWriter} instances are completely thread safe, meaning multiple threads can call any of /// its methods, concurrently. If your application requires external synchronization, you should not /// synchronize on the IndexWriter instance as this may cause deadlock; use your own (non-Lucene) objects /// instead. /// /// Clarification: Check Points (and commits) /// IndexWriter writes new index files to the directory without writing a new segments_N file which /// references these new files. It also means that the state of the in memory SegmentInfos object is different /// than the most recent segments_N file written to the directory. /// /// Each time the SegmentInfos is changed, and matches the (possibly modified) directory files, we have a new /// "check point". If the modified/new SegmentInfos is written to disk - as a new (generation of) segments_N /// file - this check point is also an IndexCommit. /// /// A new checkpoint always replaces the previous checkpoint and becomes the new "front" of the index. This /// allows the IndexFileDeleter to delete files that are referenced only by stale checkpoints (files that were /// created since the last commit, but are no longer referenced by the "front" of the index). For this, /// IndexFileDeleter keeps track of the last non commit checkpoint. 
class LPPAPI IndexWriter : public LuceneObject { protected: IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl, IndexingChainPtr indexingChain, IndexCommitPtr commit); public: IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl); IndexWriter(DirectoryPtr d, AnalyzerPtr a, int32_t mfl); IndexWriter(DirectoryPtr d, AnalyzerPtr a, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl); IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl); IndexWriter(DirectoryPtr d, AnalyzerPtr a, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl, IndexCommitPtr commit); virtual ~IndexWriter(); LUCENE_CLASS(IndexWriter); protected: int64_t writeLockTimeout; /// The normal read buffer size defaults to 1024, but increasing this during merging seems to /// yield performance gains. However we don't want to increase it too much because there are /// quite a few BufferedIndexInputs created during merging. 
static const int32_t MERGE_READ_BUFFER_SIZE; SynchronizePtr messageIDLock; static int32_t MESSAGE_ID; int32_t messageID; bool hitOOM; DirectoryPtr directory; // where this index resides AnalyzerPtr analyzer; // how to analyze text bool create; IndexDeletionPolicyPtr deletionPolicy; IndexingChainPtr indexingChain; IndexCommitPtr indexCommit; SimilarityPtr similarity; // how to normalize int64_t changeCount; // increments every time a change is completed int64_t lastCommitChangeCount; // last changeCount that was committed SegmentInfosPtr rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails MapSegmentInfoInt rollbackSegments; SegmentInfosPtr localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails int32_t localFlushedDocCount; SegmentInfosPtr segmentInfos; // the segments DocumentsWriterPtr docWriter; IndexFileDeleterPtr deleter; SetSegmentInfo segmentsToOptimize; // used by optimize to note those needing optimization int32_t optimizeMaxNumSegments; LockPtr writeLock; int32_t termIndexInterval; bool closed; bool closing; SetSegmentInfo mergingSegments; MergePolicyPtr mergePolicy; MergeSchedulerPtr mergeScheduler; Collection pendingMerges; SetOneMerge runningMerges; Collection mergeExceptions; int64_t mergeGen; bool stopMerges; int32_t flushCount; int32_t flushDeletesCount; /// Used to only allow one addIndexes to proceed at once int32_t readCount; // count of how many threads are holding read lock int64_t writeThread; // non-null if any thread holds write lock int32_t upgradeCount; int32_t readerTermsIndexDivisor; // This is a "write once" variable (like the organic dye on a DVD-R that may or may not // be heated by a laser and then cooled to permanently record the event): it's false, // until getReader() is called for the first time, at which point it's switched to true // and never changes back to false. 
Once this is true, we hold open and reuse SegmentReader // instances internally for applying deletes, doing merges, and reopening near real-time readers. bool poolReaders; /// The maximum number of terms that will be indexed for a single field in a document. This /// limits the amount of memory required for indexing, so that collections with very large files /// will not crash the indexing process by running out of memory. /// Note that this effectively truncates large documents, excluding from the index terms that /// occur further in the document. If you know your source documents are large, be sure to set /// this value high enough to accommodate the expected size. If you set it to INT_MAX, then the /// only limit is your memory, but you should anticipate an std::bad_alloc. By default, no more /// than 10,000 terms will be indexed for a field. /// /// @see #setMaxFieldLength(int32_t) int32_t maxFieldLength; InfoStreamPtr infoStream; static InfoStreamPtr defaultInfoStream; HashSet synced; // files that have been sync'd already HashSet syncing; // files that are now being sync'd IndexReaderWarmerPtr mergedSegmentWarmer; /// Used only by commit; lock order is commitLock -> IW SynchronizePtr commitLock; INTERNAL: SegmentInfosPtr pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) int64_t pendingCommitChangeCount; ReaderPoolPtr readerPool; public: /// Default value for the write lock timeout (1,000). /// @see #setDefaultWriteLockTimeout static int64_t WRITE_LOCK_TIMEOUT; static const String WRITE_LOCK_NAME; /// Value to denote a flush trigger is disabled. static const int32_t DISABLE_AUTO_FLUSH; /// Disabled by default (because IndexWriter flushes by RAM usage by default). Change using /// {@link #setMaxBufferedDocs(int32_t)}. static const int32_t DEFAULT_MAX_BUFFERED_DOCS; /// Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). /// Change using {@link #setRAMBufferSizeMB}. 
static const double DEFAULT_RAM_BUFFER_SIZE_MB; /// Disabled by default (because IndexWriter flushes by RAM usage by default). Change using /// {@link #setMaxBufferedDeleteTerms(int32_t)}. static const int32_t DEFAULT_MAX_BUFFERED_DELETE_TERMS; /// Default value is 10,000. Change using {@link #setMaxFieldLength(int32_t)}. static const int32_t DEFAULT_MAX_FIELD_LENGTH; /// Default value is 128. Change using {@link #setTermIndexInterval(int32_t)}. static const int32_t DEFAULT_TERM_INDEX_INTERVAL; /// Absolute hard maximum length for a term. If a term arrives from the analyzer longer than /// this length, it is skipped and a message is printed to infoStream, if set (see {@link /// #setInfoStream}). static int32_t MAX_TERM_LENGTH(); /// Sets the maximum field length to INT_MAX static const int32_t MaxFieldLengthUNLIMITED; /// Sets the maximum field length to {@link #DEFAULT_MAX_FIELD_LENGTH} static const int32_t MaxFieldLengthLIMITED; public: virtual void initialize(); /// Returns a read-only reader, covering all committed as well as un-committed changes to the /// index. This provides "near real-time" searching, in that changes made during an IndexWriter /// session can be quickly made available for searching without closing the writer nor calling /// {@link #commit}. /// /// Note that this is functionally equivalent to calling {#commit} and then using {@link /// IndexReader#open} to open a new reader. But the turnaround time of this method should be /// faster since it avoids the potentially costly {@link #commit}. /// /// You must close the {@link IndexReader} returned by this method once you are done using it. /// /// It's near real-time because there is no hard guarantee on how quickly you can get a new /// reader after making changes with IndexWriter. You'll have to experiment in your situation /// to determine if it's fast enough. As this is a new and experimental feature, please report /// back on your findings so we can learn, improve and iterate. 
/// /// The resulting reader supports {@link IndexReader#reopen}, but that call will simply forward /// back to this method (though this may change in the future). /// /// The very first time this method is called, this writer instance will make every effort to /// pool the readers that it opens for doing merges, applying deletes, etc. This means additional /// resources (RAM, file descriptors, CPU time) will be consumed. /// /// For lower latency on reopening a reader, you should call {@link #setMergedSegmentWarmer} to /// pre-warm a newly merged segment before it's committed to the index. This is important for /// minimizing index-to-search delay after a large merge. /// /// If an addIndexes* call is running in another thread, then this reader will only search those /// segments from the foreign index that have been successfully copied over, so far. /// /// NOTE: Once the writer is closed, any outstanding readers may continue to be used. However, /// if you attempt to reopen any of those readers, you'll hit an AlreadyClosed exception. /// /// NOTE: This API is experimental and might change in incompatible ways in the next release. /// /// @return IndexReader that covers entire index plus all changes made so far by this IndexWriter /// instance virtual IndexReaderPtr getReader(); /// Like {@link #getReader}, except you can specify which termInfosIndexDivisor should be used for /// any newly opened readers. /// /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the /// same effect as {@link IndexWriter#setTermIndexInterval} except that setting must be done at /// indexing time while this setting can be set per reader. When set to N, then one in every /// N*termIndexInterval terms in the index is loaded into memory. By setting this to a value > 1 /// you can reduce memory usage, at the expense of higher latency when loading a TermInfo. /// The default value is 1. Set this to -1 to skip loading the terms index entirely. 
virtual IndexReaderPtr getReader(int32_t termInfosIndexDivisor); /// Obtain the number of deleted docs for a pooled reader. If the reader isn't being pooled, /// the segmentInfo's delCount is returned. virtual int32_t numDeletedDocs(SegmentInfoPtr info); virtual void acquireWrite(); virtual void releaseWrite(); virtual void acquireRead(); /// Allows one readLock to upgrade to a writeLock even if there are other readLocks as long /// as all other readLocks are also blocked in this method virtual void upgradeReadToWrite(); virtual void releaseRead(); virtual bool isOpen(bool includePendingClose); virtual void message(const String& message); /// Get the current setting of whether newly flushed segments will use the compound file format. /// Note that this just returns the value previously set with setUseCompoundFile(bool), or the /// default value (true). You cannot use this to query the status of previously flushed segments. /// /// Note that this method is a convenience method: it just calls mergePolicy.getUseCompoundFile /// as long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument /// exception is thrown. /// @see #setUseCompoundFile(bool) virtual bool getUseCompoundFile(); /// Setting to turn on usage of a compound file. When on, multiple files for each segment are /// merged into a single file when a new segment is flushed. /// /// Note that this method is a convenience method: it just calls mergePolicy.setUseCompoundFile /// as long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument /// exception is thrown. virtual void setUseCompoundFile(bool value); /// Set the Similarity implementation used by this IndexWriter. virtual void setSimilarity(SimilarityPtr similarity); /// Return the Similarity implementation used by this IndexWriter. /// This defaults to the current value of {@link Similarity#getDefault()}. virtual SimilarityPtr getSimilarity(); /// Set the interval between indexed terms. 
Large values cause less memory to be used by /// IndexReader, but slow random-access to terms. Small values cause more memory to be used by /// an IndexReader, and speed random-access to terms. /// /// This parameter determines the amount of computation required per query term, regardless of /// the number of documents that contain that term. In particular, it is the maximum number of /// other terms that must be scanned before a term is located and its frequency and position /// information may be processed. In a large index with user-entered query terms, query /// processing time is likely to be dominated not by term lookup but rather by the processing of /// frequency and positional data. In a small index or when many uncommon query terms are /// generated (eg., by wildcard queries) term lookup may become a dominant cost. /// /// In particular, numUniqueTerms/interval terms are read into memory by an IndexReader, and on /// average, interval/2 terms must be scanned for each random term access. /// /// @see #DEFAULT_TERM_INDEX_INTERVAL virtual void setTermIndexInterval(int32_t interval); /// Return the interval between indexed terms. /// @see #setTermIndexInterval(int32_t) virtual int32_t getTermIndexInterval(); /// Set the merge policy used by this writer. virtual void setMergePolicy(MergePolicyPtr mp); /// Returns the current MergePolicy in use by this writer. /// @see #setMergePolicy virtual MergePolicyPtr getMergePolicy(); /// Set the merge scheduler used by this writer. virtual void setMergeScheduler(MergeSchedulerPtr mergeScheduler); /// Returns the current MergeScheduler in use by this writer. /// @see #setMergeScheduler virtual MergeSchedulerPtr getMergeScheduler(); /// Determines the largest segment (measured by document count) that may be merged with other /// segments. Small values (eg., less than 10,000) are best for interactive indexing, as this /// limits the length of pauses while indexing to a few seconds. 
Larger values are best for /// batched indexing and speedier searches. /// /// The default value is INT_MAX. /// /// Note that this method is a convenience method: it just calls mergePolicy.setMaxMergeDocs as /// long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument /// exception is thrown. /// /// The default merge policy ({@link LogByteSizeMergePolicy}) also allows you to set this limit /// by net size (in MB) of the segment, using {@link LogByteSizeMergePolicy#setMaxMergeMB}. virtual void setMaxMergeDocs(int32_t maxMergeDocs); /// Returns the largest segment (measured by document count) that may be merged with other /// segments. /// /// Note that this method is a convenience method: it just calls mergePolicy.getMaxMergeDocs as /// long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument /// exception is thrown. /// /// @see #setMaxMergeDocs virtual int32_t getMaxMergeDocs(); /// The maximum number of terms that will be indexed for a single field in a document. This /// limits the amount of memory required for indexing, so that collections with very large files /// will not crash the indexing process by running out of memory. This setting refers to the /// number of running terms, not to the number of different terms. /// Note: this silently truncates large documents, excluding from the index all terms that occur /// further in the document. If you know your source documents are large, be sure to set this /// value high enough to accommodate the expected size. If you set it to INT_MAX, then the only /// limit is your memory, but you should anticipate an std::bad_alloc. /// By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be indexed for a field. virtual void setMaxFieldLength(int32_t maxFieldLength); /// Returns the maximum number of terms that will be indexed for a single field in a document. 
/// @see #setMaxFieldLength virtual int32_t getMaxFieldLength(); /// Sets the termsIndexDivisor passed to any readers that IndexWriter opens, for example when /// applying deletes or creating a near-real-time reader in {@link IndexWriter#getReader}. /// Default value is {@link IndexReader#DEFAULT_TERMS_INDEX_DIVISOR}. virtual void setReaderTermsIndexDivisor(int32_t divisor); /// @see #setReaderTermsIndexDivisor() virtual int32_t getReaderTermsIndexDivisor(); /// Determines the minimal number of documents required before the buffered in-memory documents /// are flushed as a new Segment. Large values generally gives faster indexing. /// /// When this is set, the writer will flush every maxBufferedDocs added documents. Pass in /// {@link #DISABLE_AUTO_FLUSH} to prevent triggering a flush due to number of buffered /// documents. Note that if flushing by RAM usage is also enabled, then the flush will be /// triggered by whichever comes first. /// /// Disabled by default (writer flushes by RAM usage). /// /// @see #setRAMBufferSizeMB virtual void setMaxBufferedDocs(int32_t maxBufferedDocs); /// Returns the number of buffered added documents that will trigger a flush if enabled. /// @see #setMaxBufferedDocs virtual int32_t getMaxBufferedDocs(); /// Determines the amount of RAM that may be used for buffering added documents and deletions /// before they are flushed to the Directory. Generally for faster indexing performance it's /// best to flush by RAM usage instead of document count and use as large a RAM buffer as you can. /// /// When this is set, the writer will flush whenever buffered documents and deletions use this /// much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a flush due to RAM usage. /// Note that if flushing by document count is also enabled, then the flush will be triggered by /// whichever comes first. /// /// Note: the account of RAM usage for pending deletions is only approximate. 
Specifically, if /// you delete by Query, Lucene currently has no way to measure the RAM usage of individual /// Queries so the accounting will under-estimate and you should compensate by either calling /// commit() periodically yourself, or by using {@link #setMaxBufferedDeleteTerms} to flush by /// count instead of RAM usage (each buffered delete Query counts as one). /// /// Note: because IndexWriter uses int32_t when managing its internal storage, the absolute /// maximum value for this setting is somewhat less than 2048 MB. The precise limit depends on /// various factors, such as how large your documents are, how many fields have norms, etc., so /// it's best to set this value comfortably under 2048. /// /// The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. virtual void setRAMBufferSizeMB(double mb); /// Returns the value set by {@link #setRAMBufferSizeMB} if enabled. virtual double getRAMBufferSizeMB(); /// Determines the minimal number of delete terms required before the buffered in-memory delete /// terms are applied and flushed. If there are documents buffered in memory at the time, they /// are merged and a new segment is created. /// /// Disabled by default (writer flushes by RAM usage). /// @see #setRAMBufferSizeMB virtual void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms); /// Returns the number of buffered deleted terms that will trigger a flush if enabled. /// @see #setMaxBufferedDeleteTerms virtual int32_t getMaxBufferedDeleteTerms(); /// Determines how often segment indices are merged by addDocument(). With smaller values, less /// RAM is used while indexing, and searches on unoptimized indices are faster, but indexing /// speed is slower. With larger values, more RAM is used during indexing, and while searches /// on unoptimized indices are slower, indexing is faster. Thus larger values (> 10) are best /// for batch index creation, and smaller values (< 10) for indices that are interactively maintained. 
/// /// Note that this method is a convenience method: it just calls mergePolicy.setMergeFactor as long /// as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument exception /// is thrown. This must never be less than 2. The default value is 10. virtual void setMergeFactor(int32_t mergeFactor); /// Returns the number of segments that are merged at once and also controls the total number of /// segments allowed to accumulate in the index. /// /// Note that this method is a convenience method: it just calls mergePolicy.getMergeFactor as long /// as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument exception /// is thrown. /// @see #setMergeFactor virtual int32_t getMergeFactor(); /// If non-null, this will be the default infoStream used by a newly instantiated IndexWriter. /// @see #setInfoStream static void setDefaultInfoStream(InfoStreamPtr infoStream); /// Returns the current default infoStream for newly instantiated IndexWriters. /// @see #setDefaultInfoStream static InfoStreamPtr getDefaultInfoStream(); /// If non-null, information about merges, deletes and a message when maxFieldLength is reached /// will be printed to this. virtual void setInfoStream(InfoStreamPtr infoStream); /// Returns the current infoStream in use by this writer. /// @see #setInfoStream virtual InfoStreamPtr getInfoStream(); /// Returns true if verbosing is enabled (i.e., infoStream != null). virtual bool verbose(); /// Sets the maximum time to wait for a write lock (in milliseconds) for this instance of /// IndexWriter. @see #setDefaultWriteLockTimeout to change the default value for all instances /// of IndexWriter. virtual void setWriteLockTimeout(int64_t writeLockTimeout); /// Returns allowed timeout when acquiring the write lock. /// @see #setWriteLockTimeout virtual int64_t getWriteLockTimeout(); /// Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock /// (in milliseconds). 
static void setDefaultWriteLockTimeout(int64_t writeLockTimeout); /// Returns default write lock timeout for newly instantiated IndexWriters. /// @see #setDefaultWriteLockTimeout static int64_t getDefaultWriteLockTimeout(); /// Commits all changes to an index and closes all associated files. Note that this may be /// a costly operation, so try to re-use a single writer instead of closing and opening a /// new one. See {@link #commit()} for caveats about write caching done by some IO devices. /// /// If an Exception is hit during close, eg due to disk full or some other reason, then both /// the on-disk index and the internal state of the IndexWriter instance will be consistent. /// However, the close will not be complete even though part of it (flushing buffered documents) /// may have succeeded, so the write lock will still be held. /// /// If you can correct the underlying cause (eg free up some disk space) then you can call /// close() again. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer, again. virtual void close(); /// Closes the index with or without waiting for currently running merges to finish. This is /// only meaningful when using a MergeScheduler that runs merges in background threads. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer, again. /// /// NOTE: it is dangerous to always call close(false), especially when IndexWriter is not open /// for very long, because this can result in "merge starvation" whereby long merges will never /// have a chance to finish. This will cause too many segments in your index over time. /// /// @param waitForMerges if true, this call will block until all merges complete; else, it will /// ask all running merges to abort, wait until those merges have finished (which should be at /// most a few seconds), and then return. virtual void close(bool waitForMerges); /// Returns the Directory used by this index. 
virtual DirectoryPtr getDirectory(); /// Returns the analyzer used by this index. virtual AnalyzerPtr getAnalyzer(); /// Returns total number of docs in this index, including docs not yet flushed (still in the /// RAM buffer), not counting deletions. /// @see #numDocs virtual int32_t maxDoc(); /// Returns total number of docs in this index, including docs not yet flushed (still in the /// RAM buffer), and including deletions. /// NOTE: buffered deletions are not counted. If you really need these to be counted you should /// call {@link #commit()} first. virtual int32_t numDocs(); virtual bool hasDeletions(); /// Adds a document to this index. If the document contains more than {@link /// #setMaxFieldLength(int32_t)} terms for a given field, the remainder are discarded. /// /// Note that if an Exception is hit (for example disk full) then the index will be consistent, /// but this document may not have been added. Furthermore, it's possible the index will have /// one segment in non-compound format even when using compound files (when a merge has partially /// succeeded). /// /// This method periodically flushes pending documents to the Directory, and also periodically /// triggers segment merges in the index according to the {@link MergePolicy} in use. /// /// Merges temporarily consume space in the directory. The amount of space required is up to 1X /// the size of all segments being merged, when no readers/searchers are open against the index, /// and up to 2X the size of all segments being merged when readers/searchers are open against the index /// (see {@link #optimize()} for details). The sequence of primitive merge operations performed /// is governed by the merge policy. /// /// Note that each term in the document can be no longer than 16383 characters, otherwise an /// IllegalArgument exception will be thrown. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
virtual void addDocument(DocumentPtr doc); /// Adds a document to this index, using the provided analyzer instead of the value of {@link /// #getAnalyzer()}. If the document contains more than {@link #setMaxFieldLength(int32_t)} terms /// for a given field, the remainder are discarded. /// /// See {@link #addDocument(DocumentPtr)} for details on index and IndexWriter state after an /// exception, and flushing/merging temporary free space requirements. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void addDocument(DocumentPtr doc, AnalyzerPtr analyzer); /// Deletes the document(s) containing term. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param term the term to identify the documents to be deleted virtual void deleteDocuments(TermPtr term); /// Deletes the document(s) containing any of the terms. All deletes are flushed at the same time. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param terms array of terms to identify the documents to be deleted virtual void deleteDocuments(Collection terms); /// Deletes the document(s) matching the provided query. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param query the query to identify the documents to be deleted virtual void deleteDocuments(QueryPtr query); /// Deletes the document(s) matching any of the provided queries. All deletes are flushed at /// the same time. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param queries array of queries to identify the documents to be deleted virtual void deleteDocuments(Collection queries); /// Updates a document by first deleting the document(s) containing term and then adding the new /// document. 
The delete and then add are atomic as seen by a reader on the same index (flush /// may happen only after the add). /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param term the term to identify the document(s) to be deleted /// @param doc the document to be added virtual void updateDocument(TermPtr term, DocumentPtr doc); /// Updates a document by first deleting the document(s) containing term and then adding the new /// document. The delete and then add are atomic as seen by a reader on the same index (flush /// may happen only after the add). /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param term the term to identify the document(s) to be deleted /// @param doc the document to be added /// @param analyzer the analyzer to use when analyzing the document virtual void updateDocument(TermPtr term, DocumentPtr doc, AnalyzerPtr analyzer); virtual int32_t getSegmentCount(); virtual int32_t getNumBufferedDocuments(); virtual int32_t getDocCount(int32_t i); virtual int32_t getFlushCount(); virtual int32_t getFlushDeletesCount(); virtual String newSegmentName(); /// Requests an "optimize" operation on an index, priming the index for the fastest available /// search. Traditionally this has meant merging all segments into a single segment as is done in /// the default merge policy, but individual merge policies may implement optimize in different ways. /// /// It is recommended that this method be called upon completion of indexing. In environments with /// frequent updates, optimize is best done during low volume times, if at all. /// /// Note that optimize requires 2X the index size free space in your Directory (3X if you're using /// compound file format). For example, if your index size is 10 MB then you need 20 MB free for /// optimize to complete (30 MB if you're using compound file format). 
/// /// If some but not all readers re-open while an optimize is underway, this will cause > 2X temporary /// space to be consumed as those new readers will then hold open the partially optimized segments at /// that time. It is best not to re-open readers while optimize is running. /// /// The actual temporary usage could be much less than these figures (it depends on many factors). /// /// In general, once the optimize completes, the total size of the index will be less than the size /// of the starting index. It could be quite a bit smaller (if there were many pending deletes) or /// just slightly smaller. /// /// If an Exception is hit during optimize(), for example due to disk full, the index will not be /// corrupt and no documents will have been lost. However, it may have been partially optimized /// (some segments were merged but not all), and it's possible that one of the segments in the index /// will be in non-compound format even when using compound file format. This will occur when the /// exception is hit during conversion of the segment into compound format. /// /// This call will optimize those segments present in the index when the call started. If other /// threads are still adding documents and flushing segments, those newly created segments will not /// be optimized unless you call optimize again. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @see LogMergePolicy#findMergesForOptimize virtual void optimize(); /// Optimize the index down to <= maxNumSegments. If maxNumSegments==1 then this is the same as /// {@link #optimize()}. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param maxNumSegments maximum number of segments left in the index after optimization finishes virtual void optimize(int32_t maxNumSegments); /// Just like {@link #optimize()}, except you can specify whether the call should block until the /// optimize completes. 
This is only meaningful with a {@link MergeScheduler} that is able to run /// merges in background threads. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void optimize(bool doWait); /// Just like {@link #optimize(int32_t)}, except you can specify whether the call should block /// until the optimize completes. This is only meaningful with a {@link MergeScheduler} that is /// able to run merges in background threads. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void optimize(int32_t maxNumSegments, bool doWait); /// Just like {@link #expungeDeletes()}, except you can specify whether the call should block /// until the operation completes. This is only meaningful with a {@link MergeScheduler} that /// is able to run merges in background threads. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void expungeDeletes(bool doWait); /// Expunges all deletes from the index. When an index has many document deletions (or updates /// to existing documents), it's best to either call optimize or expungeDeletes to remove all /// unused data in the index associated with the deleted documents. To see how many deletions /// you have pending in your index, call {@link IndexReader#numDeletedDocs}. This saves disk /// space and memory usage while searching. expungeDeletes should be somewhat faster than /// optimize since it does not insist on reducing the index to a single segment (though, this /// depends on the {@link MergePolicy}; see {@link MergePolicy#findMergesToExpungeDeletes}.). /// Note that this call does not first commit any buffered documents, so you must do so yourself /// if necessary. See also {@link #expungeDeletes(bool)} /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
virtual void expungeDeletes(); /// Asks the mergePolicy whether any merges are necessary now and if so, runs the requested /// merges and then iterate (test again if merges are needed) until no more merges are returned /// by the mergePolicy. /// /// Explicit calls to maybeMerge() are usually not necessary. The most common case is when merge /// policy parameters have changed. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void maybeMerge(); /// The {@link MergeScheduler} calls this method to retrieve the next merge requested by the /// MergePolicy. virtual OneMergePtr getNextMerge(); /// Close the IndexWriter without committing any changes that have occurred since the last commit /// (or since it was opened, if commit hasn't been called). This removes any temporary files that /// had been created, after which the state of the index will be the same as it was when commit() /// was last called or when this writer was first opened. This also clears a previous call to /// {@link #prepareCommit}. virtual void rollback(); /// Delete all documents in the index. /// /// This method will drop all buffered documents and will remove all segments from the index. This /// change will not be visible until a {@link #commit()} has been called. This method can be rolled /// back using {@link #rollback()}. /// /// NOTE: this method is much faster than using {@link #deleteDocuments()}. /// /// NOTE: this method will forcefully abort all merges in progress. If other threads are running /// {@link #optimize()} or any of the addIndexes methods, they will receive {@link /// MergePolicy.MergeAbortedException} virtual void deleteAll(); /// Wait for any currently outstanding merges to finish. /// /// It is guaranteed that any merges started prior to calling this method will have completed once /// this method completes. virtual void waitForMerges(); /// Merges all segments from an array of indexes into this index. 
/// /// This may be used to parallelize batch indexing. A large document collection can be broken into /// sub-collections. Each sub-collection can be indexed in parallel, on a different thread, process /// or machine. The complete index can then be created by merging sub-collection indexes with this /// method. /// /// NOTE: the index in each Directory must not be changed (opened by a writer) while this method is /// running. This method does not acquire a write lock in each input Directory, so it is up to the /// caller to enforce this. /// /// NOTE: while this is running, any attempts to add or delete documents (with another thread) will /// be paused until this method completes. /// /// This method is transactional in how exceptions are handled: it does not commit a new segments_N /// file until all indexes are added. This means if an exception occurs (for example disk full), /// then either no indexes will have been added or they all will have been. /// /// Note that this requires temporary free space in the Directory up to 2X the sum of all input /// indexes (including the starting index). If readers/searchers are open against the starting index, /// then temporary free space required will be higher by the size of the starting index (see /// {@link #optimize()} for details). /// /// Once this completes, the final size of the index will be less than the sum of all input index /// sizes (including the starting index). It could be quite a bit smaller (if there were many pending /// deletes) or just slightly smaller. /// /// This requires this index not be among those to be added. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void addIndexesNoOptimize(Collection dirs); /// Merges the provided indexes into this index. /// After this completes, the index is optimized. The provided IndexReaders are not closed. 
/// /// NOTE: while this is running, any attempts to add or delete documents (with another thread) will /// be paused until this method completes. /// /// See {@link #addIndexesNoOptimize} for details on transactional semantics, temporary free space /// required in the Directory, and non-CFS segments on an exception. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void addIndexes(Collection readers); /// Prepare for commit. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// @see #prepareCommit(MapStringString) virtual void prepareCommit(); /// Prepare for commit, specifying commitUserData Map (String -> String). This does the first phase /// of 2-phase commit. This method does all steps necessary to commit changes since this writer was /// opened: flushes pending added and deleted docs, syncs the index files, writes most of next /// segments_N file. After calling this you must call either {@link #commit()} to finish the commit, /// or {@link #rollback()} to revert the commit and undo all changes done since the writer was opened. /// /// You can also just call {@link #commit(Map)} directly without prepareCommit first in which case /// that method will internally call prepareCommit. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param commitUserData Opaque Map (String->String) that's recorded into the segments file in the /// index, and retrievable by {@link IndexReader#getCommitUserData}. Note that when IndexWriter /// commits itself during {@link #close}, the commitUserData is unchanged (just carried over from the /// prior commit). If this is null then the previous commitUserData is kept. Also, the commitUserData // will only "stick" if there are actually changes in the index to commit. 
virtual void prepareCommit(MapStringString commitUserData); /// Commits all pending changes (added & deleted documents, optimizations, segment merges, added /// indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the /// changes and the index updates will survive an OS or machine crash or power loss. Note that this /// does not wait for any running background merges to finish. This may be a costly operation, so you /// should test the cost in your application and do it only when really necessary. /// /// Note that this operation calls Directory.sync on the index files. That call should not return until /// the file contents & metadata are on stable storage. For FSDirectory, this calls the OS's fsync. /// But, beware: some hardware devices may in fact cache writes even during fsync, and return before the /// bits are actually on stable storage, to give the appearance of faster performance. If you have such /// a device, and it does not have a battery backup (for example) then on power loss it may still lose /// data. Lucene cannot guarantee consistency on such devices. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @see #prepareCommit /// @see #commit(MapStringString) virtual void commit(); /// Commits all changes to the index, specifying a commitUserData Map (String -> String). This just /// calls {@link #prepareCommit(MapStringString)} (if you didn't already call it) and then /// {@link #finishCommit}. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void commit(MapStringString commitUserData); /// Return the total size of all index files currently cached in memory. Useful for size management /// with flushRamDocs() virtual int64_t ramSizeInBytes(); /// Return the number of documents currently buffered in RAM. 
virtual int32_t numRamDocs(); /// Merges the indicated segments, replacing them in the stack with a single segment. virtual void merge(OneMergePtr merge); /// Hook that's called when the specified merge is complete. virtual void mergeSuccess(OneMergePtr merge); /// Checks whether this merge involves any segments already participating in a merge. If not, this /// merge is "registered", meaning we record that its segments are now participating in a merge, /// and true is returned. Else (the merge conflicts) false is returned. virtual bool registerMerge(OneMergePtr merge); /// Does initial setup for a merge, which is fast but holds the synchronized lock on IndexWriter /// instance. virtual void mergeInit(OneMergePtr merge); /// Does finishing for a merge, which is fast but holds the synchronized lock on IndexWriter instance. virtual void mergeFinish(OneMergePtr merge); virtual void addMergeException(OneMergePtr merge); /// For test purposes. virtual int32_t getBufferedDeleteTermsSize(); /// For test purposes. virtual int32_t getNumBufferedDeleteTerms(); /// Utility routines for tests virtual SegmentInfoPtr newestSegment(); virtual String segString(); /// Returns true if the index in the named directory is currently locked. /// @param directory the directory to check for a lock static bool isLocked(DirectoryPtr directory); /// Forcibly unlocks the index in the named directory. /// Caution: this should only be used by failure recovery code, when it is known that no other process /// nor thread is in fact currently accessing this index. static void unlock(DirectoryPtr directory); /// Set the merged segment warmer. See {@link IndexReaderWarmer}. virtual void setMergedSegmentWarmer(IndexReaderWarmerPtr warmer); /// Returns the current merged segment warmer. See {@link IndexReaderWarmer}. virtual IndexReaderWarmerPtr getMergedSegmentWarmer(); /// Used only by assert for testing. 
Current points: /// startDoFlush /// startCommitMerge /// startStartCommit /// midStartCommit /// midStartCommit2 /// midStartCommitSuccess /// finishStartCommit /// startCommitMergeDeletes /// startMergeInit /// startApplyDeletes /// startMergeInit /// startMergeInit virtual bool testPoint(const String& name); virtual bool nrtIsCurrent(SegmentInfosPtr infos); virtual bool isClosed(); protected: virtual void ensureOpen(bool includePendingClose); virtual void ensureOpen(); virtual void setMessageID(InfoStreamPtr infoStream); /// Casts current mergePolicy to LogMergePolicy, and throws an exception if the /// mergePolicy is not a LogMergePolicy. virtual LogMergePolicyPtr getLogMergePolicy(); virtual void setRollbackSegmentInfos(SegmentInfosPtr infos); /// If we are flushing by doc count (not by RAM usage), and using LogDocMergePolicy then push /// maxBufferedDocs down as its minMergeDocs, to keep backwards compatibility. virtual void pushMaxBufferedDocs(); virtual void messageState(); /// Returns true if this thread should attempt to close, or false if IndexWriter is now closed; /// else, waits until another thread finishes closing virtual bool shouldClose(); virtual void closeInternal(bool waitForMerges); /// Tells the docWriter to close its currently open shared doc stores (stored fields & vectors /// files). Return value specifies whether new doc store files are compound or not. virtual bool flushDocStores(); /// Returns true if any merges in pendingMerges or runningMerges are optimization merges. virtual bool optimizeMergesPending(); virtual void maybeMerge(bool optimize); virtual void maybeMerge(int32_t maxNumSegmentsOptimize, bool optimize); virtual void updatePendingMerges(int32_t maxNumSegmentsOptimize, bool optimize); /// Like {@link #getNextMerge()} except only returns a merge if it's external. virtual OneMergePtr getNextExternalMerge(); /// Begin a transaction. 
During a transaction, any segment merges that happen (or ram segments /// flushed) will not write a new segments file and will not remove any files that were present /// at the start of the transaction. You must make a matched call to commitTransaction() or /// rollbackTransaction() to finish the transaction. /// /// Note that buffered documents and delete terms are not handled within the transactions, so /// they must be flushed before the transaction is started. virtual void startTransaction(bool haveReadLock); /// Rolls back the transaction and restores state to where we were at the start. virtual void rollbackTransaction(); /// Commits the transaction. This will write the new segments file and remove and pending /// deletions we have accumulated during the transaction. virtual void commitTransaction(); virtual void rollbackInternal(); virtual void finishMerges(bool waitForMerges); /// Called whenever the SegmentInfos has been updated and the index files referenced exist /// (correctly) in the index directory. virtual void checkpoint(); virtual void finishAddIndexes(); virtual void blockAddIndexes(bool includePendingClose); virtual void resumeAddIndexes(); virtual void resetMergeExceptions(); virtual void noDupDirs(Collection dirs); virtual bool hasExternalSegments(); /// If any of our segments are using a directory != ours then we have to either copy them over one /// by one, merge them (if merge policy has chosen to) or wait until currently running merges (in /// the background) complete. We don't return until the SegmentInfos has no more external segments. /// Currently this is only used by addIndexesNoOptimize(). virtual void resolveExternalSegments(); /// A hook for extending classes to execute operations after pending added and deleted documents have /// been flushed to the Directory but before the change is committed (new segments_N file written). 
virtual void doAfterFlush(); /// A hook for extending classes to execute operations before pending added and deleted documents are /// flushed to the Directory. virtual void doBeforeFlush(); virtual void commit(int64_t sizeInBytes); virtual void finishCommit(); /// Flush all in-memory buffered updates (adds and deletes) to the Directory. /// @param triggerMerge if true, we may merge segments (if deletes or docs were flushed) if necessary /// @param flushDocStores if false we are allowed to keep doc stores open to share with the next segment /// @param flushDeletes whether pending deletes should also be flushed virtual void flush(bool triggerMerge, bool flushDocStores, bool flushDeletes); virtual bool doFlush(bool flushDocStores, bool flushDeletes); virtual bool doFlushInternal(bool flushDocStores, bool flushDeletes); virtual int32_t ensureContiguousMerge(OneMergePtr merge); /// Carefully merges deletes for the segments we just merged. This is tricky because, although merging /// will clear all deletes (compacts the documents), new deletes may have been flushed to the segments /// since the merge was started. This method "carries over" such new deletes onto the newly merged /// segment, and saves the resulting deletes file (incrementing the delete generation for merge.info). /// If no deletes were flushed, no new deletes file is saved. 
virtual void commitMergedDeletes(OneMergePtr merge, SegmentReaderPtr mergeReader); virtual bool commitMerge(OneMergePtr merge, SegmentMergerPtr merger, int32_t mergedDocCount, SegmentReaderPtr mergedReader); virtual LuceneException handleMergeException(const LuceneException& exc, OneMergePtr merge); virtual void _mergeInit(OneMergePtr merge); virtual void setDiagnostics(SegmentInfoPtr info, const String& source); virtual void setDiagnostics(SegmentInfoPtr info, const String& source, MapStringString details); virtual void setMergeDocStoreIsCompoundFile(OneMergePtr merge); virtual void closeMergeReaders(OneMergePtr merge, bool suppressExceptions); /// Does the actual (time-consuming) work of the merge, but without holding synchronized lock on /// IndexWriter instance. virtual int32_t mergeMiddle(OneMergePtr merge); /// Apply buffered deletes to all segments. virtual bool applyDeletes(); virtual String segString(SegmentInfosPtr infos); virtual bool startSync(const String& fileName, HashSet pending); virtual void finishSync(const String& fileName, bool success); /// Blocks until all files in syncing are sync'd bool waitForAllSynced(HashSet syncing); void doWait(); /// Walk through all files referenced by the current segmentInfos and ask the Directory to sync each /// file, if it wasn't already. If that succeeds, then we prepare a new segments_N file but do not /// fully commit it. virtual void startCommit(int64_t sizeInBytes, MapStringString commitUserData); virtual LuceneException handleOOM(const std::bad_alloc& oom, const String& location); friend class ReaderPool; }; /// If {@link #getReader} has been called (ie, this writer is in near real-time mode), then after /// a merge completes, this class can be invoked to warm the reader on the newly merged segment, /// before the merge commits. This is not required for near real-time search, but will reduce /// search latency on opening a new near real-time reader after a merge completes. 
/// /// NOTE: warm is called before any deletes have been carried over to the merged segment. class LPPAPI IndexReaderWarmer : public LuceneObject { public: virtual ~IndexReaderWarmer(); LUCENE_CLASS(IndexReaderWarmer); public: virtual void warm(IndexReaderPtr reader) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/InfoStream.h000066400000000000000000000033451217574114600211630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INFOSTREAM_H #define INFOSTREAM_H #include "LuceneObject.h" #include namespace Lucene { /// Utility class to support streaming info messages. class LPPAPI InfoStream : public LuceneObject { protected: InfoStream(); public: virtual ~InfoStream(); LUCENE_CLASS(InfoStream); public: virtual InfoStream& operator<< (const String& t) = 0; }; /// Stream override to write messages to a file. class LPPAPI InfoStreamFile : public InfoStream { public: InfoStreamFile(const String& path); virtual ~InfoStreamFile(); LUCENE_CLASS(InfoStreamFile); protected: std::wofstream file; public: virtual InfoStreamFile& operator<< (const String& t); }; /// Stream override to write messages to a std::cout. class LPPAPI InfoStreamOut : public InfoStream { public: virtual ~InfoStreamOut(); LUCENE_CLASS(InfoStreamOut); public: virtual InfoStreamOut& operator<< (const String& t); }; /// Null stream override to eat messages. 
class LPPAPI InfoStreamNull : public InfoStream { public: virtual ~InfoStreamNull(); LUCENE_CLASS(InfoStreamNull); public: virtual InfoStreamNull& operator<< (const String& t); }; } #endif LucenePlusPlus-rel_3.0.4/include/InputStreamReader.h000066400000000000000000000025161217574114600225110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INPUTSTREAMREADER_H #define INPUTSTREAMREADER_H #include "Reader.h" namespace Lucene { /// An InputStreamReader is a bridge from byte streams to character streams. class InputStreamReader : public Reader { public: /// Create an InputStreamReader that uses the utf8 charset. InputStreamReader(ReaderPtr reader); virtual ~InputStreamReader(); LUCENE_CLASS(InputStreamReader); protected: ReaderPtr reader; UTF8DecoderStreamPtr decoder; public: /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* b, int32_t offset, int32_t length); /// Close the stream. virtual void close(); /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Reset the stream. virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.4/include/IntBlockPool.h000066400000000000000000000021401217574114600214430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef INTBLOCKPOOL_H #define INTBLOCKPOOL_H #include "LuceneObject.h" namespace Lucene { class IntBlockPool : public LuceneObject { public: IntBlockPool(DocumentsWriterPtr docWriter, bool trackAllocations); virtual ~IntBlockPool(); LUCENE_CLASS(IntBlockPool); public: Collection buffers; int32_t bufferUpto; // Which buffer we are upto int32_t intUpto; // Where we are in head buffer IntArray buffer; // Current head buffer int32_t intOffset; // Current head offset bool trackAllocations; protected: DocumentsWriterWeakPtr _docWriter; public: void reset(); void nextBuffer(); }; } #endif LucenePlusPlus-rel_3.0.4/include/IntFieldSource.h000066400000000000000000000035371217574114600217760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INTFIELDSOURCE_H #define INTFIELDSOURCE_H #include "FieldCacheSource.h" namespace Lucene { /// Obtains int field values from the {@link FieldCache} using getInts() and makes those values available /// as other numeric types, casting as needed. /// /// @see FieldCacheSource for requirements on the field. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU /// per lookup but will not consume double the FieldCache RAM. 
class LPPAPI IntFieldSource : public FieldCacheSource { public: /// Create a cached int field source with a specific string-to-int parser. IntFieldSource(const String& field, IntParserPtr parser = IntParserPtr()); virtual ~IntFieldSource(); LUCENE_CLASS(IntFieldSource); protected: IntParserPtr parser; public: virtual String description(); virtual DocValuesPtr getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader); virtual bool cachedFieldSourceEquals(FieldCacheSourcePtr other); virtual int32_t cachedFieldSourceHashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/InvertedDocConsumer.h000066400000000000000000000026011217574114600230300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCCONSUMER_H #define INVERTEDDOCCONSUMER_H #include "LuceneObject.h" namespace Lucene { class InvertedDocConsumer : public LuceneObject { public: virtual ~InvertedDocConsumer(); LUCENE_CLASS(InvertedDocConsumer); public: FieldInfosPtr fieldInfos; public: /// Add a new thread virtual InvertedDocConsumerPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread) = 0; /// Abort (called after hitting AbortException) virtual void abort() = 0; /// Flush a new segment virtual void flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state) = 0; /// Close doc stores virtual void closeDocStore(SegmentWriteStatePtr state) = 0; /// Attempt to free RAM, returning true if any RAM was freed virtual bool freeRAM() = 0; virtual void setFieldInfos(FieldInfosPtr fieldInfos); }; } #endif 
LucenePlusPlus-rel_3.0.4/include/InvertedDocConsumerPerField.h000066400000000000000000000025511217574114600244470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCCONSUMERPERFIELD_H #define INVERTEDDOCCONSUMERPERFIELD_H #include "LuceneObject.h" namespace Lucene { class InvertedDocConsumerPerField : public LuceneObject { public: virtual ~InvertedDocConsumerPerField(); LUCENE_CLASS(InvertedDocConsumerPerField); public: /// Called once per field, and is given all Fieldable occurrences for this field in the document. /// Return true if you wish to see inverted tokens for these fields virtual bool start(Collection fields, int32_t count) = 0; /// Called before a field instance is being processed virtual void start(FieldablePtr field) = 0; /// Called once per inverted token virtual void add() = 0; /// Called once per field per document, after all Fieldable occurrences are inverted virtual void finish() = 0; /// Called on hitting an aborting exception virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/InvertedDocConsumerPerThread.h000066400000000000000000000016571217574114600246410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCCONSUMERPERTHREAD_H #define INVERTEDDOCCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class InvertedDocConsumerPerThread : public LuceneObject { public: virtual ~InvertedDocConsumerPerThread(); LUCENE_CLASS(InvertedDocConsumerPerThread); public: virtual void startDocument() = 0; virtual InvertedDocConsumerPerFieldPtr addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo) = 0; virtual DocWriterPtr finishDocument() = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/InvertedDocEndConsumer.h000066400000000000000000000021021217574114600234530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCENDCONSUMER_H #define INVERTEDDOCENDCONSUMER_H #include "LuceneObject.h" namespace Lucene { class InvertedDocEndConsumer : public LuceneObject { public: virtual ~InvertedDocEndConsumer(); LUCENE_CLASS(InvertedDocEndConsumer); public: virtual InvertedDocEndConsumerPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread) = 0; virtual void flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, SegmentWriteStatePtr state) = 0; virtual void closeDocStore(SegmentWriteStatePtr state) = 0; virtual void abort() = 0; virtual void setFieldInfos(FieldInfosPtr fieldInfos) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/InvertedDocEndConsumerPerField.h000066400000000000000000000013761217574114600251020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCENDCONSUMERPERFIELD_H #define INVERTEDDOCENDCONSUMERPERFIELD_H #include "LuceneObject.h" namespace Lucene { class InvertedDocEndConsumerPerField : public LuceneObject { public: virtual ~InvertedDocEndConsumerPerField(); LUCENE_CLASS(InvertedDocEndConsumerPerField); public: virtual void finish() = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/InvertedDocEndConsumerPerThread.h000066400000000000000000000016711217574114600252640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCENDCONSUMERPERTHREAD_H #define INVERTEDDOCENDCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class InvertedDocEndConsumerPerThread : public LuceneObject { public: virtual ~InvertedDocEndConsumerPerThread(); LUCENE_CLASS(InvertedDocEndConsumerPerThread); public: virtual void startDocument() = 0; virtual InvertedDocEndConsumerPerFieldPtr addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo) = 0; virtual void finishDocument() = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/KeepOnlyLastCommitDeletionPolicy.h000066400000000000000000000022751217574114600255040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef KEEPONLYLASTCOMMITDELETIONPOLICY_H #define KEEPONLYLASTCOMMITDELETIONPOLICY_H #include "IndexDeletionPolicy.h" namespace Lucene { /// This {@link IndexDeletionPolicy} implementation that keeps only the most recent commit and immediately /// removes all prior commits after a new commit is done. This is the default deletion policy. class LPPAPI KeepOnlyLastCommitDeletionPolicy : public IndexDeletionPolicy { public: virtual ~KeepOnlyLastCommitDeletionPolicy(); LUCENE_CLASS(KeepOnlyLastCommitDeletionPolicy); public: /// Deletes all commits except the most recent one. virtual void onInit(Collection commits); /// Deletes all commits except the most recent one. virtual void onCommit(Collection commits); }; } #endif LucenePlusPlus-rel_3.0.4/include/KeywordAnalyzer.h000066400000000000000000000016501217574114600222430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef KEYWORDANALYZER_H #define KEYWORDANALYZER_H #include "Analyzer.h" namespace Lucene { /// Tokenizes the entire stream as a single token. This is useful for data like zip codes, ids, and some /// product names. 
class LPPAPI KeywordAnalyzer : public Analyzer { public: virtual ~KeywordAnalyzer(); LUCENE_CLASS(KeywordAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/KeywordTokenizer.h000066400000000000000000000024621217574114600224320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef KEYWORDTOKENIZER_H #define KEYWORDTOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// Emits the entire input as a single token. class LPPAPI KeywordTokenizer : public Tokenizer { public: KeywordTokenizer(ReaderPtr input); KeywordTokenizer(ReaderPtr input, int32_t bufferSize); KeywordTokenizer(AttributeSourcePtr source, ReaderPtr input, int32_t bufferSize); KeywordTokenizer(AttributeFactoryPtr factory, ReaderPtr input, int32_t bufferSize); virtual ~KeywordTokenizer(); LUCENE_CLASS(KeywordTokenizer); protected: static const int32_t DEFAULT_BUFFER_SIZE; bool done; int32_t finalOffset; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; protected: void init(int32_t bufferSize); public: virtual bool incrementToken(); virtual void end(); virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.4/include/LengthFilter.h000066400000000000000000000020531217574114600214760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LENGTHFILTER_H #define LENGTHFILTER_H #include "TokenFilter.h" namespace Lucene { /// Removes words that are too long or too short from the stream. class LPPAPI LengthFilter : public TokenFilter { public: /// Build a filter that removes words that are too long or too short from the text. LengthFilter(TokenStreamPtr input, int32_t min, int32_t max); virtual ~LengthFilter(); LUCENE_CLASS(LengthFilter); public: int32_t min; int32_t max; protected: TermAttributePtr termAtt; public: /// Returns the next input Token whose term() is the right len virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/include/LetterTokenizer.h000066400000000000000000000030171217574114600222420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LETTERTOKENIZER_H #define LETTERTOKENIZER_H #include "CharTokenizer.h" namespace Lucene { /// A LetterTokenizer is a tokenizer that divides text at non-letters. That's to say, it defines tokens as maximal /// strings of adjacent letters, as defined UnicodeUtil::isAlpha(c) predicate. /// /// Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, where /// words are not separated by spaces. class LPPAPI LetterTokenizer : public CharTokenizer { public: /// Construct a new LetterTokenizer. LetterTokenizer(ReaderPtr input); /// Construct a new LetterTokenizer using a given {@link AttributeSource}. LetterTokenizer(AttributeSourcePtr source, ReaderPtr input); /// Construct a new LetterTokenizer using a given {@link AttributeFactory}. 
LetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input); virtual ~LetterTokenizer(); LUCENE_CLASS(LetterTokenizer); public: /// Collects only characters which satisfy UnicodeUtil::isAlpha(c). virtual bool isTokenChar(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.4/include/LoadFirstFieldSelector.h000066400000000000000000000014761217574114600234530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOADFIRSTFIELDSELECTOR_H #define LOADFIRSTFIELDSELECTOR_H #include "FieldSelector.h" namespace Lucene { /// Load the First field and break. /// See {@link FieldSelectorResult#LOAD_AND_BREAK} class LPPAPI LoadFirstFieldSelector : public FieldSelector { public: virtual ~LoadFirstFieldSelector(); LUCENE_CLASS(LoadFirstFieldSelector); public: virtual FieldSelectorResult accept(const String& fieldName); }; } #endif LucenePlusPlus-rel_3.0.4/include/Lock.h000066400000000000000000000035651217574114600200100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOCK_H #define LOCK_H #include "LuceneObject.h" namespace Lucene { /// An interprocess mutex lock. /// @see Directory#makeLock(const String&) class LPPAPI Lock : public LuceneObject { public: virtual ~Lock(); LUCENE_CLASS(Lock); public: /// How long {@link #obtain(int64_t)} waits, in milliseconds, in between attempts to acquire the lock. 
static const int32_t LOCK_OBTAIN_WAIT_FOREVER; /// Pass this value to {@link #obtain(int64_t)} to try forever to obtain the lock. static const int32_t LOCK_POLL_INTERVAL; public: /// Attempts to obtain exclusive access and immediately return upon success or failure. /// @return true if exclusive access is obtained. virtual bool obtain() = 0; /// Releases exclusive access. virtual void release() = 0; /// Returns true if the resource is currently locked. Note that one must still call {@link #obtain()} /// before using the resource. virtual bool isLocked() = 0; /// Attempts to obtain an exclusive lock within amount of time given. Polls once per {@link #LOCK_POLL_INTERVAL} /// (currently 1000) milliseconds until lockWaitTimeout is passed. /// @param lockWaitTimeout length of time to wait in milliseconds or {@link #LOCK_OBTAIN_WAIT_FOREVER} /// to retry forever. /// @return true if lock was obtained. bool obtain(int32_t lockWaitTimeout); }; } #endif LucenePlusPlus-rel_3.0.4/include/LockFactory.h000066400000000000000000000042151217574114600213310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOCKFACTORY_H #define LOCKFACTORY_H #include "LuceneObject.h" namespace Lucene { /// Base class for Locking implementation. {@link Directory} uses /// instances of this class to implement locking. /// Note that there are some useful tools to verify that /// your LockFactory is working correctly: {@link /// VerifyingLockFactory}, {@link LockStressTest}, {@link /// LockVerifyServer}. 
/// @see LockVerifyServer /// @see LockStressTest /// @see VerifyingLockFactory class LPPAPI LockFactory : public LuceneObject { public: virtual ~LockFactory(); LUCENE_CLASS(LockFactory); protected: String lockPrefix; public: /// Set the prefix in use for all locks created in this LockFactory. This is normally called once, when a /// Directory gets this LockFactory instance. However, you can also call this (after this instance is /// assigned to a Directory) to override the prefix in use. This is helpful if you're running Lucene on /// machines that have different mount points for the same shared directory. virtual void setLockPrefix(const String& lockPrefix); /// Get the prefix in use for all locks created in this LockFactory. virtual String getLockPrefix(); /// Return a new Lock instance identified by lockName. /// @param lockName name of the lock to be created. virtual LockPtr makeLock(const String& lockName) = 0; /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you /// are certain this lock is no longer in use. /// @param lockName name of the lock to be cleared. virtual void clearLock(const String& lockName) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/LogByteSizeMergePolicy.h000066400000000000000000000052771217574114600234620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOGBYTESIZEMERGEPOLICY_H #define LOGBYTESIZEMERGEPOLICY_H #include "LogMergePolicy.h" namespace Lucene { /// This is a {@link LogMergePolicy} that measures size of a segment as the total byte size of the /// segment's files. 
class LPPAPI LogByteSizeMergePolicy : public LogMergePolicy { public: LogByteSizeMergePolicy(IndexWriterPtr writer); virtual ~LogByteSizeMergePolicy(); LUCENE_CLASS(LogByteSizeMergePolicy); public: /// Default minimum segment size. @see setMinMergeMB. static const double DEFAULT_MIN_MERGE_MB; /// Default maximum segment size. A segment of this size or larger will never be merged. /// @see setMaxMergeMB static const double DEFAULT_MAX_MERGE_MB; protected: virtual int64_t size(SegmentInfoPtr info); public: /// Determines the largest segment (measured by total byte size of the segment's files, in MB) /// that may be merged with other segments. Small values (eg., less than 50 MB) are best for /// interactive indexing, as this limits the length of pauses while indexing to a few seconds. /// Larger values are best for batched indexing and speedier searches. /// /// Note that {@link #setMaxMergeDocs} is also used to check whether a segment is too large for /// merging (it's either or). void setMaxMergeMB(double mb); /// Returns the largest segment (measured by total byte size of the segment's files, in MB) that /// may be merged with other segments. @see #setMaxMergeMB double getMaxMergeMB(); /// Sets the minimum size for the lowest level segments. Any segments below this size are /// considered to be on the same level (even if they vary drastically in size) and will be merged /// whenever there are mergeFactor of them. This effectively truncates the "long tail" of small /// segments that would otherwise be created into a single level. If you set this too large, it /// could greatly increase the merging cost during indexing (if you flush many small segments). void setMinMergeMB(double mb); /// Get the minimum size for a segment to remain un-merged. 
@see #setMinMergeMB double getMinMergeMB(); }; } #endif LucenePlusPlus-rel_3.0.4/include/LogDocMergePolicy.h000066400000000000000000000033401217574114600224160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOGDOCMERGEPOLICY_H #define LOGDOCMERGEPOLICY_H #include "LogMergePolicy.h" namespace Lucene { /// This is a {@link LogMergePolicy} that measures size of a segment as the number of documents /// (not taking deletions into account). class LPPAPI LogDocMergePolicy : public LogMergePolicy { public: LogDocMergePolicy(IndexWriterPtr writer); virtual ~LogDocMergePolicy(); LUCENE_CLASS(LogDocMergePolicy); public: /// Default minimum segment size. @see setMinMergeDocs static const int32_t DEFAULT_MIN_MERGE_DOCS; protected: virtual int64_t size(SegmentInfoPtr info); public: /// Sets the minimum size for the lowest level segments. Any segments below this size are considered /// to be on the same level (even if they vary drastically in size) and will be merged whenever there /// are mergeFactor of them. This effectively truncates the "long tail" of small segments that would /// otherwise be created into a single level. If you set this too large, it could greatly increase the /// merging cost during indexing (if you flush many small segments). void setMinMergeDocs(int32_t minMergeDocs); /// Get the minimum size for a segment to remain un-merged. @see #setMinMergeDocs int32_t getMinMergeDocs(); }; } #endif LucenePlusPlus-rel_3.0.4/include/LogMergePolicy.h000066400000000000000000000203741217574114600217760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOGMERGEPOLICY_H #define LOGMERGEPOLICY_H #include "MergePolicy.h" namespace Lucene { /// This class implements a {@link MergePolicy} that tries to merge segments into levels of exponentially /// increasing size, where each level has fewer segments than the value of the merge factor. Whenever extra /// segments (beyond the merge factor upper bound) are encountered, all segments within the level are merged. /// You can get or set the merge factor using {@link #getMergeFactor()} and {@link #setMergeFactor(int)} /// respectively. /// /// This class is abstract and requires a subclass to define the {@link #size} method which specifies how a /// segment's size is determined. {@link LogDocMergePolicy} is one subclass that measures size by document /// count in the segment. {@link LogByteSizeMergePolicy} is another subclass that measures size as the total /// byte size of the file(s) for the segment. class LPPAPI LogMergePolicy : public MergePolicy { public: LogMergePolicy(IndexWriterPtr writer); virtual ~LogMergePolicy(); LUCENE_CLASS(LogMergePolicy); protected: int32_t mergeFactor; double noCFSRatio; bool calibrateSizeByDeletes; bool _useCompoundFile; bool _useCompoundDocStore; public: /// Defines the allowed range of log(size) for each level. A level is computed by taking the max segment /// log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range. static const double LEVEL_LOG_SPAN; /// Default merge factor, which is how many segments are merged at a time. static const int32_t DEFAULT_MERGE_FACTOR; /// Default maximum segment size. A segment of this size or larger will never be merged. /// @see setMaxMergeDocs static const int32_t DEFAULT_MAX_MERGE_DOCS; /// Default noCFSRatio. 
If a merge's size is >= 10% of the index, then we disable compound file for it. /// @see #setNoCFSRatio static const double DEFAULT_NO_CFS_RATIO; int64_t minMergeSize; int64_t maxMergeSize; int32_t maxMergeDocs; public: /// @see #setNoCFSRatio double getNoCFSRatio(); /// If a merged segment will be more than this percentage of the total size of the index, leave the segment as /// non-compound file even if compound file is enabled. Set to 1.0 to always use CFS regardless of merge size. void setNoCFSRatio(double noCFSRatio); /// Returns the number of segments that are merged at once and also controls the total number of segments /// allowed to accumulate in the index. int32_t getMergeFactor(); /// Determines how often segment indices are merged by addDocument(). With smaller values, less RAM is /// used while indexing, and searches on unoptimized indices are faster, but indexing speed is slower. /// With larger values, more RAM is used during indexing, and while searches on unoptimized indices are /// slower, indexing is faster. Thus larger values (> 10) are best for batch index creation, and smaller /// values (< 10) for indices that are interactively maintained. void setMergeFactor(int32_t mergeFactor); /// Returns true if a newly flushed (not from merge) segment should use the compound file format. virtual bool useCompoundFile(SegmentInfosPtr segments, SegmentInfoPtr newSegment); /// Sets whether compound file format should be used for newly flushed and newly merged segments. void setUseCompoundFile(bool useCompoundFile); /// Returns true if newly flushed and newly merge segments are written in compound file format. /// @see #setUseCompoundFile bool getUseCompoundFile(); /// Returns true if the doc store files should use the compound file format. virtual bool useCompoundDocStore(SegmentInfosPtr segments); /// Sets whether compound file format should be used for newly flushed and newly merged doc store /// segment files (term vectors and stored fields). 
void setUseCompoundDocStore(bool useCompoundDocStore); /// Returns true if newly flushed and newly merge doc store segment files (term vectors and stored fields) /// are written in compound file format. @see #setUseCompoundDocStore bool getUseCompoundDocStore(); /// Sets whether the segment size should be calibrated by the number of deletes when choosing segments /// for merge. void setCalibrateSizeByDeletes(bool calibrateSizeByDeletes); /// Returns true if the segment size should be calibrated by the number of deletes when choosing segments /// for merge. bool getCalibrateSizeByDeletes(); /// Release all resources for the policy. virtual void close(); /// Returns the merges necessary to optimize the index. This merge policy defines "optimized" to mean only /// one segment in the index, where that segment has no deletions pending nor separate norms, and it is in /// compound file format if the current useCompoundFile setting is true. This method returns multiple merges /// (mergeFactor at a time) so the {@link MergeScheduler} in use may make use of concurrency. virtual MergeSpecificationPtr findMergesForOptimize(SegmentInfosPtr segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize); /// Finds merges necessary to expunge all deletes from the index. We simply merge adjacent segments that have /// deletes, up to mergeFactor at a time. virtual MergeSpecificationPtr findMergesToExpungeDeletes(SegmentInfosPtr segmentInfos); /// Checks if any merges are now necessary and returns a {@link MergePolicy.MergeSpecification} if so. A merge /// is necessary when there are more than {@link #setMergeFactor} segments at a given level. When multiple /// levels have too many segments, this method will return multiple merges, allowing the {@link MergeScheduler} /// to use concurrency. virtual MergeSpecificationPtr findMerges(SegmentInfosPtr segmentInfos); /// Determines the largest segment (measured by document count) that may be merged with other segments. 
/// Small values (eg., less than 10,000) are best for interactive indexing, as this limits the length of /// pauses while indexing to a few seconds. Larger values are best for batched indexing and speedier searches. /// /// The default value is INT_MAX. /// /// The default merge policy ({@link LogByteSizeMergePolicy}) also allows you to set this limit by net size /// (in MB) of the segment, using {@link LogByteSizeMergePolicy#setMaxMergeMB}. void setMaxMergeDocs(int32_t maxMergeDocs); /// Returns the largest segment (measured by document count) that may be merged with other segments. /// @see #setMaxMergeDocs int32_t getMaxMergeDocs(); protected: bool verbose(); void message(const String& message); virtual int64_t size(SegmentInfoPtr info) = 0; int64_t sizeDocs(SegmentInfoPtr info); int64_t sizeBytes(SegmentInfoPtr info); bool isOptimized(SegmentInfosPtr infos, int32_t maxNumSegments, SetSegmentInfo segmentsToOptimize); /// Returns true if this single info is optimized (has no pending norms or deletes, is in the same dir as the /// writer, and matches the current compound file setting bool isOptimized(SegmentInfoPtr info); OneMergePtr makeOneMerge(SegmentInfosPtr infos, SegmentInfosPtr infosToMerge); }; } #endif LucenePlusPlus-rel_3.0.4/include/LowerCaseFilter.h000066400000000000000000000014541217574114600221450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOWERCASEFILTER_H #define LOWERCASEFILTER_H #include "TokenFilter.h" namespace Lucene { /// Normalizes token text to lower case. 
class LPPAPI LowerCaseFilter : public TokenFilter { public: LowerCaseFilter(TokenStreamPtr input); virtual ~LowerCaseFilter(); LUCENE_CLASS(LowerCaseFilter); protected: TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/include/LowerCaseTokenizer.h000066400000000000000000000033511217574114600226700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOWERCASETOKENIZER_H #define LOWERCASETOKENIZER_H #include "LetterTokenizer.h" namespace Lucene { /// LowerCaseTokenizer performs the function of LetterTokenizer and LowerCaseFilter together. It divides text at /// non-letters and converts them to lower case. While it is functionally equivalent to the combination of /// LetterTokenizer and LowerCaseFilter, there is a performance advantage to doing the two tasks at once, hence /// this (redundant) implementation. /// /// Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, /// where words are not separated by spaces. class LPPAPI LowerCaseTokenizer : public LetterTokenizer { public: /// Construct a new LowerCaseTokenizer. LowerCaseTokenizer(ReaderPtr input); /// Construct a new LowerCaseTokenizer using a given {@link AttributeSource}. LowerCaseTokenizer(AttributeSourcePtr source, ReaderPtr input); /// Construct a new LowerCaseTokenizer using a given {@link AttributeFactory}. LowerCaseTokenizer(AttributeFactoryPtr factory, ReaderPtr input); virtual ~LowerCaseTokenizer(); LUCENE_CLASS(LowerCaseTokenizer); public: /// Converts char to lower case CharFolder::toLower. 
virtual wchar_t normalize(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.4/include/Lucene.h000066400000000000000000000236341217574114600203320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENE_H #define LUCENE_H #include "Config.h" #include #include #include #include #include #include #include #include #include #include #include #include using boost::int8_t; using boost::uint8_t; using boost::int16_t; using boost::uint16_t; using boost::int32_t; using boost::uint32_t; using boost::int64_t; using boost::uint64_t; #define SIZEOF_ARRAY(arr) (sizeof(arr) / sizeof((arr)[0])) #include "LuceneTypes.h" #include "LuceneAllocator.h" namespace boost { struct blank; class thread; class any; template < typename Signature > class function; namespace interprocess { class file_lock; } namespace posix_time { class ptime; } } namespace Lucene { typedef std::basic_string< char, std::char_traits, LuceneAllocator > SingleString; typedef std::basic_ostringstream< char, std::char_traits, LuceneAllocator > SingleStringStream; typedef std::basic_string< wchar_t, std::char_traits, LuceneAllocator > String; typedef std::basic_ostringstream< wchar_t, std::char_traits, LuceneAllocator > StringStream; const std::basic_string< wchar_t, std::char_traits, LuceneAllocator > EmptyString; typedef boost::shared_ptr filelockPtr; typedef boost::shared_ptr threadPtr; typedef boost::shared_ptr ofstreamPtr; typedef boost::shared_ptr ifstreamPtr; typedef boost::shared_ptr localePtr; } #include "LuceneFactory.h" #include "LuceneException.h" #include "Array.h" #include "Collection.h" #include "Map.h" #include "Set.h" #include "HashMap.h" #include "HashSet.h" #include "Constants.h" 
namespace Lucene { typedef Array ByteArray; typedef Array IntArray; typedef Array LongArray; typedef Array CharArray; typedef Array DoubleArray; template struct luceneEquals { inline bool operator()(const TYPE& first, const TYPE& second) const { return first ? first->equals(second) : (!first && !second); } }; template struct luceneEqualTo { luceneEqualTo(const TYPE& type) : equalType(type) {} inline bool operator()(const TYPE& other) const { return equalType->equals(other); } const TYPE& equalType; }; template struct luceneWeakEquals { inline bool operator()(const TYPE& first, const TYPE& second) const { if (first.expired() || second.expired()) return (first.expired() && second.expired()); return first.lock()->equals(second.lock()); } }; template struct luceneHash : std::unary_function { std::size_t operator()(const TYPE& type) const { return type ? type->hashCode() : 0; } }; template struct luceneWeakHash : std::unary_function { std::size_t operator()(const TYPE& type) const { return type.expired() ? 
0 : type.lock()->hashCode(); } }; template struct luceneCompare { inline bool operator()(const TYPE& first, const TYPE& second) const { if (!second) return false; if (!first) return true; return (first->compareTo(second) < 0); } }; typedef boost::blank VariantNull; typedef boost::variant FieldsData; typedef boost::variant ComparableValue; typedef boost::variant NumericValue; typedef boost::variant StringValue; typedef boost::variant, Collection, Collection, VariantNull> CollectionValue; typedef HashSet< SegmentInfoPtr, luceneHash, luceneEquals > SetSegmentInfo; typedef HashSet< MergeThreadPtr, luceneHash, luceneEquals > SetMergeThread; typedef HashSet< OneMergePtr, luceneHash, luceneEquals > SetOneMerge; typedef HashSet< QueryPtr, luceneHash, luceneEquals > SetQuery; typedef HashSet< TermPtr, luceneHash, luceneEquals > SetTerm; typedef HashSet< BooleanClausePtr, luceneHash, luceneEquals > SetBooleanClause; typedef HashSet< ReaderFieldPtr, luceneHash, luceneEquals > SetReaderField; typedef HashSet SetByteArray; typedef HashMap< String, String > MapStringString; typedef HashMap< wchar_t, NormalizeCharMapPtr > MapCharNormalizeCharMap; typedef HashMap< String, AnalyzerPtr > MapStringAnalyzer; typedef HashMap< String, ByteArray > MapStringByteArray; typedef HashMap< String, int32_t > MapStringInt; typedef HashMap< String, FieldInfoPtr > MapStringFieldInfo; typedef HashMap< String, Collection > MapStringCollectionTermVectorEntry; typedef HashMap< String, RefCountPtr > MapStringRefCount; typedef HashMap< int32_t, TermVectorsPositionInfoPtr > MapIntTermVectorsPositionInfo; typedef HashMap< String, MapIntTermVectorsPositionInfo > MapStringMapIntTermVectorsPositionInfo; typedef HashMap< String, NormPtr > MapStringNorm; typedef HashMap< String, TermVectorEntryPtr > MapStringTermVectorEntry; typedef HashMap< String, RAMFilePtr > MapStringRAMFile; typedef HashMap< int32_t, ByteArray > MapIntByteArray; typedef HashMap< int32_t, FilterItemPtr > MapIntFilterItem; typedef HashMap< 
int32_t, double > MapIntDouble; typedef HashMap< int64_t, int32_t > MapLongInt; typedef HashMap< String, double > MapStringDouble; typedef HashMap< int32_t, CachePtr > MapStringCache; typedef HashMap< String, LockPtr > MapStringLock; typedef HashMap< SegmentInfoPtr, SegmentReaderPtr, luceneHash, luceneEquals > MapSegmentInfoSegmentReader; typedef HashMap< SegmentInfoPtr, int32_t, luceneHash, luceneEquals > MapSegmentInfoInt; typedef HashMap< DocFieldConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField; typedef HashMap< InvertedDocConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField; typedef HashMap< InvertedDocEndConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField; typedef HashMap< TermsHashConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField; typedef HashMap< FieldInfoPtr, Collection, luceneHash, luceneEquals > MapFieldInfoCollectionNormsWriterPerField; typedef HashMap< IndexReaderPtr, HashSet, luceneHash, luceneEquals > MapIndexReaderSetString; typedef HashMap< TermPtr, int32_t, luceneHash, luceneEquals > MapTermInt; typedef HashMap< QueryPtr, int32_t, luceneHash, luceneEquals > MapQueryInt; typedef HashMap< EntryPtr, boost::any, luceneHash, luceneEquals > MapEntryAny; typedef HashMap< PhrasePositionsPtr, LuceneObjectPtr, luceneHash, luceneEquals > MapPhrasePositionsLuceneObject; typedef HashMap< ReaderFieldPtr, SetReaderField, luceneHash, luceneEquals > MapReaderFieldSetReaderField; typedef WeakHashMap< LuceneObjectWeakPtr, LuceneObjectPtr, luceneWeakHash, luceneWeakEquals > WeakMapObjectObject; typedef WeakHashMap< LuceneObjectWeakPtr, MapEntryAny, luceneWeakHash, luceneWeakEquals > WeakMapLuceneObjectMapEntryAny; typedef Map< String, AttributePtr > 
MapStringAttribute; typedef Map< int64_t, DocumentsWriterThreadStatePtr > MapThreadDocumentsWriterThreadState; typedef Map< String, IndexReaderPtr > MapStringIndexReader; typedef Map< TermPtr, NumPtr, luceneCompare > MapTermNum; typedef boost::function TermVectorEntryComparator; template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > class SimpleLRUCache; typedef SimpleLRUCache< TermPtr, TermInfoPtr, luceneHash, luceneEquals > TermInfoCache; typedef boost::shared_ptr TermInfoCachePtr; } #include "Synchronize.h" #include "CycleCheck.h" #if defined(LPP_BUILDING_LIB) || defined(LPP_EXPOSE_INTERNAL) #define INTERNAL public #else #define INTERNAL protected #endif #endif LucenePlusPlus-rel_3.0.4/include/LuceneAllocator.h000066400000000000000000000070161217574114600221670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ALLOCATOR_H #define ALLOCATOR_H #include "Config.h" namespace Lucene { /// Allocate block of memory. LPPAPI void* AllocMemory(size_t size); /// Reallocate a given block of memory. LPPAPI void* ReallocMemory(void* memory, size_t size); /// Release a given block of memory. LPPAPI void FreeMemory(void* memory); /// Release thread cache. Note: should be called whenever a thread /// exits and using nedmalloc. LPPAPI void ReleaseThreadCache(); #ifdef LPP_USE_ALLOCATOR /// Custom stl allocator used to help exporting stl container across process /// borders. It can also calls custom memory allocation functions that can /// help track memory leaks and/or improve performance over standard allocators. 
/// @see #AllocMemory(size_t) /// @see #FreeMemory(void*) template class LuceneAllocator { public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef TYPE* pointer; typedef const TYPE* const_pointer; typedef TYPE& reference; typedef const TYPE& const_reference; typedef TYPE value_type; LuceneAllocator() { } LuceneAllocator(const LuceneAllocator&) { } pointer allocate(size_type n, const void* = 0) { return (TYPE*)AllocMemory((size_t)(n * sizeof(TYPE))); } void deallocate(void* p, size_type) { if (p != NULL) FreeMemory(p); } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } LuceneAllocator& operator= (const LuceneAllocator&) { return *this; } void construct(pointer p, const TYPE& val) { new ((TYPE*)p) TYPE(val); } void destroy(pointer p) { p->~TYPE(); } size_type max_size() const { return size_t(-1); } template struct rebind { typedef LuceneAllocator other; }; template LuceneAllocator(const LuceneAllocator&) { } }; template inline bool operator== (const LuceneAllocator&, const LuceneAllocator&) { return true; } template inline bool operator!= (const LuceneAllocator&, const LuceneAllocator&) { return false; } template <> class LuceneAllocator { public: typedef void* pointer; typedef const void* const_pointer; typedef void value_type; LuceneAllocator() { } LuceneAllocator(const LuceneAllocator&) { } template struct rebind { typedef LuceneAllocator other; }; template LuceneAllocator(const LuceneAllocator&) { } }; #endif } #ifndef LPP_USE_ALLOCATOR #define LuceneAllocator std::allocator #endif #endif LucenePlusPlus-rel_3.0.4/include/LuceneException.h000066400000000000000000000111631217574114600222030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LUCENEEXCEPTION_H #define LUCENEEXCEPTION_H #include "Lucene.h" namespace Lucene { /// Lucene exception container. class LPPAPI LuceneException : public std::exception { public: enum ExceptionType { Null, AlreadyClosed, Compression, CorruptIndex, FieldReader, FileNotFound, IllegalArgument, IllegalState, IndexOutOfBounds, IO, LockObtainFailed, LockReleaseFailed, Lookahead, MergeAborted, Merge, NoSuchDirectory, NullPointer, NumberFormat, OutOfMemory, Parse, QueryParser, Runtime, StaleReader, StopFillCache, Temporary, TimeExceeded, TooManyClauses, UnsupportedOperation }; LuceneException(const String& error = EmptyString, LuceneException::ExceptionType type = Null) throw(); ~LuceneException() throw(); protected: ExceptionType type; String error; public: ExceptionType getType() const; String getError() const; bool isNull() const; void throwException(); }; template class ExceptionTemplate : public ParentException { public: ExceptionTemplate(const String& error = EmptyString, LuceneException::ExceptionType type = Type) : ParentException(error, type) { } }; typedef ExceptionTemplate RuntimeException; typedef ExceptionTemplate OutOfMemoryError; typedef ExceptionTemplate TemporaryException; typedef ExceptionTemplate IllegalStateException; typedef ExceptionTemplate IllegalArgumentException; typedef ExceptionTemplate IndexOutOfBoundsException; typedef ExceptionTemplate NullPointerException; typedef ExceptionTemplate FieldReaderException; typedef ExceptionTemplate MergeException; typedef ExceptionTemplate StopFillCacheException; typedef ExceptionTemplate TimeExceededException; typedef ExceptionTemplate TooManyClausesException; typedef ExceptionTemplate UnsupportedOperationException; typedef ExceptionTemplate NumberFormatException; typedef ExceptionTemplate AlreadyClosedException; typedef ExceptionTemplate IOException; typedef ExceptionTemplate CorruptIndexException; typedef ExceptionTemplate 
FileNotFoundException; typedef ExceptionTemplate LockObtainFailedException; typedef ExceptionTemplate LockReleaseFailedException; typedef ExceptionTemplate MergeAbortedException; typedef ExceptionTemplate StaleReaderException; typedef ExceptionTemplate NoSuchDirectoryException; typedef ExceptionTemplate LookaheadSuccess; typedef ExceptionTemplate ParseException; typedef ExceptionTemplate QueryParserError; typedef ExceptionTemplate CompressionException; } #endif LucenePlusPlus-rel_3.0.4/include/LuceneFactory.h000066400000000000000000000164631217574114600216640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENEFACTORY_H #define LUCENEFACTORY_H #include #include namespace Lucene { template boost::shared_ptr newInstance() { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T); #else return boost::allocate_shared(LuceneAllocator()); #endif } template boost::shared_ptr newInstance(A1 const& a1) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1)); #else return boost::allocate_shared(LuceneAllocator(), a1); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2)); #else return boost::allocate_shared(LuceneAllocator(), a1, a2); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3)); #else return boost::allocate_shared(LuceneAllocator(), a1, a2, a3); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4)); #else 
return boost::allocate_shared(LuceneAllocator(), a1, a2, a3, a4); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5)); #else return boost::allocate_shared(LuceneAllocator(), a1, a2, a3, a4, a5); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6)); #else return boost::allocate_shared(LuceneAllocator(), a1, a2, a3, a4, a5, a6); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7)); #else return boost::allocate_shared(LuceneAllocator(), a1, a2, a3, a4, a5, a6, a7); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8)); #else return boost::allocate_shared(LuceneAllocator(), a1, a2, a3, a4, a5, a6, a7, a8); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8, A9 const& a9) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9)); #else return boost::allocate_shared(LuceneAllocator(), a1, a2, a3, a4, a5, a6, a7, a8, a9); #endif } template boost::shared_ptr newLucene() { boost::shared_ptr instance(newInstance()); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1) { boost::shared_ptr instance(newInstance(a1)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, 
A2 const& a2) { boost::shared_ptr instance(newInstance(a1, a2)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3) { boost::shared_ptr instance(newInstance(a1, a2, a3)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7, a8)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8, A9 const& a9) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7, a8, a9)); instance->initialize(); return instance; } } #endif LucenePlusPlus-rel_3.0.4/include/LuceneHeaders.h000066400000000000000000000061361217574114600216240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENEHEADERS_H #define LUCENEHEADERS_H #include "Lucene.h" // Include most common files: analysis #include "StandardAnalyzer.h" #include "StandardFilter.h" #include "StandardTokenizer.h" #include "KeywordAnalyzer.h" #include "KeywordTokenizer.h" #include "LowerCaseFilter.h" #include "LowerCaseTokenizer.h" #include "PerFieldAnalyzerWrapper.h" #include "PorterStemFilter.h" #include "SimpleAnalyzer.h" #include "StopAnalyzer.h" #include "StopFilter.h" #include "Token.h" #include "TokenFilter.h" #include "WhitespaceAnalyzer.h" #include "WhitespaceTokenizer.h" // Include most common files: document #include "DateField.h" #include "DateTools.h" #include "Document.h" #include "Field.h" #include "NumberTools.h" #include "NumericField.h" // Include most common files: index #include "IndexCommit.h" #include "IndexDeletionPolicy.h" #include "IndexReader.h" #include "IndexWriter.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "LogByteSizeMergePolicy.h" #include "LogDocMergePolicy.h" #include "LogMergePolicy.h" #include "MergeScheduler.h" #include "MultiReader.h" #include "ParallelReader.h" #include "Term.h" #include "TermDocs.h" #include "TermEnum.h" // Include most common files: queryparser #include "MultiFieldQueryParser.h" #include "QueryParseError.h" #include "QueryParser.h" // Include most common files: search #include "BooleanClause.h" #include "BooleanQuery.h" #include "DocIdSet.h" #include "DocIdSetIterator.h" #include "Explanation.h" #include "IndexSearcher.h" #include "MatchAllDocsQuery.h" #include "MultiPhraseQuery.h" #include "MultiSearcher.h" #include "MultiTermQuery.h" #include "NumericRangeFilter.h" #include "NumericRangeQuery.h" #include "ParallelMultiSearcher.h" #include "PhraseQuery.h" #include "PrefixFilter.h" #include 
"PrefixQuery.h" #include "ScoreDoc.h" #include "Scorer.h" #include "Searcher.h" #include "Sort.h" #include "TermQuery.h" #include "TermRangeFilter.h" #include "TermRangeQuery.h" #include "TopDocs.h" #include "TopDocsCollector.h" #include "TopFieldCollector.h" #include "TopScoreDocCollector.h" #include "Weight.h" #include "WildcardQuery.h" #include "SpanFirstQuery.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" #include "SpanOrQuery.h" #include "SpanQuery.h" // Include most common files: store #include "FSDirectory.h" #include "MMapDirectory.h" #include "RAMDirectory.h" #include "RAMFile.h" #include "RAMInputStream.h" #include "RAMOutputStream.h" #include "SimpleFSDirectory.h" // Include most common files: util #include "MiscUtils.h" #include "StringUtils.h" #include "BufferedReader.h" #include "DocIdBitSet.h" #include "FileReader.h" #include "InfoStream.h" #include "LuceneThread.h" #include "OpenBitSet.h" #include "OpenBitSetDISI.h" #include "OpenBitSetIterator.h" #include "StringReader.h" #include "ThreadPool.h" #endif LucenePlusPlus-rel_3.0.4/include/LuceneObject.h000066400000000000000000000040711217574114600214530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LUCENEOBJECT_H #define LUCENEOBJECT_H #include #include "LuceneSync.h" #ifdef LPP_USE_CYCLIC_CHECK #define LUCENE_INTERFACE(Name) \ static String _getClassName() { return L###Name; } \ virtual String getClassName() { return L###Name; } \ CycleCheckT cycleCheck; #else #define LUCENE_INTERFACE(Name) \ static String _getClassName() { return L###Name; } \ virtual String getClassName() { return L###Name; } #endif #define LUCENE_CLASS(Name) \ LUCENE_INTERFACE(Name); \ boost::shared_ptr shared_from_this() { return boost::static_pointer_cast(LuceneObject::shared_from_this()); } \ namespace Lucene { /// Base class for all Lucene classes class LPPAPI LuceneObject : public LuceneSync, public boost::enable_shared_from_this { public: virtual ~LuceneObject(); protected: LuceneObject(); public: /// Called directly after instantiation to create objects that depend on this object being /// fully constructed. virtual void initialize(); /// Return clone of this object /// @param other clone reference - null when called initially, then set in top virtual override. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Return hash code for this object. virtual int32_t hashCode(); /// Return whether two objects are equal virtual bool equals(LuceneObjectPtr other); /// Compare two objects virtual int32_t compareTo(LuceneObjectPtr other); /// Returns a string representation of the object virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/LuceneSignal.h000066400000000000000000000022221217574114600214560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LUCENESIGNAL_H #define LUCENESIGNAL_H #include #include "Lucene.h" namespace Lucene { /// Utility class to support signaling notifications. class LPPAPI LuceneSignal { public: LuceneSignal(SynchronizePtr objectLock = SynchronizePtr()); virtual ~LuceneSignal(); protected: boost::mutex waitMutex; boost::condition signalCondition; SynchronizePtr objectLock; public: /// create a new LuceneSignal instance atomically. static void createSignal(LuceneSignalPtr& signal, SynchronizePtr objectLock); /// Wait for signal using an optional timeout. void wait(int32_t timeout = 0); /// Notify all threads waiting for signal. void notifyAll(); }; } #endif LucenePlusPlus-rel_3.0.4/include/LuceneSync.h000066400000000000000000000025421217574114600211620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENESYNC_H #define LUCENESYNC_H #include "Lucene.h" namespace Lucene { /// Base class for all Lucene synchronised classes class LPPAPI LuceneSync { public: virtual ~LuceneSync(); protected: SynchronizePtr objectLock; LuceneSignalPtr objectSignal; public: /// Return this object synchronize lock. virtual SynchronizePtr getSync(); /// Return this object signal. virtual LuceneSignalPtr getSignal(); /// Lock this object using an optional timeout. virtual void lock(int32_t timeout = 0); /// Unlock this object. virtual void unlock(); /// Returns true if this object is currently locked by current thread. virtual bool holdsLock(); /// Wait for signal using an optional timeout. virtual void wait(int32_t timeout = 0); /// Notify all threads waiting for signal. 
virtual void notifyAll(); }; } #endif LucenePlusPlus-rel_3.0.4/include/LuceneThread.h000066400000000000000000000053531217574114600214600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENETHREAD_H #define LUCENETHREAD_H #include "LuceneObject.h" namespace Lucene { /// Lucene thread container. /// /// It seems there are major issues with using boost::thread::id under Windows. /// After many hours of debugging and trying various strategies, I was unable to fix an /// occasional crash whereby boost::thread::thread_data was being deleted prematurely. /// /// This problem is most visible when running the AtomicUpdateTest test suite. /// /// Therefore, I now uniquely identify threads by their native id. class LPPAPI LuceneThread : public LuceneObject { public: LuceneThread(); virtual ~LuceneThread(); LUCENE_CLASS(LuceneThread); public: static const int32_t MAX_PRIORITY; static const int32_t NORM_PRIORITY; static const int32_t MIN_PRIORITY; protected: threadPtr thread; /// Flag to indicate running thread. /// @see #isAlive bool running; public: /// start thread see {@link #run}. virtual void start(); /// return whether thread is current running. virtual bool isAlive(); /// set running thread priority. virtual void setPriority(int32_t priority); /// return running thread priority. virtual int32_t getPriority(); /// wait for thread to finish using an optional timeout. virtual bool join(int32_t timeout = 0); /// causes the currently executing thread object to temporarily pause and allow other threads to execute. virtual void yield(); /// override to provide the body of the thread. 
virtual void run() = 0; /// Return representation of current execution thread. static int64_t currentId(); /// Suspends current execution thread for a given time. static void threadSleep(int32_t time); /// Yield current execution thread. static void threadYield(); protected: /// set thread running state. void setRunning(bool running); /// return thread running state. bool isRunning(); /// function that controls the lifetime of the running thread. static void runThread(LuceneThread* thread); }; } #endif LucenePlusPlus-rel_3.0.4/include/LuceneTypes.h000066400000000000000000000536751217574114600213670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENETYPES_H #define LUCENETYPES_H #include "Lucene.h" #define DECLARE_SHARED_PTR(Type) \ class Type; \ typedef boost::shared_ptr Type##Ptr; \ typedef boost::weak_ptr Type##WeakPtr; namespace Lucene { // analysis DECLARE_SHARED_PTR(Analyzer) DECLARE_SHARED_PTR(ASCIIFoldingFilter) DECLARE_SHARED_PTR(BaseCharFilter) DECLARE_SHARED_PTR(CachingTokenFilter) DECLARE_SHARED_PTR(CharArraySet) DECLARE_SHARED_PTR(CharFilter) DECLARE_SHARED_PTR(CharReader) DECLARE_SHARED_PTR(CharStream) DECLARE_SHARED_PTR(CharTokenizer) DECLARE_SHARED_PTR(FlagsAttribute) DECLARE_SHARED_PTR(ISOLatin1AccentFilter) DECLARE_SHARED_PTR(KeywordAnalyzer) DECLARE_SHARED_PTR(KeywordTokenizer) DECLARE_SHARED_PTR(LengthFilter) DECLARE_SHARED_PTR(LetterTokenizer) DECLARE_SHARED_PTR(LowerCaseFilter) DECLARE_SHARED_PTR(LowerCaseTokenizer) DECLARE_SHARED_PTR(MappingCharFilter) DECLARE_SHARED_PTR(NormalizeCharMap) DECLARE_SHARED_PTR(NumericTokenStream) DECLARE_SHARED_PTR(OffsetAttribute) DECLARE_SHARED_PTR(PayloadAttribute) 
DECLARE_SHARED_PTR(PerFieldAnalyzerWrapper) DECLARE_SHARED_PTR(PorterStemFilter) DECLARE_SHARED_PTR(PorterStemmer) DECLARE_SHARED_PTR(PositionIncrementAttribute) DECLARE_SHARED_PTR(SimpleAnalyzer) DECLARE_SHARED_PTR(SinkFilter) DECLARE_SHARED_PTR(SinkTokenStream) DECLARE_SHARED_PTR(StandardAnalyzer) DECLARE_SHARED_PTR(StandardAnalyzerSavedStreams) DECLARE_SHARED_PTR(StandardFilter) DECLARE_SHARED_PTR(StandardTokenizer) DECLARE_SHARED_PTR(StandardTokenizerImpl) DECLARE_SHARED_PTR(StopAnalyzer) DECLARE_SHARED_PTR(StopAnalyzerSavedStreams) DECLARE_SHARED_PTR(StopFilter) DECLARE_SHARED_PTR(TeeSinkTokenFilter) DECLARE_SHARED_PTR(TermAttribute) DECLARE_SHARED_PTR(Token) DECLARE_SHARED_PTR(TokenAttributeFactory) DECLARE_SHARED_PTR(TokenFilter) DECLARE_SHARED_PTR(Tokenizer) DECLARE_SHARED_PTR(TokenStream) DECLARE_SHARED_PTR(TypeAttribute) DECLARE_SHARED_PTR(WhitespaceAnalyzer) DECLARE_SHARED_PTR(WhitespaceTokenizer) DECLARE_SHARED_PTR(WordlistLoader) // document DECLARE_SHARED_PTR(AbstractField) DECLARE_SHARED_PTR(CompressionTools) DECLARE_SHARED_PTR(DateField) DECLARE_SHARED_PTR(DateTools) DECLARE_SHARED_PTR(Document) DECLARE_SHARED_PTR(Field) DECLARE_SHARED_PTR(Fieldable) DECLARE_SHARED_PTR(FieldSelector) DECLARE_SHARED_PTR(LoadFirstFieldSelector) DECLARE_SHARED_PTR(MapFieldSelector) DECLARE_SHARED_PTR(NumberTools) DECLARE_SHARED_PTR(NumericField) DECLARE_SHARED_PTR(SetBasedFieldSelector) // index DECLARE_SHARED_PTR(AbstractAllTermDocs) DECLARE_SHARED_PTR(AllTermDocs) DECLARE_SHARED_PTR(BufferedDeletes) DECLARE_SHARED_PTR(ByteBlockAllocator) DECLARE_SHARED_PTR(ByteBlockPool) DECLARE_SHARED_PTR(ByteBlockPoolAllocatorBase) DECLARE_SHARED_PTR(ByteSliceReader) DECLARE_SHARED_PTR(ByteSliceWriter) DECLARE_SHARED_PTR(CharBlockPool) DECLARE_SHARED_PTR(CheckAbort) DECLARE_SHARED_PTR(CheckIndex) DECLARE_SHARED_PTR(CommitPoint) DECLARE_SHARED_PTR(CompoundFileReader) DECLARE_SHARED_PTR(CompoundFileWriter) DECLARE_SHARED_PTR(ConcurrentMergeScheduler) DECLARE_SHARED_PTR(CoreReaders) 
DECLARE_SHARED_PTR(CSIndexInput) DECLARE_SHARED_PTR(DefaultIndexingChain) DECLARE_SHARED_PTR(DefaultSkipListReader) DECLARE_SHARED_PTR(DefaultSkipListWriter) DECLARE_SHARED_PTR(DirectoryReader) DECLARE_SHARED_PTR(DocConsumer) DECLARE_SHARED_PTR(DocConsumerPerThread) DECLARE_SHARED_PTR(DocFieldConsumer) DECLARE_SHARED_PTR(DocFieldConsumerPerField) DECLARE_SHARED_PTR(DocFieldConsumerPerThread) DECLARE_SHARED_PTR(DocFieldConsumers) DECLARE_SHARED_PTR(DocFieldConsumersPerDoc) DECLARE_SHARED_PTR(DocFieldConsumersPerField) DECLARE_SHARED_PTR(DocFieldConsumersPerThread) DECLARE_SHARED_PTR(DocFieldProcessor) DECLARE_SHARED_PTR(DocFieldProcessorPerField) DECLARE_SHARED_PTR(DocFieldProcessorPerThread) DECLARE_SHARED_PTR(DocFieldProcessorPerThreadPerDoc) DECLARE_SHARED_PTR(DocInverter) DECLARE_SHARED_PTR(DocInverterPerField) DECLARE_SHARED_PTR(DocInverterPerThread) DECLARE_SHARED_PTR(DocState) DECLARE_SHARED_PTR(DocumentsWriter) DECLARE_SHARED_PTR(DocumentsWriterThreadState) DECLARE_SHARED_PTR(DocWriter) DECLARE_SHARED_PTR(FieldInfo) DECLARE_SHARED_PTR(FieldInfos) DECLARE_SHARED_PTR(FieldInvertState) DECLARE_SHARED_PTR(FieldNormStatus) DECLARE_SHARED_PTR(FieldSortedTermVectorMapper) DECLARE_SHARED_PTR(FieldsReader) DECLARE_SHARED_PTR(FieldsReaderLocal) DECLARE_SHARED_PTR(FieldsWriter) DECLARE_SHARED_PTR(FilterIndexReader) DECLARE_SHARED_PTR(FindSegmentsModified) DECLARE_SHARED_PTR(FindSegmentsOpen) DECLARE_SHARED_PTR(FindSegmentsRead) DECLARE_SHARED_PTR(FindSegmentsReopen) DECLARE_SHARED_PTR(FormatPostingsDocsConsumer) DECLARE_SHARED_PTR(FormatPostingsDocsWriter) DECLARE_SHARED_PTR(FormatPostingsFieldsConsumer) DECLARE_SHARED_PTR(FormatPostingsFieldsWriter) DECLARE_SHARED_PTR(FormatPostingsPositionsConsumer) DECLARE_SHARED_PTR(FormatPostingsPositionsWriter) DECLARE_SHARED_PTR(FormatPostingsTermsConsumer) DECLARE_SHARED_PTR(FormatPostingsTermsWriter) DECLARE_SHARED_PTR(FreqProxFieldMergeState) DECLARE_SHARED_PTR(FreqProxTermsWriter) 
DECLARE_SHARED_PTR(FreqProxTermsWriterPerField) DECLARE_SHARED_PTR(FreqProxTermsWriterPerThread) DECLARE_SHARED_PTR(FreqProxTermsWriterPostingList) DECLARE_SHARED_PTR(IndexCommit) DECLARE_SHARED_PTR(IndexDeletionPolicy) DECLARE_SHARED_PTR(IndexFileDeleter) DECLARE_SHARED_PTR(IndexFileNameFilter) DECLARE_SHARED_PTR(IndexingChain) DECLARE_SHARED_PTR(IndexReader) DECLARE_SHARED_PTR(IndexReaderWarmer) DECLARE_SHARED_PTR(IndexStatus) DECLARE_SHARED_PTR(IndexWriter) DECLARE_SHARED_PTR(IntBlockPool) DECLARE_SHARED_PTR(IntQueue) DECLARE_SHARED_PTR(InvertedDocConsumer) DECLARE_SHARED_PTR(InvertedDocConsumerPerField) DECLARE_SHARED_PTR(InvertedDocConsumerPerThread) DECLARE_SHARED_PTR(InvertedDocEndConsumer) DECLARE_SHARED_PTR(InvertedDocEndConsumerPerField) DECLARE_SHARED_PTR(InvertedDocEndConsumerPerThread) DECLARE_SHARED_PTR(KeepOnlyLastCommitDeletionPolicy) DECLARE_SHARED_PTR(LogByteSizeMergePolicy) DECLARE_SHARED_PTR(LogDocMergePolicy) DECLARE_SHARED_PTR(LogMergePolicy) DECLARE_SHARED_PTR(MergeDocIDRemapper) DECLARE_SHARED_PTR(MergePolicy) DECLARE_SHARED_PTR(MergeScheduler) DECLARE_SHARED_PTR(MergeSpecification) DECLARE_SHARED_PTR(MergeThread) DECLARE_SHARED_PTR(MultiLevelSkipListReader) DECLARE_SHARED_PTR(MultiLevelSkipListWriter) DECLARE_SHARED_PTR(MultipleTermPositions) DECLARE_SHARED_PTR(MultiReader) DECLARE_SHARED_PTR(MultiTermDocs) DECLARE_SHARED_PTR(MultiTermEnum) DECLARE_SHARED_PTR(MultiTermPositions) DECLARE_SHARED_PTR(MyCommitPoint) DECLARE_SHARED_PTR(MySegmentTermDocs) DECLARE_SHARED_PTR(Norm) DECLARE_SHARED_PTR(NormsWriter) DECLARE_SHARED_PTR(NormsWriterPerField) DECLARE_SHARED_PTR(NormsWriterPerThread) DECLARE_SHARED_PTR(Num) DECLARE_SHARED_PTR(OneMerge) DECLARE_SHARED_PTR(ParallelArrayTermVectorMapper) DECLARE_SHARED_PTR(ParallelReader) DECLARE_SHARED_PTR(ParallelTermEnum) DECLARE_SHARED_PTR(ParallelTermDocs) DECLARE_SHARED_PTR(ParallelTermPositions) DECLARE_SHARED_PTR(Payload) DECLARE_SHARED_PTR(PerDocBuffer) 
DECLARE_SHARED_PTR(PositionBasedTermVectorMapper) DECLARE_SHARED_PTR(RawPostingList) DECLARE_SHARED_PTR(ReaderCommit) DECLARE_SHARED_PTR(ReaderPool) DECLARE_SHARED_PTR(ReadOnlyDirectoryReader) DECLARE_SHARED_PTR(ReadOnlySegmentReader) DECLARE_SHARED_PTR(RefCount) DECLARE_SHARED_PTR(ReusableStringReader) DECLARE_SHARED_PTR(SegmentInfo) DECLARE_SHARED_PTR(SegmentInfoCollection) DECLARE_SHARED_PTR(SegmentInfos) DECLARE_SHARED_PTR(SegmentInfoStatus) DECLARE_SHARED_PTR(SegmentMergeInfo) DECLARE_SHARED_PTR(SegmentMergeQueue) DECLARE_SHARED_PTR(SegmentMerger) DECLARE_SHARED_PTR(SegmentReader) DECLARE_SHARED_PTR(SegmentReaderRef) DECLARE_SHARED_PTR(SegmentTermDocs) DECLARE_SHARED_PTR(SegmentTermEnum) DECLARE_SHARED_PTR(SegmentTermPositions) DECLARE_SHARED_PTR(SegmentTermPositionVector) DECLARE_SHARED_PTR(SegmentTermVector) DECLARE_SHARED_PTR(SegmentWriteState) DECLARE_SHARED_PTR(SerialMergeScheduler) DECLARE_SHARED_PTR(SingleTokenAttributeSource) DECLARE_SHARED_PTR(SkipBuffer) DECLARE_SHARED_PTR(SkipDocWriter) DECLARE_SHARED_PTR(SnapshotDeletionPolicy) DECLARE_SHARED_PTR(SortedTermVectorMapper) DECLARE_SHARED_PTR(StoredFieldStatus) DECLARE_SHARED_PTR(StoredFieldsWriter) DECLARE_SHARED_PTR(StoredFieldsWriterPerDoc) DECLARE_SHARED_PTR(StoredFieldsWriterPerThread) DECLARE_SHARED_PTR(Term) DECLARE_SHARED_PTR(TermBuffer) DECLARE_SHARED_PTR(TermEnum) DECLARE_SHARED_PTR(TermDocs) DECLARE_SHARED_PTR(TermFreqVector) DECLARE_SHARED_PTR(TermIndexStatus) DECLARE_SHARED_PTR(TermInfo) DECLARE_SHARED_PTR(TermInfosReader) DECLARE_SHARED_PTR(TermInfosReaderThreadResources) DECLARE_SHARED_PTR(TermInfosWriter) DECLARE_SHARED_PTR(TermPositions) DECLARE_SHARED_PTR(TermPositionsQueue) DECLARE_SHARED_PTR(TermPositionVector) DECLARE_SHARED_PTR(TermsHash) DECLARE_SHARED_PTR(TermsHashConsumer) DECLARE_SHARED_PTR(TermsHashConsumerPerField) DECLARE_SHARED_PTR(TermsHashConsumerPerThread) DECLARE_SHARED_PTR(TermsHashPerField) DECLARE_SHARED_PTR(TermsHashPerThread) DECLARE_SHARED_PTR(TermVectorEntry) 
DECLARE_SHARED_PTR(TermVectorEntryFreqSortedComparator) DECLARE_SHARED_PTR(TermVectorMapper) DECLARE_SHARED_PTR(TermVectorOffsetInfo) DECLARE_SHARED_PTR(TermVectorsReader) DECLARE_SHARED_PTR(TermVectorStatus) DECLARE_SHARED_PTR(TermVectorsTermsWriter) DECLARE_SHARED_PTR(TermVectorsTermsWriterPerDoc) DECLARE_SHARED_PTR(TermVectorsTermsWriterPerField) DECLARE_SHARED_PTR(TermVectorsTermsWriterPerThread) DECLARE_SHARED_PTR(TermVectorsTermsWriterPostingList) DECLARE_SHARED_PTR(TermVectorsWriter) DECLARE_SHARED_PTR(TermVectorsPositionInfo) DECLARE_SHARED_PTR(WaitQueue) // query parser DECLARE_SHARED_PTR(FastCharStream) DECLARE_SHARED_PTR(MultiFieldQueryParser) DECLARE_SHARED_PTR(QueryParser) DECLARE_SHARED_PTR(QueryParserCharStream) DECLARE_SHARED_PTR(QueryParserConstants) DECLARE_SHARED_PTR(QueryParserToken) DECLARE_SHARED_PTR(QueryParserTokenManager) // search DECLARE_SHARED_PTR(AveragePayloadFunction) DECLARE_SHARED_PTR(BooleanClause) DECLARE_SHARED_PTR(BooleanQuery) DECLARE_SHARED_PTR(BooleanScorer) DECLARE_SHARED_PTR(BooleanScorerCollector) DECLARE_SHARED_PTR(BooleanScorer2) DECLARE_SHARED_PTR(BooleanWeight) DECLARE_SHARED_PTR(Bucket) DECLARE_SHARED_PTR(BucketScorer) DECLARE_SHARED_PTR(BucketTable) DECLARE_SHARED_PTR(ByteCache) DECLARE_SHARED_PTR(ByteFieldSource) DECLARE_SHARED_PTR(ByteParser) DECLARE_SHARED_PTR(Cache) DECLARE_SHARED_PTR(CachedDfSource) DECLARE_SHARED_PTR(CachingSpanFilter) DECLARE_SHARED_PTR(CachingWrapperFilter) DECLARE_SHARED_PTR(CellQueue) DECLARE_SHARED_PTR(Collector) DECLARE_SHARED_PTR(ComplexExplanation) DECLARE_SHARED_PTR(ConjunctionScorer) DECLARE_SHARED_PTR(ConstantScoreAutoRewrite) DECLARE_SHARED_PTR(ConstantScoreAutoRewriteDefault) DECLARE_SHARED_PTR(ConstantScoreBooleanQueryRewrite) DECLARE_SHARED_PTR(ConstantScoreFilterRewrite) DECLARE_SHARED_PTR(ConstantScoreQuery) DECLARE_SHARED_PTR(ConstantScorer) DECLARE_SHARED_PTR(ConstantWeight) DECLARE_SHARED_PTR(Coordinator) DECLARE_SHARED_PTR(CountingConjunctionSumScorer) 
DECLARE_SHARED_PTR(CountingDisjunctionSumScorer) DECLARE_SHARED_PTR(CreationPlaceholder) DECLARE_SHARED_PTR(CustomScoreProvider) DECLARE_SHARED_PTR(CustomScoreQuery) DECLARE_SHARED_PTR(CustomWeight) DECLARE_SHARED_PTR(CustomScorer) DECLARE_SHARED_PTR(DefaultByteParser) DECLARE_SHARED_PTR(DefaultCustomScoreProvider) DECLARE_SHARED_PTR(DefaultDoubleParser) DECLARE_SHARED_PTR(DefaultIntParser) DECLARE_SHARED_PTR(DefaultLongParser) DECLARE_SHARED_PTR(DefaultSimilarity) DECLARE_SHARED_PTR(DisjunctionMaxQuery) DECLARE_SHARED_PTR(DisjunctionMaxScorer) DECLARE_SHARED_PTR(DisjunctionMaxWeight) DECLARE_SHARED_PTR(DisjunctionSumScorer) DECLARE_SHARED_PTR(DocIdSet) DECLARE_SHARED_PTR(DocIdSetIterator) DECLARE_SHARED_PTR(DocValues) DECLARE_SHARED_PTR(DoubleCache) DECLARE_SHARED_PTR(DoubleFieldSource) DECLARE_SHARED_PTR(DoubleParser) DECLARE_SHARED_PTR(EmptyDocIdSet) DECLARE_SHARED_PTR(EmptyDocIdSetIterator) DECLARE_SHARED_PTR(Entry) DECLARE_SHARED_PTR(ExactPhraseScorer) DECLARE_SHARED_PTR(Explanation) DECLARE_SHARED_PTR(FieldCache) DECLARE_SHARED_PTR(FieldCacheDocIdSet) DECLARE_SHARED_PTR(FieldCacheEntry) DECLARE_SHARED_PTR(FieldCacheEntryImpl) DECLARE_SHARED_PTR(FieldCacheImpl) DECLARE_SHARED_PTR(FieldCacheRangeFilter) DECLARE_SHARED_PTR(FieldCacheRangeFilterByte) DECLARE_SHARED_PTR(FieldCacheRangeFilterDouble) DECLARE_SHARED_PTR(FieldCacheRangeFilterInt) DECLARE_SHARED_PTR(FieldCacheRangeFilterLong) DECLARE_SHARED_PTR(FieldCacheRangeFilterString) DECLARE_SHARED_PTR(FieldCacheSource) DECLARE_SHARED_PTR(FieldCacheTermsFilter) DECLARE_SHARED_PTR(FieldCacheTermsFilterDocIdSet) DECLARE_SHARED_PTR(FieldComparator) DECLARE_SHARED_PTR(FieldComparatorSource) DECLARE_SHARED_PTR(FieldDoc) DECLARE_SHARED_PTR(FieldDocIdSetIteratorIncrement) DECLARE_SHARED_PTR(FieldDocIdSetIteratorTermDocs) DECLARE_SHARED_PTR(FieldDocSortedHitQueue) DECLARE_SHARED_PTR(FieldMaskingSpanQuery) DECLARE_SHARED_PTR(FieldScoreQuery) DECLARE_SHARED_PTR(FieldValueHitQueue) 
DECLARE_SHARED_PTR(FieldValueHitQueueEntry) DECLARE_SHARED_PTR(Filter) DECLARE_SHARED_PTR(FilterCache) DECLARE_SHARED_PTR(FilterCleaner) DECLARE_SHARED_PTR(FilteredDocIdSet) DECLARE_SHARED_PTR(FilteredDocIdSetIterator) DECLARE_SHARED_PTR(FilteredQuery) DECLARE_SHARED_PTR(FilteredQueryWeight) DECLARE_SHARED_PTR(FilteredTermEnum) DECLARE_SHARED_PTR(FilterItem) DECLARE_SHARED_PTR(FilterManager) DECLARE_SHARED_PTR(FuzzyQuery) DECLARE_SHARED_PTR(FuzzyTermEnum) DECLARE_SHARED_PTR(HitQueue) DECLARE_SHARED_PTR(HitQueueBase) DECLARE_SHARED_PTR(IDFExplanation) DECLARE_SHARED_PTR(IndexSearcher) DECLARE_SHARED_PTR(IntCache) DECLARE_SHARED_PTR(IntFieldSource) DECLARE_SHARED_PTR(IntParser) DECLARE_SHARED_PTR(LongCache) DECLARE_SHARED_PTR(LongParser) DECLARE_SHARED_PTR(MatchAllDocsQuery) DECLARE_SHARED_PTR(MatchAllDocsWeight) DECLARE_SHARED_PTR(MatchAllScorer) DECLARE_SHARED_PTR(MaxPayloadFunction) DECLARE_SHARED_PTR(MinPayloadFunction) DECLARE_SHARED_PTR(MultiComparatorsFieldValueHitQueue) DECLARE_SHARED_PTR(MultiPhraseQuery) DECLARE_SHARED_PTR(MultiSearcher) DECLARE_SHARED_PTR(MultiSearcherCallableNoSort) DECLARE_SHARED_PTR(MultiSearcherCallableWithSort) DECLARE_SHARED_PTR(MultiTermQuery) DECLARE_SHARED_PTR(MultiTermQueryWrapperFilter) DECLARE_SHARED_PTR(NearSpansOrdered) DECLARE_SHARED_PTR(NearSpansUnordered) DECLARE_SHARED_PTR(NumericRangeFilter) DECLARE_SHARED_PTR(NumericRangeQuery) DECLARE_SHARED_PTR(NumericUtilsDoubleParser) DECLARE_SHARED_PTR(NumericUtilsIntParser) DECLARE_SHARED_PTR(NumericUtilsLongParser) DECLARE_SHARED_PTR(OneComparatorFieldValueHitQueue) DECLARE_SHARED_PTR(OrdFieldSource) DECLARE_SHARED_PTR(ParallelMultiSearcher) DECLARE_SHARED_PTR(Parser) DECLARE_SHARED_PTR(PayloadFunction) DECLARE_SHARED_PTR(PayloadNearQuery) DECLARE_SHARED_PTR(PayloadNearSpanScorer) DECLARE_SHARED_PTR(PayloadNearSpanWeight) DECLARE_SHARED_PTR(PayloadSpanUtil) DECLARE_SHARED_PTR(PayloadTermQuery) DECLARE_SHARED_PTR(PayloadTermSpanScorer) DECLARE_SHARED_PTR(PayloadTermWeight) 
DECLARE_SHARED_PTR(PhrasePositions) DECLARE_SHARED_PTR(PhraseQuery) DECLARE_SHARED_PTR(PhraseQueue) DECLARE_SHARED_PTR(PhraseScorer) DECLARE_SHARED_PTR(PositionInfo) DECLARE_SHARED_PTR(PositiveScoresOnlyCollector) DECLARE_SHARED_PTR(PrefixFilter) DECLARE_SHARED_PTR(PrefixQuery) DECLARE_SHARED_PTR(PrefixTermEnum) DECLARE_SHARED_PTR(PriorityQueueScoreDocs) DECLARE_SHARED_PTR(Query) DECLARE_SHARED_PTR(QueryTermVector) DECLARE_SHARED_PTR(QueryWrapperFilter) DECLARE_SHARED_PTR(ReqExclScorer) DECLARE_SHARED_PTR(ReqOptSumScorer) DECLARE_SHARED_PTR(RewriteMethod) DECLARE_SHARED_PTR(ReverseOrdFieldSource) DECLARE_SHARED_PTR(ScoreCachingWrappingScorer) DECLARE_SHARED_PTR(ScoreDoc) DECLARE_SHARED_PTR(Scorer) DECLARE_SHARED_PTR(ScoreTerm) DECLARE_SHARED_PTR(ScoreTermQueue) DECLARE_SHARED_PTR(ScoringBooleanQueryRewrite) DECLARE_SHARED_PTR(Searchable) DECLARE_SHARED_PTR(Searcher) DECLARE_SHARED_PTR(Similarity) DECLARE_SHARED_PTR(SimilarityDisableCoord) DECLARE_SHARED_PTR(SimilarityDelegator) DECLARE_SHARED_PTR(SimilarityIDFExplanation) DECLARE_SHARED_PTR(SingleMatchScorer) DECLARE_SHARED_PTR(SingleTermEnum) DECLARE_SHARED_PTR(SloppyPhraseScorer) DECLARE_SHARED_PTR(Sort) DECLARE_SHARED_PTR(SortField) DECLARE_SHARED_PTR(SpanFilter) DECLARE_SHARED_PTR(SpanFilterResult) DECLARE_SHARED_PTR(SpanFirstQuery) DECLARE_SHARED_PTR(SpanNearQuery) DECLARE_SHARED_PTR(SpanNotQuery) DECLARE_SHARED_PTR(SpanOrQuery) DECLARE_SHARED_PTR(SpanQuery) DECLARE_SHARED_PTR(SpanQueryFilter) DECLARE_SHARED_PTR(SpanQueue) DECLARE_SHARED_PTR(Spans) DECLARE_SHARED_PTR(SpansCell) DECLARE_SHARED_PTR(SpanScorer) DECLARE_SHARED_PTR(SpanTermQuery) DECLARE_SHARED_PTR(SpanWeight) DECLARE_SHARED_PTR(StartEnd) DECLARE_SHARED_PTR(StringCache) DECLARE_SHARED_PTR(StringIndex) DECLARE_SHARED_PTR(StringIndexCache) DECLARE_SHARED_PTR(SubScorer) DECLARE_SHARED_PTR(TermQuery) DECLARE_SHARED_PTR(TermRangeFilter) DECLARE_SHARED_PTR(TermRangeQuery) DECLARE_SHARED_PTR(TermRangeTermEnum) DECLARE_SHARED_PTR(TermScorer) 
DECLARE_SHARED_PTR(TermSpans) DECLARE_SHARED_PTR(TimeLimitingCollector) DECLARE_SHARED_PTR(TimerThread) DECLARE_SHARED_PTR(TopDocs) DECLARE_SHARED_PTR(TopDocsCollector) DECLARE_SHARED_PTR(TopFieldCollector) DECLARE_SHARED_PTR(TopFieldDocs) DECLARE_SHARED_PTR(TopScoreDocCollector) DECLARE_SHARED_PTR(ValueSource) DECLARE_SHARED_PTR(ValueSourceQuery) DECLARE_SHARED_PTR(ValueSourceScorer) DECLARE_SHARED_PTR(ValueSourceWeight) DECLARE_SHARED_PTR(Weight) DECLARE_SHARED_PTR(WildcardQuery) DECLARE_SHARED_PTR(WildcardTermEnum) // store DECLARE_SHARED_PTR(BufferedIndexInput) DECLARE_SHARED_PTR(BufferedIndexOutput) DECLARE_SHARED_PTR(ChecksumIndexInput) DECLARE_SHARED_PTR(ChecksumIndexOutput) DECLARE_SHARED_PTR(Directory) DECLARE_SHARED_PTR(FileSwitchDirectory) DECLARE_SHARED_PTR(FSDirectory) DECLARE_SHARED_PTR(FSLockFactory) DECLARE_SHARED_PTR(IndexInput) DECLARE_SHARED_PTR(IndexOutput) DECLARE_SHARED_PTR(InputFile) DECLARE_SHARED_PTR(Lock) DECLARE_SHARED_PTR(LockFactory) DECLARE_SHARED_PTR(MMapDirectory) DECLARE_SHARED_PTR(MMapIndexInput) DECLARE_SHARED_PTR(NativeFSLock) DECLARE_SHARED_PTR(NativeFSLockFactory) DECLARE_SHARED_PTR(NoLock) DECLARE_SHARED_PTR(NoLockFactory) DECLARE_SHARED_PTR(OutputFile) DECLARE_SHARED_PTR(RAMDirectory) DECLARE_SHARED_PTR(RAMFile) DECLARE_SHARED_PTR(RAMInputStream) DECLARE_SHARED_PTR(RAMOutputStream) DECLARE_SHARED_PTR(SimpleFSDirectory) DECLARE_SHARED_PTR(SimpleFSIndexInput) DECLARE_SHARED_PTR(SimpleFSIndexOutput) DECLARE_SHARED_PTR(SimpleFSLock) DECLARE_SHARED_PTR(SimpleFSLockFactory) DECLARE_SHARED_PTR(SingleInstanceLock) DECLARE_SHARED_PTR(SingleInstanceLockFactory) // util DECLARE_SHARED_PTR(Attribute) DECLARE_SHARED_PTR(AttributeFactory) DECLARE_SHARED_PTR(AttributeSource) DECLARE_SHARED_PTR(AttributeSourceState) DECLARE_SHARED_PTR(BitSet) DECLARE_SHARED_PTR(BitVector) DECLARE_SHARED_PTR(BufferedReader) DECLARE_SHARED_PTR(Collator) DECLARE_SHARED_PTR(DefaultAttributeFactory) DECLARE_SHARED_PTR(DocIdBitSet) 
DECLARE_SHARED_PTR(FieldCacheSanityChecker) DECLARE_SHARED_PTR(FileReader) DECLARE_SHARED_PTR(Future) DECLARE_SHARED_PTR(HeapedScorerDoc) DECLARE_SHARED_PTR(InfoStream) DECLARE_SHARED_PTR(InfoStreamFile) DECLARE_SHARED_PTR(InfoStreamOut) DECLARE_SHARED_PTR(InputStreamReader) DECLARE_SHARED_PTR(Insanity) DECLARE_SHARED_PTR(IntRangeBuilder) DECLARE_SHARED_PTR(LongRangeBuilder) DECLARE_SHARED_PTR(LuceneObject) DECLARE_SHARED_PTR(LuceneSignal) DECLARE_SHARED_PTR(LuceneThread) DECLARE_SHARED_PTR(NumericUtils) DECLARE_SHARED_PTR(OpenBitSet) DECLARE_SHARED_PTR(OpenBitSetDISI) DECLARE_SHARED_PTR(OpenBitSetIterator) DECLARE_SHARED_PTR(Random) DECLARE_SHARED_PTR(Reader) DECLARE_SHARED_PTR(ReaderField) DECLARE_SHARED_PTR(ScorerDocQueue) DECLARE_SHARED_PTR(SortedVIntList) DECLARE_SHARED_PTR(StringReader) DECLARE_SHARED_PTR(Synchronize) DECLARE_SHARED_PTR(ThreadPool) DECLARE_SHARED_PTR(UnicodeResult) DECLARE_SHARED_PTR(UTF8Decoder) DECLARE_SHARED_PTR(UTF8DecoderStream) DECLARE_SHARED_PTR(UTF8Encoder) DECLARE_SHARED_PTR(UTF8EncoderStream) DECLARE_SHARED_PTR(UTF8Result) DECLARE_SHARED_PTR(UTF16Decoder) } #endif LucenePlusPlus-rel_3.0.4/include/MMapDirectory.h000066400000000000000000000036701217574114600216340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MMAPDIRECTORY_H #define MMAPDIRECTORY_H #include "FSDirectory.h" namespace Lucene { /// File-based {@link Directory} implementation that uses mmap for reading, and {@link SimpleFSIndexOutput} for writing. /// /// NOTE: memory mapping uses up a portion of the virtual memory address space in your process equal to the size of the /// file being mapped. 
Before using this class, be sure your have plenty of virtual address space. /// /// NOTE: Accessing this class either directly or indirectly from a thread while it's interrupted can close the /// underlying channel immediately if at the same time the thread is blocked on IO. The channel will remain closed and /// subsequent access to {@link MMapDirectory} will throw an exception. class LPPAPI MMapDirectory : public FSDirectory { public: /// Create a new MMapDirectory for the named location. /// @param path the path of the directory. /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) MMapDirectory(const String& path, LockFactoryPtr lockFactory = LockFactoryPtr()); virtual ~MMapDirectory(); LUCENE_CLASS(MMapDirectory); public: using FSDirectory::openInput; /// Creates an IndexInput for the file with the given name. virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); /// Creates an IndexOutput for the file with the given name. virtual IndexOutputPtr createOutput(const String& name); }; } #endif LucenePlusPlus-rel_3.0.4/include/Map.h000066400000000000000000000070431217574114600176300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MAP_H #define MAP_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle maps that can be safely copied and shared template < class KEY, class VALUE, class LESS = std::less > class Map : public LuceneSync { public: typedef Map this_type; typedef std::pair key_value; typedef std::map< KEY, VALUE, LESS, LuceneAllocator > map_type; typedef typename map_type::iterator iterator; typedef typename map_type::const_iterator const_iterator; typedef KEY key_type; typedef VALUE value_type; virtual ~Map() { } protected: boost::shared_ptr mapContainer; public: static this_type newInstance() { this_type instance; instance.mapContainer = Lucene::newInstance(); return instance; } void reset() { mapContainer.reset(); } int32_t size() const { return (int32_t)mapContainer->size(); } bool empty() const { return mapContainer->empty(); } void clear() { mapContainer->clear(); } iterator begin() { return mapContainer->begin(); } iterator end() { return mapContainer->end(); } const_iterator begin() const { return mapContainer->begin(); } const_iterator end() const { return mapContainer->end(); } operator bool() const { return mapContainer; } bool operator! 
() const { return !mapContainer; } map_type& operator= (const map_type& other) { mapContainer = other.mapContainer; return *this; } void put(const KEY& key, const VALUE& value) { (*mapContainer)[key] = value; } template void putAll(ITER first, ITER last) { for (iterator current = first; current != last; ++current) (*mapContainer)[current->first] = current->second; } template void remove(ITER pos) { mapContainer->erase(pos); } template ITER remove(ITER first, ITER last) { return mapContainer->erase(first, last); } bool remove(const KEY& key) { return (mapContainer->erase(key) > 0); } iterator find(const KEY& key) { return mapContainer->find(key); } VALUE get(const KEY& key) const { iterator findValue = mapContainer->find(key); return findValue == mapContainer->end() ? VALUE() : findValue->second; } bool contains(const KEY& key) const { return (mapContainer->find(key) != mapContainer->end()); } VALUE& operator[] (const KEY& key) { return (*mapContainer)[key]; } }; } #endif LucenePlusPlus-rel_3.0.4/include/MapFieldSelector.h000066400000000000000000000030501217574114600222670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MAPFIELDSELECTOR_H #define MAPFIELDSELECTOR_H #include "FieldSelector.h" namespace Lucene { typedef HashMap MapStringFieldSelectorResult; /// A {@link FieldSelector} based on a Map of field names to {@link FieldSelectorResult}s class LPPAPI MapFieldSelector : public FieldSelector { public: /// Create a MapFieldSelector /// @param fieldSelections maps from field names (String) to {@link FieldSelectorResult}s MapFieldSelector(MapStringFieldSelectorResult fieldSelections); /// Create a MapFieldSelector /// @param fields fields to LOAD. List of Strings. All other fields are NO_LOAD. MapFieldSelector(Collection fields); virtual ~MapFieldSelector(); LUCENE_CLASS(MapFieldSelector); public: MapStringFieldSelectorResult fieldSelections; public: /// Load field according to its associated value in fieldSelections /// @param field a field name /// @return the fieldSelections value that field maps to or NO_LOAD if none. virtual FieldSelectorResult accept(const String& fieldName); }; } #endif LucenePlusPlus-rel_3.0.4/include/MapOfSets.h000066400000000000000000000050531217574114600207530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MAPOFSETS_H #define MAPOFSETS_H #include "Lucene.h" namespace Lucene { /// Helper class for keeping Lists of Objects associated with keys. template class MapOfSets { public: typedef HashSet set_type; typedef HashMap map_type; MapOfSets(map_type m) { theMap = m; } protected: map_type theMap; public: /// @return direct access to the map backing this object. 
map_type getMap() { return theMap; } /// Adds val to the HashSet associated with key in the HashMap. If key is not already in the map, /// a new HashSet will first be created. /// @return the size of the HashSet associated with key once val is added to it. int32_t put(MAPKEY key, SETVALUE val) { typename map_type::iterator entry = theMap.find(key); if (entry != theMap.end()) { entry->second.add(val); return entry->second.size(); } else { set_type theSet(set_type::newInstance()); theSet.add(val); theMap.put(key, theSet); return 1; } } /// Adds multiple vals to the HashSet associated with key in the HashMap. If key is not already in /// the map, a new HashSet will first be created. /// @return the size of the HashSet associated with key once val is added to it. int32_t putAll(MAPKEY key, set_type vals) { typename map_type::iterator entry = theMap.find(key); if (entry != theMap.end()) { entry->second.addAll(vals.begin(), vals.end()); return entry->second.size(); } else { set_type theSet(set_type::newInstance(vals.begin(), vals.end())); theMap.put(key, theSet); return theSet.size(); } } }; } #endif LucenePlusPlus-rel_3.0.4/include/MappingCharFilter.h000066400000000000000000000031001217574114600224400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MAPPINGCHARFILTER_H #define MAPPINGCHARFILTER_H #include "BaseCharFilter.h" namespace Lucene { /// Simplistic {@link CharFilter} that applies the mappings contained in a {@link NormalizeCharMap} to the character /// stream, and correcting the resulting changes to the offsets. class LPPAPI MappingCharFilter : public BaseCharFilter { public: /// Default constructor that takes a {@link CharStream}. 
MappingCharFilter(NormalizeCharMapPtr normMap, CharStreamPtr in); /// Easy-use constructor that takes a {@link Reader}. MappingCharFilter(NormalizeCharMapPtr normMap, ReaderPtr in); virtual ~MappingCharFilter(); LUCENE_CLASS(MappingCharFilter); protected: NormalizeCharMapPtr normMap; Collection buffer; String replacement; int32_t charPointer; int32_t nextCharCounter; public: virtual int32_t read(); virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); protected: int32_t nextChar(); void pushChar(int32_t c); void pushLastChar(int32_t c); NormalizeCharMapPtr match(NormalizeCharMapPtr map); }; } #endif LucenePlusPlus-rel_3.0.4/include/MatchAllDocsQuery.h000066400000000000000000000024371217574114600224410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MATCHALLDOCSQUERY_H #define MATCHALLDOCSQUERY_H #include "Query.h" namespace Lucene { /// A query that matches all documents. class LPPAPI MatchAllDocsQuery : public Query { public: /// @param normsField Field used for normalization factor (document boost). Null if nothing. 
MatchAllDocsQuery(const String& normsField = EmptyString); virtual ~MatchAllDocsQuery(); LUCENE_CLASS(MatchAllDocsQuery); protected: String normsField; public: using Query::toString; virtual WeightPtr createWeight(SearcherPtr searcher); virtual void extractTerms(SetTerm terms); virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); friend class MatchAllDocsWeight; }; } #endif LucenePlusPlus-rel_3.0.4/include/MaxPayloadFunction.h000066400000000000000000000022641217574114600226600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MAXPAYLOADFUNCTION_H #define MAXPAYLOADFUNCTION_H #include "PayloadFunction.h" namespace Lucene { /// Returns the maximum payload score seen, else 1 if there are no payloads on the doc. /// /// Is thread safe and completely reusable. class LPPAPI MaxPayloadFunction : public PayloadFunction { public: virtual ~MaxPayloadFunction(); LUCENE_CLASS(MaxPayloadFunction); public: virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore); virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); }; } #endif LucenePlusPlus-rel_3.0.4/include/MergeDocIDRemapper.h000066400000000000000000000030251217574114600225050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MERGEDOCIDREMAPPER_H #define MERGEDOCIDREMAPPER_H #include "LuceneObject.h" namespace Lucene { /// Remaps docIDs after a merge has completed, where the merged segments had at least one deletion. /// This is used to renumber the buffered deletes in IndexWriter when a merge of segments with deletions /// commits. class MergeDocIDRemapper : public LuceneObject { public: MergeDocIDRemapper(SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergedDocCount); virtual ~MergeDocIDRemapper(); LUCENE_CLASS(MergeDocIDRemapper); public: Collection starts; // used for binary search of mapped docID Collection newStarts; // starts, minus the deletes Collection< Collection > docMaps; // maps docIDs in the merged set int32_t minDocID; // minimum docID that needs renumbering int32_t maxDocID; // 1+ the max docID that needs renumbering int32_t docShift; // total # deleted docs that were compacted by this merge public: int32_t remap(int32_t oldDocID); }; } #endif LucenePlusPlus-rel_3.0.4/include/MergePolicy.h000066400000000000000000000144311217574114600213310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MERGEPOLICY_H #define MERGEPOLICY_H #include "SegmentInfos.h" namespace Lucene { /// A MergePolicy determines the sequence of primitive merge operations to be used for overall merge /// and optimize operations. 
/// /// Whenever the segments in an index have been altered by {@link IndexWriter}, either the addition of /// a newly flushed segment, addition of many segments from addIndexes* calls, or a previous merge that /// may now need to cascade, {@link IndexWriter} invokes {@link #findMerges} to give the MergePolicy a /// chance to pick merges that are now required. This method returns a {@link MergeSpecification} /// instance describing the set of merges that should be done, or null if no merges are necessary. /// When IndexWriter.optimize is called, it calls {@link #findMergesForOptimize} and the MergePolicy /// should then return the necessary merges. /// /// Note that the policy can return more than one merge at a time. In this case, if the writer is using /// {@link SerialMergeScheduler}, the merges will be run sequentially but if it is using {@link /// ConcurrentMergeScheduler} they will be run concurrently. /// /// The default MergePolicy is {@link LogByteSizeMergePolicy}. /// /// NOTE: This API is new and still experimental (subject to change suddenly in the next release) class LPPAPI MergePolicy : public LuceneObject { public: MergePolicy(IndexWriterPtr writer); virtual ~MergePolicy(); LUCENE_CLASS(MergePolicy); protected: IndexWriterWeakPtr _writer; public: /// Determine what set of merge operations are now necessary on the index. {@link IndexWriter} calls /// this whenever there is a change to the segments. This call is always synchronized on the {@link /// IndexWriter} instance so only one thread at a time will call this method. /// @param segmentInfos the total set of segments in the index virtual MergeSpecificationPtr findMerges(SegmentInfosPtr segmentInfos) = 0; /// Determine what set of merge operations is necessary in order to optimize the index. {@link /// IndexWriter} calls this when its {@link IndexWriter#optimize()} method is called. 
This call is /// always synchronized on the {@link IndexWriter} instance so only one thread at a time will call /// this method. /// @param segmentInfos the total set of segments in the index /// @param maxSegmentCount requested maximum number of segments in the index (currently this is always 1) /// @param segmentsToOptimize contains the specific SegmentInfo instances that must be merged away. /// This may be a subset of all SegmentInfos. virtual MergeSpecificationPtr findMergesForOptimize(SegmentInfosPtr segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize) = 0; /// Determine what set of merge operations is necessary in order to expunge all deletes from the index. /// @param segmentInfos the total set of segments in the index virtual MergeSpecificationPtr findMergesToExpungeDeletes(SegmentInfosPtr segmentInfos) = 0; /// Release all resources for the policy. virtual void close() = 0; /// Returns true if a newly flushed (not from merge) segment should use the compound file format. virtual bool useCompoundFile(SegmentInfosPtr segments, SegmentInfoPtr newSegment) = 0; /// Returns true if the doc store files should use the compound file format. virtual bool useCompoundDocStore(SegmentInfosPtr segments) = 0; }; /// OneMerge provides the information necessary to perform an individual primitive merge operation, /// resulting in a single new segment. The merge spec includes the subset of segments to be merged /// as well as whether the new segment should use the compound file format. 
class LPPAPI OneMerge : public LuceneObject { public: OneMerge(SegmentInfosPtr segments, bool useCompoundFile); virtual ~OneMerge(); LUCENE_CLASS(OneMerge); public: SegmentInfoPtr info; // used by IndexWriter bool mergeDocStores; // used by IndexWriter bool optimize; // used by IndexWriter bool registerDone; // used by IndexWriter int64_t mergeGen; // used by IndexWriter bool isExternal; // used by IndexWriter int32_t maxNumSegmentsOptimize; // used by IndexWriter Collection readers; // used by IndexWriter Collection readersClone; // used by IndexWriter SegmentInfosPtr segments; bool useCompoundFile; bool aborted; LuceneException error; public: /// Record that an exception occurred while executing this merge void setException(const LuceneException& error); /// Retrieve previous exception set by {@link #setException}. LuceneException getException(); /// Mark this merge as aborted. If this is called before the merge is committed then the merge will not be committed. void abort(); /// Returns true if this merge was aborted. bool isAborted(); void checkAborted(DirectoryPtr dir); String segString(DirectoryPtr dir); }; /// A MergeSpecification instance provides the information necessary to perform multiple merges. /// It simply contains a list of {@link OneMerge} instances. class LPPAPI MergeSpecification : public LuceneObject { public: MergeSpecification(); virtual ~MergeSpecification(); LUCENE_CLASS(MergeSpecification); public: Collection merges; public: void add(OneMergePtr merge); String segString(DirectoryPtr dir); }; } #endif LucenePlusPlus-rel_3.0.4/include/MergeScheduler.h000066400000000000000000000020171217574114600220050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MERGESCHEDULER_H #define MERGESCHEDULER_H #include "LuceneObject.h" namespace Lucene { /// {@link IndexWriter} uses an instance implementing this interface to execute the merges /// selected by a {@link MergePolicy}. The default MergeScheduler is {@link ConcurrentMergeScheduler}. class LPPAPI MergeScheduler : public LuceneObject { public: virtual ~MergeScheduler(); LUCENE_CLASS(MergeScheduler); public: /// Run the merges provided by {@link IndexWriter#getNextMerge()}. virtual void merge(IndexWriterPtr writer) = 0; /// Close this MergeScheduler. virtual void close() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/MinPayloadFunction.h000066400000000000000000000021141217574114600226500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MINPAYLOADFUNCTION_H #define MINPAYLOADFUNCTION_H #include "PayloadFunction.h" namespace Lucene { /// Calculates the minimum payload seen class LPPAPI MinPayloadFunction : public PayloadFunction { public: virtual ~MinPayloadFunction(); LUCENE_CLASS(MinPayloadFunction); public: virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore); virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); }; } #endif LucenePlusPlus-rel_3.0.4/include/MiscUtils.h000066400000000000000000000142601217574114600210260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MISCUTILS_H #define MISCUTILS_H #include "Lucene.h" namespace Lucene { class LPPAPI MiscUtils { protected: static const uint32_t SINGLE_EXPONENT_MASK; static const uint32_t SINGLE_MANTISSA_MASK; static const uint32_t SINGLE_NAN_BITS; static const uint64_t DOUBLE_SIGN_MASK; static const uint64_t DOUBLE_EXPONENT_MASK; static const uint64_t DOUBLE_MANTISSA_MASK; static const uint64_t DOUBLE_NAN_BITS; public: /// Return given time in milliseconds. static uint64_t getTimeMillis(boost::posix_time::ptime time); /// Returns the current time in milliseconds. static uint64_t currentTimeMillis(); /// This over-allocates proportional to the list size, making room for additional growth. /// The over-allocation is mild, but is enough to give linear-time amortized behavior over a long /// sequence of appends(). 
/// The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... static int32_t getNextSize(int32_t targetSize); /// Only reallocate if we are "substantially" smaller. This saves us from "running hot" (constantly /// making a bit bigger then a bit smaller, over and over) static int32_t getShrinkSize(int32_t currentSize, int32_t targetSize); /// Compares two byte[] arrays, element by element, and returns the number of elements common to /// both arrays. /// @param bytes1 The first byte[] to compare /// @param bytes2 The second byte[] to compare /// @return The number of common elements. static int32_t bytesDifference(uint8_t* bytes1, int32_t len1, uint8_t* bytes2, int32_t len2); template static int32_t hashLucene(TYPE type) { return type->hashCode(); } template static int32_t hashNumeric(TYPE type) { return type; } template static int32_t hashCode(ITER first, ITER last, PRED pred) { int32_t code = 0; for (ITER hash = first; hash != last; ++hash) code = code * 31 + pred(*hash); return code; } /// Returns hash of chars in range start (inclusive) to end (inclusive) static int32_t hashCode(const wchar_t* array, int32_t start, int32_t end); /// Returns hash of bytes in range start (inclusive) to end (inclusive) static int32_t hashCode(const uint8_t* array, int32_t start, int32_t end); /// Returns hash code of given boolean static int32_t hashCode(bool value); /// Copy elements from on buffer to another template static void arrayCopy(SOURCE source, int32_t sourceOffset, DEST dest, int32_t destOffset, int32_t length) { std::copy(source + sourceOffset, source + sourceOffset + length, dest + destOffset); } /// Fill buffer with given element template static void arrayFill(DEST dest, int32_t destFrom, int32_t destTo, FILL value) { std::fill(dest + destFrom, dest + destTo, value); } /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point /// "single format" bit layout. 
static int32_t doubleToIntBits(double value); /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point /// "single format" bit layout, preserving Not-a-Number (NaN) values. static int32_t doubleToRawIntBits(double value); /// Returns the float value corresponding to a given bit representation. The argument is considered to be a /// representation of a floating-point value according to the IEEE 754 floating-point "single format" bit layout. static double intBitsToDouble(int32_t bits); /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point /// "double format" bit layout. static int64_t doubleToLongBits(double value); /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point /// "double format" bit layout, preserving Not-a-Number (NaN) values. static int64_t doubleToRawLongBits(double value); /// Returns the double value corresponding to a given bit representation. The argument is considered to be a /// representation of a floating-point value according to the IEEE 754 floating-point "double format" bit layout. static double longBitsToDouble(int64_t bits); /// Returns true if the specified number is infinitely large in magnitude, false otherwise. static bool isInfinite(double value); /// Returns true if this Double value is a Not-a-Number (NaN), false otherwise. static bool isNaN(double value); /// Return whether given Lucene object is of a specified type template static bool typeOf(LuceneObjectPtr object) { return boost::dynamic_pointer_cast(object); } /// Return whether given Lucene objects are of equal type. 
static bool equalTypes(LuceneObjectPtr first, LuceneObjectPtr second); /// Perform unsigned right-shift (left bits are zero filled) static int64_t unsignedShift(int64_t num, int64_t shift); /// Perform unsigned right-shift (left bits are zero filled) static int32_t unsignedShift(int32_t num, int32_t shift); }; } #endif LucenePlusPlus-rel_3.0.4/include/MultiFieldQueryParser.h000066400000000000000000000150071217574114600233530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTIFIELDQUERYPARSER_H #define MULTIFIELDQUERYPARSER_H #include "QueryParser.h" #include "BooleanClause.h" namespace Lucene { /// A QueryParser which constructs queries to search multiple fields. class LPPAPI MultiFieldQueryParser : public QueryParser { public: /// Creates a MultiFieldQueryParser. Allows passing of a map with term to Boost, and the boost to /// apply to each term. /// /// It will, when parse(String query) is called, construct a query like this (assuming the query /// consists of two terms and you specify the two fields title and body): ///
        /// (title:term1 body:term1) (title:term2 body:term2)
        /// 
/// /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: ///
        /// +(title:term1 body:term1) +(title:term2 body:term2)
        /// 
/// /// When you pass a boost (title=>5 body=>10) you can get: ///
        /// +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
        /// 
/// /// In other words, all the query's terms must appear, but it doesn't matter in what fields they /// appear. MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, AnalyzerPtr analyzer, MapStringDouble boosts); /// Creates a MultiFieldQueryParser. It will, when parse(String query) is called, construct a /// query like this (assuming the query consists of two terms and you specify the two fields /// title and body): ///
        /// (title:term1 body:term1) (title:term2 body:term2)
        /// 
/// /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: ///
        /// +(title:term1 body:term1) +(title:term2 body:term2)
        /// 
/// /// In other words, all the query's terms must appear, but it doesn't matter in what fields they /// appear. MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, AnalyzerPtr analyzer); virtual ~MultiFieldQueryParser(); LUCENE_CLASS(MultiFieldQueryParser); protected: Collection fields; MapStringDouble boosts; public: using QueryParser::parse; /// Parses a query which searches on the fields specified. /// /// If x fields are specified, this effectively constructs: ///
        /// (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
        /// 
/// @param matchVersion Lucene version to match; this is passed through to QueryParser. /// @param queries Queries strings to parse /// @param fields Fields to search on /// @param analyzer Analyzer to use static QueryPtr parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, AnalyzerPtr analyzer); /// Parses a query, searching on the fields specified. Use this if you need to specify certain fields as /// required, and others as prohibited. /// ///
        /// Usage:
        /// Collection fields = newCollection(L"filename", L"contents", L"description");
        /// Collection flags = newCollection(BooleanClause::SHOULD, BooleanClause::MUST, BooleanClause::MUST_NOT);
        /// MultiFieldQueryParser::parse(L"query", fields, flags, analyzer);
        /// 
/// /// The code above would construct a query: ///
        /// (filename:query) +(contents:query) -(description:query)
        /// 
/// /// @param matchVersion Lucene version to match; this is passed through to QueryParser. /// @param query Query string to parse /// @param fields Fields to search on /// @param flags Flags describing the fields /// @param analyzer Analyzer to use static QueryPtr parse(LuceneVersion::Version matchVersion, const String& query, Collection fields, Collection flags, AnalyzerPtr analyzer); /// Parses a query, searching on the fields specified. Use this if you need to specify certain fields as /// required, and others as prohibited. /// ///
        /// Usage:
        /// Collection query = newCollection(L"query1", L"query2", L"query3");
        /// Collection fields = newCollection(L"filename", L"contents", L"description");
        /// Collection flags = newCollection(BooleanClause::SHOULD, BooleanClause::MUST, BooleanClause::MUST_NOT);
        /// MultiFieldQueryParser::parse(query, fields, flags, analyzer);
        /// 
/// /// The code above would construct a query: ///
        /// (filename:query1) +(contents:query2) -(description:query3)
        /// 
/// /// @param matchVersion Lucene version to match; this is passed through to QueryParser. /// @param queries Queries string to parse /// @param fields Fields to search on /// @param flags Flags describing the fields /// @param analyzer Analyzer to use static QueryPtr parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, Collection flags, AnalyzerPtr analyzer); protected: virtual QueryPtr getFieldQuery(const String& field, const String& queryText, int32_t slop); virtual QueryPtr getFieldQuery(const String& field, const String& queryText); void applySlop(QueryPtr query, int32_t slop); virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity); virtual QueryPtr getPrefixQuery(const String& field, const String& termStr); virtual QueryPtr getWildcardQuery(const String& field, const String& termStr); virtual QueryPtr getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); }; } #endif LucenePlusPlus-rel_3.0.4/include/MultiLevelSkipListReader.h000066400000000000000000000113761217574114600240070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTILEVELSKIPLISTREADER_H #define MULTILEVELSKIPLISTREADER_H #include "IndexInput.h" namespace Lucene { /// This abstract class reads skip lists with multiple levels. /// /// See {@link MultiLevelSkipListWriter} for the information about the encoding of the multi level skip lists. /// /// Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)} which defines the /// actual format of the skip data. 
class MultiLevelSkipListReader : public LuceneObject { public: MultiLevelSkipListReader(IndexInputPtr skipStream, int32_t maxSkipLevels, int32_t skipInterval); virtual ~MultiLevelSkipListReader(); LUCENE_CLASS(MultiLevelSkipListReader); protected: /// the maximum number of skip levels possible for this index int32_t maxNumberOfSkipLevels; /// number of levels in this skip list int32_t numberOfSkipLevels; /// Defines the number of top skip levels to buffer in memory. Reducing this number results in less /// memory usage, but possibly slower performance due to more random I/Os. Please notice that the space /// each level occupies is limited by the skipInterval. The top level can not contain more than /// skipLevel entries, the second top level can not contain more than skipLevel^2 entries and so forth. int32_t numberOfLevelsToBuffer; int32_t docCount; bool haveSkipped; Collection skipStream; // skipStream for each level Collection skipPointer; // the start pointer of each skip level Collection skipInterval; // skipInterval of each level Collection numSkipped; // number of docs skipped per level Collection skipDoc; // doc id of current skip entry per level int32_t lastDoc; // doc id of last read skip entry with docId <= target Collection childPointer; // child pointer of current skip entry per level int64_t lastChildPointer; // childPointer of last read skip entry with docId <= target bool inputIsBuffered; public: /// Returns the id of the doc to which the last call of {@link #skipTo(int)} has skipped. virtual int32_t getDoc(); /// Skips entries to the first beyond the current whose document number is greater than or equal to /// target. Returns the current doc count. virtual int32_t skipTo(int32_t target); virtual void close(); /// Initializes the reader. 
virtual void init(int64_t skipPointer, int32_t df); protected: virtual bool loadNextSkip(int32_t level); /// Seeks the skip entry on the given level virtual void seekChild(int32_t level); /// Loads the skip levels virtual void loadSkipLevels(); /// Subclasses must implement the actual skip data encoding in this method. /// /// @param level the level skip data shall be read from /// @param skipStream the skip stream to read from virtual int32_t readSkipData(int32_t level, IndexInputPtr skipStream) = 0; /// Copies the values of the last read skip entry on this level virtual void setLastSkipData(int32_t level); }; /// Used to buffer the top skip levels class SkipBuffer : public IndexInput { public: SkipBuffer(IndexInputPtr input, int32_t length); virtual ~SkipBuffer(); LUCENE_CLASS(SkipBuffer); protected: ByteArray data; int64_t pointer; int32_t pos; public: /// Closes the stream to further operations. virtual void close(); /// Returns the current position in this file, where the next read will occur. virtual int64_t getFilePointer(); /// The number of bytes in the file. virtual int64_t length(); /// Reads and returns a single byte. virtual uint8_t readByte(); /// Reads a specified number of bytes into an array at the specified offset. virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Sets current position in this file, where the next read will occur. virtual void seek(int64_t pos); }; } #endif LucenePlusPlus-rel_3.0.4/include/MultiLevelSkipListWriter.h000066400000000000000000000060731217574114600240570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MULTILEVELSKIPLISTWRITER_H #define MULTILEVELSKIPLISTWRITER_H #include "LuceneObject.h" namespace Lucene { /// This abstract class writes skip lists with multiple levels. /// /// Example for skipInterval = 3: /// /// c (skip level 2) /// c c c (skip level 1) /// x x x x x x x x x x (skip level 0) /// d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) /// 3 6 9 12 15 18 21 24 27 30 (df) /// /// d - document /// x - skip data /// c - skip data with child pointer /// /// Skip level i contains every skipInterval-th entry from skip level i-1. /// Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))). /// /// Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1. /// This guarantees a logarithmic amount of skips to find the target document. /// /// While this class takes care of writing the different skip levels, subclasses must define the /// actual format of the skip data. class MultiLevelSkipListWriter : public LuceneObject { public: MultiLevelSkipListWriter(int32_t skipInterval, int32_t maxSkipLevels, int32_t df); virtual ~MultiLevelSkipListWriter(); LUCENE_CLASS(MultiLevelSkipListWriter); protected: /// number of levels in this skip list int32_t numberOfSkipLevels; /// the skip interval in the list with level = 0 int32_t skipInterval; /// for every skip level a different buffer is used Collection skipBuffer; public: /// Writes the current skip data to the buffers. The current document frequency determines /// the max level is skip data is to be written to. /// @param df the current document frequency void bufferSkip(int32_t df); /// Writes the buffered skip lists to the given output. 
/// @param output the IndexOutput the skip lists shall be written to /// @return the pointer the skip list starts int64_t writeSkip(IndexOutputPtr output); protected: void init(); virtual void resetSkip(); /// Subclasses must implement the actual skip data encoding in this method. /// @param level the level skip data shall be writing for /// @param skipBuffer the skip buffer to write to virtual void writeSkipData(int32_t level, IndexOutputPtr skipBuffer) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/MultiPhraseQuery.h000066400000000000000000000056771217574114600224110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTIPHRASEQUERY_H #define MULTIPHRASEQUERY_H #include "Query.h" namespace Lucene { /// MultiPhraseQuery is a generalized version of PhraseQuery, with an added method {@link #add(Term[])}. /// To use this class, to search for the phrase "Microsoft app*" first use add(Term) on the term "Microsoft", /// then find all terms that have "app" as prefix using IndexReader.terms(Term), and use /// MultiPhraseQuery.add(Term[] terms) to add them to the query. class LPPAPI MultiPhraseQuery : public Query { public: MultiPhraseQuery(); virtual ~MultiPhraseQuery(); LUCENE_CLASS(MultiPhraseQuery); protected: String field; Collection< Collection > termArrays; Collection positions; int32_t slop; public: using Query::toString; /// Sets the phrase slop for this query. /// @see PhraseQuery#setSlop(int32_t) void setSlop(int32_t s); /// Gets the phrase slop for this query. /// @see PhraseQuery#getSlop() int32_t getSlop(); /// Add a single term at the next position in the phrase. 
/// @see PhraseQuery#add(Term) void add(TermPtr term); /// Add multiple terms at the next position in the phrase. Any of the terms may match. /// @see PhraseQuery#add(Term) void add(Collection terms); /// Allows to specify the relative position of terms within the phrase. /// @see PhraseQuery#add(Term, int) void add(Collection terms, int32_t position); /// Returns a List of the terms in the multiphrase. Do not modify the List or its contents. Collection< Collection > getTermArrays(); /// Returns the relative positions of terms in this phrase. Collection getPositions(); virtual void extractTerms(SetTerm terms); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual WeightPtr createWeight(SearcherPtr searcher); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); protected: int32_t termArraysHashCode(); bool termArraysEquals(Collection< Collection > first, Collection< Collection > second); friend class MultiPhraseWeight; }; } #endif LucenePlusPlus-rel_3.0.4/include/MultiReader.h000066400000000000000000000141601217574114600213260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTIREADER_H #define MULTIREADER_H #include "IndexReader.h" namespace Lucene { /// An IndexReader which reads multiple indexes, appending their content. class LPPAPI MultiReader : public IndexReader { public: /// Construct a MultiReader aggregating the named set of (sub)readers. Directory locking for delete, /// undeleteAll, and setNorm operations is left to the subreaders. 
/// @param closeSubReaders indicates whether the subreaders should be closed when this MultiReader is closed /// @param subReaders set of (sub)readers MultiReader(Collection subReaders, bool closeSubReaders = true); virtual ~MultiReader(); LUCENE_CLASS(MultiReader); protected: Collection subReaders; Collection starts; // 1st docno for each segment Collection decrefOnClose; // remember which subreaders to decRef on close MapStringByteArray normsCache; int32_t _maxDoc; int32_t _numDocs; bool _hasDeletions; public: /// Tries to reopen the subreaders. /// /// If one or more subreaders could be re-opened (ie. subReader.reopen() returned a new instance != subReader), /// then a new MultiReader instance is returned, otherwise this instance is returned. /// /// A re-opened instance might share one or more subreaders with the old instance. Index modification /// operations result in undefined behavior when performed before the old instance is closed. (see {@link /// IndexReader#reopen()}). /// /// If subreaders are shared, then the reference count of those readers is increased to ensure that the /// subreaders remain open until the last referring reader is closed. virtual IndexReaderPtr reopen(); /// Clones the subreaders. (see {@link IndexReader#clone()}). /// /// If subreaders are shared, then the reference count of those readers is increased to ensure that the /// subreaders remain open until the last referring reader is closed. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual Collection getTermFreqVectors(int32_t docNumber); virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); virtual bool isOptimized(); /// Returns the number of documents in this index. 
virtual int32_t numDocs(); /// Returns one greater than the largest possible document number. virtual int32_t maxDoc(); /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine /// what {@link Field}s to load and how they should be loaded. virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n); /// Returns true if any documents have been deleted virtual bool hasDeletions(); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. virtual ByteArray norms(const String& field); /// Reads the byte-encoded normalization factor for the named field of every document. virtual void norms(const String& field, ByteArray norms, int32_t offset); /// Returns an enumeration of all the terms in the index. virtual TermEnumPtr terms(); /// Returns an enumeration of all terms starting at a given term. virtual TermEnumPtr terms(TermPtr t); /// Returns the number of documents containing the term t. virtual int32_t docFreq(TermPtr t); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs(); /// Returns an unpositioned {@link TermPositions} enumerator. virtual TermPositionsPtr termPositions(); /// Get a list of unique field names that exist in this index and have the specified field option /// information. virtual HashSet getFieldNames(FieldOption fieldOption); /// Checks recursively if all subreaders are up to date. virtual bool isCurrent(); /// Not implemented. virtual int64_t getVersion(); /// Returns the sequential sub readers that this reader is logically composed of. 
virtual Collection getSequentialSubReaders(); protected: /// If clone is true then we clone each of the subreaders /// @param doClone /// @return New IndexReader, or same one (this) if reopen/clone is not necessary IndexReaderPtr doReopen(bool doClone); /// Implements deletion of the document numbered docNum. virtual void doDelete(int32_t docNum); /// Implements actual undeleteAll() in subclass. virtual void doUndeleteAll(); /// Find reader for doc n int32_t readerIndex(int32_t n); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); virtual void doCommit(MapStringString commitUserData); /// Implements close. virtual void doClose(); }; } #endif LucenePlusPlus-rel_3.0.4/include/MultiSearcher.h000066400000000000000000000054421217574114600216630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTISEARCHER_H #define MULTISEARCHER_H #include "Searcher.h" #include "Collector.h" namespace Lucene { /// Implements search over a set of Searchables. /// /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or {@link /// #search(QueryPtr, FilterPtr, int32_t)} methods. class LPPAPI MultiSearcher : public Searcher { public: /// Creates a searcher which searches searchers. MultiSearcher(Collection searchables); virtual ~MultiSearcher(); LUCENE_CLASS(MultiSearcher); protected: Collection searchables; Collection starts; int32_t _maxDoc; public: using Searcher::search; /// Return the array of {@link Searchable}s this searches. 
Collection getSearchables(); virtual void close(); virtual int32_t docFreq(TermPtr term); virtual DocumentPtr doc(int32_t n); virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector); /// Returns index of the searcher for document n in the array used to construct this searcher. int32_t subSearcher(int32_t n); /// Returns the document number of document n within its sub-index. int32_t subDoc(int32_t n); virtual int32_t maxDoc(); virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n); virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort); virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr results); virtual QueryPtr rewrite(QueryPtr query); virtual ExplanationPtr explain(WeightPtr weight, int32_t doc); protected: Collection getStarts(); /// Create weight in multiple index scenario. /// /// Distributed query processing is done in the following steps: /// 1. rewrite query. /// 2. extract necessary terms. /// 3. collect dfs for these terms from the Searchables. /// 4. create query weight using aggregate dfs. /// 5. distribute that weight to Searchables. /// 6. merge results. /// /// Steps 1-4 are done here, 5+6 in the search() methods /// /// @return rewritten queries virtual WeightPtr createWeight(QueryPtr query); }; } #endif LucenePlusPlus-rel_3.0.4/include/MultiTermQuery.h000066400000000000000000000207101217574114600220570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MULTITERMQUERY_H #define MULTITERMQUERY_H #include "Query.h" namespace Lucene { /// An abstract {@link Query} that matches documents containing a subset of terms provided by a {@link /// FilteredTermEnum} enumeration. /// /// This query cannot be used directly; you must subclass it and define {@link #getEnum} to provide a /// {@link FilteredTermEnum} that iterates through the terms to be matched. /// /// NOTE: if {@link #setRewriteMethod} is either {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link /// #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a {@link BooleanQuery.TooManyClauses} exception /// during searching, which happens when the number of terms to be searched exceeds {@link /// BooleanQuery#getMaxClauseCount()}. Setting {@link #setRewriteMethod} to {@link /// #CONSTANT_SCORE_FILTER_REWRITE} prevents this. /// /// The recommended rewrite method is {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU /// computing unhelpful scores, and it tries to pick the most performant rewrite method given the query. /// /// Note that {@link QueryParser} produces MultiTermQueries using {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} /// by default. class LPPAPI MultiTermQuery : public Query { public: MultiTermQuery(); virtual ~MultiTermQuery(); LUCENE_CLASS(MultiTermQuery); protected: RewriteMethodPtr rewriteMethod; int32_t numberOfTerms; public: /// A rewrite method that first creates a private Filter, by visiting each term in sequence and marking /// all docs for that term. Matching documents are assigned a constant score equal to the query's boost. /// /// This method is faster than the BooleanQuery rewrite methods when the number of matched terms or matched /// documents is non-trivial. Also, it will never hit an errant TooManyClauses exception. 
/// /// @see #setRewriteMethod static RewriteMethodPtr CONSTANT_SCORE_FILTER_REWRITE(); /// A rewrite method that first translates each term into {@link BooleanClause.Occur#SHOULD} clause in a /// BooleanQuery, and keeps the scores as computed by the query. Note that typically such scores are /// meaningless to the user, and require non-trivial CPU to compute, so it's almost always better to use /// {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. /// /// NOTE: This rewrite method will hit {@link BooleanQuery.TooManyClauses} if the number of terms exceeds /// {@link BooleanQuery#getMaxClauseCount}. /// /// @see #setRewriteMethod static RewriteMethodPtr SCORING_BOOLEAN_QUERY_REWRITE(); /// Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except scores are not computed. Instead, each matching /// document receives a constant score equal to the query's boost. /// /// NOTE: This rewrite method will hit TooManyClauses if the number of terms exceeds {@link /// BooleanQuery#getMaxClauseCount}. /// /// @see #setRewriteMethod static RewriteMethodPtr CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE(); /// Read-only default instance of {@link ConstantScoreAutoRewrite}, with {@link /// ConstantScoreAutoRewrite#setTermCountCutoff} set to {@link ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF} /// and {@link ConstantScoreAutoRewrite#setDocCountPercent} set to {@link /// ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}. Note that you cannot alter the configuration of /// this instance; you'll need to create a private instance instead. static RewriteMethodPtr CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); /// Return the number of unique terms visited during execution of the query. If there are many of them, /// you may consider using another query type or optimize your total term count in index. /// /// This method is not thread safe, be sure to only call it when no query is running! 
If you re-use the /// same query instance for another search, be sure to first reset the term counter with {@link /// #clearTotalNumberOfTerms}. /// /// On optimized indexes / no MultiReaders, you get the correct number of unique terms for the whole index. /// Use this number to compare different queries. For non-optimized indexes this number can also be achieved /// in non-constant-score mode. In constant-score mode you get the total number of terms seeked for all /// segments / sub-readers. /// @see #clearTotalNumberOfTerms int32_t getTotalNumberOfTerms(); /// Resets the counting of unique terms. Do this before executing the query/filter. /// @see #getTotalNumberOfTerms void clearTotalNumberOfTerms(); virtual QueryPtr rewrite(IndexReaderPtr reader); /// @see #setRewriteMethod virtual RewriteMethodPtr getRewriteMethod(); /// Sets the rewrite method to be used when executing the query. You can use one of the four core methods, /// or implement your own subclass of {@link RewriteMethod}. virtual void setRewriteMethod(RewriteMethodPtr method); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); protected: /// Construct the enumeration to be used, expanding the pattern term. virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader) = 0; void incTotalNumberOfTerms(int32_t inc); friend class MultiTermQueryWrapperFilter; friend class ScoringBooleanQueryRewrite; friend class ConstantScoreAutoRewrite; }; /// Abstract class that defines how the query is rewritten. class LPPAPI RewriteMethod : public LuceneObject { public: virtual ~RewriteMethod(); LUCENE_CLASS(RewriteMethod); public: virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) = 0; }; /// A rewrite method that tries to pick the best constant-score rewrite method based on term and document /// counts from the query. 
If both the number of terms and documents is small enough, then {@link /// #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used. Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is /// used. class LPPAPI ConstantScoreAutoRewrite : public RewriteMethod { public: ConstantScoreAutoRewrite(); virtual ~ConstantScoreAutoRewrite(); LUCENE_CLASS(ConstantScoreAutoRewrite); public: // Defaults derived from rough tests with a 20.0 million doc Wikipedia index. With more than 350 terms // in the query, the filter method is fastest static const int32_t DEFAULT_TERM_COUNT_CUTOFF; // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest static const double DEFAULT_DOC_COUNT_PERCENT; protected: int32_t termCountCutoff; double docCountPercent; public: /// If the number of terms in this query is equal to or larger than this setting then {@link /// #CONSTANT_SCORE_FILTER_REWRITE} is used. virtual void setTermCountCutoff(int32_t count); /// @see #setTermCountCutoff virtual int32_t getTermCountCutoff(); /// If the number of documents to be visited in the postings exceeds this specified percentage of the /// maxDoc() for the index, then {@link #CONSTANT_SCORE_FILTER_REWRITE} is used. /// @param percent 0.0 to 100.0 virtual void setDocCountPercent(double percent); /// @see #setDocCountPercent virtual double getDocCountPercent(); virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); }; } #endif LucenePlusPlus-rel_3.0.4/include/MultiTermQueryWrapperFilter.h000066400000000000000000000046271217574114600245770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MULTITERMQUERYWRAPPERFILTER_H #define MULTITERMQUERYWRAPPERFILTER_H #include "Filter.h" namespace Lucene { /// A wrapper for {@link MultiTermQuery}, that exposes its functionality as a {@link Filter}. /// /// MultiTermQueryWrapperFilter is not designed to be used by itself. Normally you subclass it to /// provide a Filter counterpart for a {@link MultiTermQuery} subclass. /// /// For example, {@link TermRangeFilter} and {@link PrefixFilter} extend MultiTermQueryWrapperFilter. /// This class also provides the functionality behind {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}; /// this is why it is not abstract. class LPPAPI MultiTermQueryWrapperFilter : public Filter { INTERNAL: /// Wrap a {@link MultiTermQuery} as a Filter. MultiTermQueryWrapperFilter(MultiTermQueryPtr query); public: virtual ~MultiTermQueryWrapperFilter(); LUCENE_CLASS(MultiTermQueryWrapperFilter); protected: MultiTermQueryPtr query; public: virtual String toString(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); /// Return the number of unique terms visited during execution of the filter. If there are many of them, /// you may consider using another filter type or optimize your total term count in index. /// /// This method is not thread safe, be sure to only call it when no filter is running! If you re-use the /// same filter instance for another search, be sure to first reset the term counter with {@link /// #clearTotalNumberOfTerms}. /// @see #clearTotalNumberOfTerms int32_t getTotalNumberOfTerms(); /// Resets the counting of unique terms. Do this before executing the filter. /// @see #getTotalNumberOfTerms void clearTotalNumberOfTerms(); /// Returns a DocIdSet with documents that should be permitted in search results. 
virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/MultipleTermPositions.h000066400000000000000000000033041217574114600234420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTIPLETERMPOSITIONS_H #define MULTIPLETERMPOSITIONS_H #include "TermPositions.h" namespace Lucene { /// Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as a single /// {@link TermPositions}. class LPPAPI MultipleTermPositions : public TermPositions, public LuceneObject { public: MultipleTermPositions(IndexReaderPtr indexReader, Collection terms); virtual ~MultipleTermPositions(); LUCENE_CLASS(MultipleTermPositions); protected: int32_t _doc; int32_t _freq; TermPositionsQueuePtr termPositionsQueue; IntQueuePtr posList; public: virtual bool next(); virtual int32_t nextPosition(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t freq(); virtual void close(); /// Not implemented. virtual void seek(TermPtr term); /// Not implemented. virtual void seek(TermEnumPtr termEnum); /// Not implemented. virtual int32_t read(Collection docs, Collection freqs); /// Not implemented. virtual ByteArray getPayload(ByteArray data, int32_t offset); /// @return false virtual bool isPayloadAvailable(); }; } #endif LucenePlusPlus-rel_3.0.4/include/NativeFSLockFactory.h000066400000000000000000000034131217574114600227300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NATIVEFSLOCKFACTORY_H #define NATIVEFSLOCKFACTORY_H #include "FSLockFactory.h" namespace Lucene { /// Implements {@link LockFactory} using native file lock. /// @see LockFactory class LPPAPI NativeFSLockFactory : public FSLockFactory { public: /// Create a NativeFSLockFactory instance, storing lock files into /// the specified lockDirName. /// @param lockDirName where lock files are created. NativeFSLockFactory(const String& lockDirName = EmptyString); virtual ~NativeFSLockFactory(); LUCENE_CLASS(NativeFSLockFactory); public: /// Return a new Lock instance identified by lockName. /// @param lockName name of the lock to be created. virtual LockPtr makeLock(const String& lockName); /// Attempt to clear (forcefully unlock and remove) the /// specified lock. Only call this at a time when you are /// certain this lock is no longer in use. /// @param lockName name of the lock to be cleared. virtual void clearLock(const String& lockName); protected: /// Simple test to verify locking system is "working". On NFS, if /// it's mis-configured, you can hit long (35 second) timeouts which /// cause Lock.obtain to take far too long (it assumes the obtain() /// call takes zero time). void acquireTestLock(); }; } #endif LucenePlusPlus-rel_3.0.4/include/NearSpansOrdered.h000066400000000000000000000072751217574114600223210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef NEARSPANSORDERED_H #define NEARSPANSORDERED_H #include "Spans.h" namespace Lucene { /// A Spans that is formed from the ordered subspans of a SpanNearQuery where the subspans do not overlap /// and have a maximum slop between them. /// /// The formed spans only contains minimum slop matches. The matching slop is computed from the distance(s) /// between the non overlapping matching Spans. /// /// Successive matches are always formed from the successive Spans of the SpanNearQuery. /// /// The formed spans may contain overlaps when the slop is at least 1. For example, when querying using ///
t1 t2 t3
/// with slop at least 1, the fragment: ///
t1 t2 t1 t3 t2 t3
/// matches twice: ///
t1 t2 .. t3      
///
      t1 .. t2 t3
/// /// Note: Only public for subclassing. Most implementations should not need this class class LPPAPI NearSpansOrdered : public Spans { public: NearSpansOrdered(SpanNearQueryPtr spanNearQuery, IndexReaderPtr reader, bool collectPayloads = true); virtual ~NearSpansOrdered(); LUCENE_CLASS(NearSpansOrdered); protected: int32_t allowedSlop; bool firstTime; bool more; /// The spans in the same order as the SpanNearQuery Collection subSpans; /// Indicates that all subSpans have same doc() bool inSameDoc; int32_t matchDoc; int32_t matchStart; int32_t matchEnd; Collection matchPayload; Collection subSpansByDoc; SpanNearQueryPtr query; bool collectPayloads; public: virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); Collection getSubSpans(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual bool next(); virtual bool skipTo(int32_t target); /// Check whether two Spans in the same document are ordered. /// @return true if spans1 starts before spans2 or the spans start at the same position, and /// spans1 ends before spans2. static bool docSpansOrdered(SpansPtr spans1, SpansPtr spans2); virtual String toString(); protected: /// Advances the subSpans to just after an ordered match with a minimum slop that is smaller than the /// slop allowed by the SpanNearQuery. /// @return true if there is such a match. bool advanceAfterOrdered(); /// Advance the subSpans to the same document. bool toSameDoc(); // Like {@link #docSpansOrdered(SpansPtr, SpansPtr)}, but use the spans starts and ends as parameters. static bool docSpansOrdered(int32_t start1, int32_t end1, int32_t start2, int32_t end2); /// Order the subSpans within the same document by advancing all later spans after the previous one. bool stretchToOrder(); /// The subSpans are ordered in the same doc, so there is a possible match. Compute the slop while /// making the match as short as possible by advancing all subSpans except the last one in reverse order. 
bool shrinkToAfterShortestMatch(); }; } #endif LucenePlusPlus-rel_3.0.4/include/NearSpansUnordered.h000066400000000000000000000041611217574114600226530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NEARSPANSUNORDERED_H #define NEARSPANSUNORDERED_H #include "Spans.h" namespace Lucene { /// Similar to {@link NearSpansOrdered}, but for the unordered case. /// /// Only public for subclassing. Most implementations should not need this class class LPPAPI NearSpansUnordered : public Spans { public: NearSpansUnordered(SpanNearQueryPtr query, IndexReaderPtr reader); virtual ~NearSpansUnordered(); LUCENE_CLASS(NearSpansUnordered); protected: SpanNearQueryPtr query; IndexReaderPtr reader; Collection ordered; // spans in query order Collection subSpans; int32_t slop; // from query SpansCellPtr first; // linked list of spans SpansCellPtr last; // sorted by doc only int32_t totalLength; // sum of current lengths CellQueuePtr queue; // sorted queue of spans SpansCellPtr max; // max element in queue bool more; // true if not done bool firstTime; // true before first next() public: virtual void initialize(); Collection getSubSpans(); virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); protected: SpansCellPtr min(); void initList(bool next); void addToList(SpansCellPtr cell); void firstToLast(); void queueToList(); void listToQueue(); bool atMatch(); friend class SpansCell; }; } #endif 
LucenePlusPlus-rel_3.0.4/include/NoLockFactory.h000066400000000000000000000024501217574114600216250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NOLOCKFACTORY_H #define NOLOCKFACTORY_H #include "LockFactory.h" namespace Lucene { /// Use this {@link LockFactory} to disable locking entirely. Only one instance of this lock is created. /// You should call {@link #getNoLockFactory()} to get the instance. /// /// @see LockFactory class LPPAPI NoLockFactory : public LockFactory { public: virtual ~NoLockFactory(); LUCENE_CLASS(NoLockFactory); private: static NoLockPtr getSingletonLock(); public: static NoLockFactoryPtr getNoLockFactory(); /// Return a new Lock instance identified by lockName. virtual LockPtr makeLock(const String& lockName); /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you /// are certain this lock is no longer in use. virtual void clearLock(const String& lockName); }; } #endif LucenePlusPlus-rel_3.0.4/include/NormalizeCharMap.h000066400000000000000000000022761217574114600223120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NORMALIZECHARMAP_H #define NORMALIZECHARMAP_H #include "LuceneObject.h" namespace Lucene { /// Holds a map of String input to String output, to be used with {@link MappingCharFilter}. 
class LPPAPI NormalizeCharMap : public LuceneObject { public: NormalizeCharMap(); virtual ~NormalizeCharMap(); LUCENE_CLASS(NormalizeCharMap); public: MapCharNormalizeCharMap submap; String normStr; int32_t diff; public: /// Records a replacement to be applied to the inputs stream. Whenever singleMatch occurs in the input, it /// will be replaced with replacement. /// /// @param singleMatch input String to be replaced /// @param replacement output String void add(const String& singleMatch, const String& replacement); }; } #endif LucenePlusPlus-rel_3.0.4/include/NormsWriter.h000066400000000000000000000030451217574114600214040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NORMSWRITER_H #define NORMSWRITER_H #include "InvertedDocEndConsumer.h" namespace Lucene { /// Writes norms. Each thread X field accumulates the norms for the doc/fields it saw, then the flush method /// below merges all of these together into a single _X.nrm file. 
class NormsWriter : public InvertedDocEndConsumer { public: NormsWriter(); virtual ~NormsWriter(); LUCENE_CLASS(NormsWriter); protected: FieldInfosPtr fieldInfos; public: virtual InvertedDocEndConsumerPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread); virtual void abort(); // We only write the _X.nrm file at flush virtual void files(HashSet files); virtual void setFieldInfos(FieldInfosPtr fieldInfos); /// Produce _X.nrm if any document had a field with norms not disabled virtual void flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, SegmentWriteStatePtr state); virtual void closeDocStore(SegmentWriteStatePtr state); protected: static uint8_t getDefaultNorm(); }; } #endif LucenePlusPlus-rel_3.0.4/include/NormsWriterPerField.h000066400000000000000000000027711217574114600230240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NORMSWRITERPERFIELD_H #define NORMSWRITERPERFIELD_H #include "InvertedDocEndConsumerPerField.h" namespace Lucene { /// Taps into DocInverter, as an InvertedDocEndConsumer, which is called at the end of inverting each field. /// We just look at the length for the field (docState.length) and record the norm. 
class NormsWriterPerField : public InvertedDocEndConsumerPerField { public: NormsWriterPerField(DocInverterPerFieldPtr docInverterPerField, NormsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo); virtual ~NormsWriterPerField(); LUCENE_CLASS(NormsWriterPerField); public: NormsWriterPerThreadWeakPtr _perThread; FieldInfoPtr fieldInfo; DocStatePtr docState; // Holds all docID/norm pairs we've seen Collection docIDs; ByteArray norms; int32_t upto; FieldInvertStatePtr fieldState; public: void reset(); virtual void abort(); /// Compare two objects virtual int32_t compareTo(LuceneObjectPtr other); virtual void finish(); }; } #endif LucenePlusPlus-rel_3.0.4/include/NormsWriterPerThread.h000066400000000000000000000022011217574114600231740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef NORMSWRITERPERTHREAD_H #define NORMSWRITERPERTHREAD_H #include "InvertedDocEndConsumerPerThread.h" namespace Lucene { class NormsWriterPerThread : public InvertedDocEndConsumerPerThread { public: NormsWriterPerThread(DocInverterPerThreadPtr docInverterPerThread, NormsWriterPtr normsWriter); virtual ~NormsWriterPerThread(); LUCENE_CLASS(NormsWriterPerThread); public: NormsWriterWeakPtr _normsWriter; DocStatePtr docState; public: virtual InvertedDocEndConsumerPerFieldPtr addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo); virtual void abort(); virtual void startDocument(); virtual void finishDocument(); bool freeRAM(); }; } #endif LucenePlusPlus-rel_3.0.4/include/NumberTools.h000066400000000000000000000044001217574114600213560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMBERTOOLS_H #define NUMBERTOOLS_H #include "LuceneObject.h" namespace Lucene { /// Provides support for converting longs to Strings, and back again. The strings are structured so that /// lexicographic sorting order is preserved. /// /// That is, if l1 is less than l2 for any two longs l1 and l2, then NumberTools.longToString(l1) is /// lexicographically less than NumberTools.longToString(l2). (Similarly for "greater than" and "equals".) /// /// This class handles all long values (unlike {@link DateField}). /// /// @deprecated For new indexes use {@link NumericUtils} instead, which provides a sortable binary representation /// (prefix encoded) of numeric values. /// To index and efficiently query numeric values use {@link NumericField} and {@link NumericRangeQuery}. 
This /// class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0). class LPPAPI NumberTools : public LuceneObject { public: virtual ~NumberTools(); LUCENE_CLASS(NumberTools); protected: static const int32_t RADIX; static const wchar_t NEGATIVE_PREFIX; // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX static const wchar_t POSITIVE_PREFIX; public: /// Equivalent to longToString(LLONG_MIN) static const String& MIN_STRING_VALUE(); /// Equivalent to longToString(LLONG_MAX) static const String& MAX_STRING_VALUE(); /// The length of (all) strings returned by {@link #longToString} static int32_t STR_SIZE(); /// Converts a long to a String suitable for indexing. static String longToString(int64_t l); /// Converts a String that was returned by {@link #longToString} back to a long. static int64_t stringToLong(const String& str); }; } #endif LucenePlusPlus-rel_3.0.4/include/NumericField.h000066400000000000000000000177601217574114600214700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICFIELD_H #define NUMERICFIELD_H #include "Field.h" namespace Lucene { /// This class provides a {@link Field} that enables indexing of numeric values for efficient range filtering and /// sorting. The native types int32_t, int64_t and double are directly supported. However, any value that can be /// converted into these native types can also be indexed. For example, date/time values represented by a {@link /// Date} can be translated into a int64_t value. If you don't need millisecond precision, you can quantize the /// value, either by dividing the result or using the separate getters (for year, month, etc.) 
to construct an int32_t /// or int64_t value. /// /// To perform range querying or filtering against a NumericField, use {@link NumericRangeQuery} or {@link /// NumericRangeFilter}. To sort according to a NumericField, use the normal numeric sort types, eg {@link /// SortField#INT}. NumericField values can also be loaded directly from {@link FieldCache}. /// /// By default, a NumericField's value is not stored but is indexed for range filtering and sorting. You can use the /// {@link #NumericField(String,Field.Store,boolean)} constructor if you need to change these defaults. /// /// You may add the same field name as a NumericField to the same document more than once. Range querying and /// filtering will be the logical OR of all values; so a range query will hit all documents that have at least one /// value in the range. However sort behavior is not defined. If you need to sort, you should separately index a /// single-valued NumericField. /// /// A NumericField will consume somewhat more disk space in the index than an ordinary single-valued field. However, /// for a typical index that includes substantial textual content per document, this increase will likely be in the /// noise. /// /// Within Lucene, each numeric value is indexed as a trie structure, where each term is logically assigned to larger /// and larger pre-defined brackets (which are simply lower-precision representations of the value). The step size /// between each successive bracket is called the precisionStep, measured in bits. Smaller precisionStep values /// result in larger number of brackets, which consumes more disk space in the index but may result in faster range /// search performance. The default value 4 was selected for a reasonable trade off of disk space consumption versus /// performance. You can use the expert constructor {@link #NumericField(String,int,Field.Store,boolean)} if you'd /// like to change the value. 
Note that you must also specify a congruent value when creating {@link NumericRangeQuery} /// or {@link NumericRangeFilter}. For low cardinality fields larger precision steps are good. If the cardinality /// is < 100, it is fair to use {@link INT_MAX}, which produces one term per value. /// /// For more information on the internals of numeric trie indexing, including the precisionStep configuration, see /// {@link NumericRangeQuery}. The format of indexed values is described in {@link NumericUtils}. /// /// If you only need to sort by numeric value, and never run range querying/filtering, you can index using a /// precisionStep of {@link MAX_INT}. This will minimize disk space consumed. /// /// More advanced users can instead use {@link NumericTokenStream} directly, when indexing numbers. This class is a /// wrapper around this token stream type for easier, more intuitive usage. /// /// NOTE: This class is only used during indexing. When retrieving the stored field value from a {@link Document} /// instance after search, you will get a conventional {@link Fieldable} instance where the numeric values are /// returned as strings (according to toString(value) of the used data type). class LPPAPI NumericField : public AbstractField { public: /// Creates a field for numeric values using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} /// (4). The instance is not yet initialized with a numeric value, before indexing a document containing this field, /// set a value using the various set???Value() methods. /// This constructor creates an indexed, but not stored field. /// @param name the field name NumericField(const String& name); /// Creates a field for numeric values using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} /// (4). The instance is not yet initialized with a numeric value, before indexing a document containing this field, /// set a value using the various set???Value() methods. 
/// This constructor creates an indexed, but not stored field. /// @param name the field name /// @param store if the field should be stored in plain text form (according to toString(value) of the used /// data type) /// @param index if the field should be indexed using {@link NumericTokenStream} NumericField(const String& name, Field::Store store, bool index); /// Creates a field for numeric values with the specified precisionStep. The instance is not yet initialized with /// a numeric value, before indexing a document containing this field, set a value using the various set???Value() /// methods. This constructor creates an indexed, but not stored field. /// @param name the field name /// @param precisionStep the used precision step NumericField(const String& name, int32_t precisionStep); /// Creates a field for numeric values with the specified precisionStep. The instance is not yet initialized with /// a numeric value, before indexing a document containing this field, set a value using the various set???Value() /// methods. This constructor creates an indexed, but not stored field. /// @param name the field name /// @param precisionStep the used precision step /// @param store if the field should be stored in plain text form (according to toString(value) of the used /// data type) /// @param index if the field should be indexed using {@link NumericTokenStream} NumericField(const String& name, int32_t precisionStep, Field::Store store, bool index); virtual ~NumericField(); LUCENE_CLASS(NumericField); protected: NumericTokenStreamPtr tokenStream; public: /// Returns a {@link NumericTokenStream} for indexing the numeric value. virtual TokenStreamPtr tokenStreamValue(); /// Returns always null for numeric fields virtual ByteArray getBinaryValue(ByteArray result); /// Returns always null for numeric fields virtual ReaderPtr readerValue(); /// Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). 
virtual String stringValue(); /// Returns the current numeric value. virtual int64_t getNumericValue(); /// Initializes the field with the supplied long value. /// @param value the numeric value virtual NumericFieldPtr setLongValue(int64_t value); /// Initializes the field with the supplied int value. /// @param value the numeric value virtual NumericFieldPtr setIntValue(int32_t value); /// Initializes the field with the supplied double value. /// @param value the numeric value virtual NumericFieldPtr setDoubleValue(double value); }; } #endif LucenePlusPlus-rel_3.0.4/include/NumericRangeFilter.h000066400000000000000000000113631217574114600226400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICRANGEFILTER_H #define NUMERICRANGEFILTER_H #include "MultiTermQueryWrapperFilter.h" namespace Lucene { /// A {@link Filter} that only accepts numeric values within a specified range. To use this, you must first /// index the numeric values using {@link NumericField} ({@link NumericTokenStream}). /// /// You create a new NumericRangeFilter with the static factory methods, eg: ///
    /// FilterPtr f = NumericRangeFilter::newDoubleRange(L"weight", 0.3, 0.10, true, true);
    /// 
/// accepts all documents whose double valued "weight" field ranges from 0.3 to 0.10, inclusive. /// /// See {@link NumericRangeQuery} for details on how Lucene indexes and searches numeric valued fields. class LPPAPI NumericRangeFilter : public MultiTermQueryWrapperFilter { public: NumericRangeFilter(NumericRangeQueryPtr query); virtual ~NumericRangeFilter(); LUCENE_CLASS(NumericRangeFilter); public: /// Factory that creates a NumericRangeFilter, that filters a long range using the given precisionStep. static NumericRangeFilterPtr newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a long range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). static NumericRangeFilterPtr newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int range using the given precisionStep. static NumericRangeFilterPtr newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). static NumericRangeFilterPtr newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a double range using the given precisionStep. static NumericRangeFilterPtr newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a double range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). 
static NumericRangeFilterPtr newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int, long or double range using the given /// precisionStep. You can have half-open ranges (which are in fact <= or >= queries) by setting the min /// or max value to VariantUtils::null(). By setting inclusive to false it will match all documents /// excluding the bounds, with inclusive on the boundaries are hits, too. static NumericRangeFilterPtr newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int, long or double range range using the default /// precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). You can have half-open ranges (which are in /// fact <= or >= queries) by setting the min or max value to VariantUtils::null(). By setting inclusive to false /// it will match all documents excluding the bounds, with inclusive on the boundaries are hits, too. static NumericRangeFilterPtr newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); /// Returns the field name for this filter String getField(); /// Returns true if the lower endpoint is inclusive bool includesMin(); /// Returns true if the upper endpoint is inclusive bool includesMax(); /// Returns the lower value of this range filter NumericValue getMin(); /// Returns the upper value of this range filter NumericValue getMax(); }; } #endif LucenePlusPlus-rel_3.0.4/include/NumericRangeQuery.h000066400000000000000000000276071217574114600225300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICRANGEQUERY_H #define NUMERICRANGEQUERY_H #include "MultiTermQuery.h" #include "FilteredTermEnum.h" #include "NumericUtils.h" namespace Lucene { /// A {@link Query} that matches numeric values within a specified range. To use this, you must first /// index the numeric values using {@link NumericField} (expert: {@link NumericTokenStream}). If your /// terms are instead textual, you should use {@link TermRangeQuery}. {@link NumericRangeFilter} is the /// filter equivalent of this query. /// /// You create a new NumericRangeQuery with the static factory methods, eg: ///
    /// QueryPtr q = NumericRangeQuery::newDoubleRange("weight", 0.3, 0.10, true, true);
    /// 
/// matches all documents whose double valued "weight" field ranges from 0.3 to 0.10, inclusive. /// /// The performance of NumericRangeQuery is much better than the corresponding {@link TermRangeQuery} /// because the number of terms that must be searched is usually far fewer, thanks to trie indexing, /// described below. /// /// You can optionally specify a precisionStep when creating this query. This is necessary if you've /// changed this configuration from its default (4) during indexing. Lower values consume more disk /// space but speed up searching. Suitable values are between 1 and 8. A good starting point to test /// is 4, which is the default value for all Numeric* classes. See below for details. /// /// This query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} for 32 bit /// integer ranges with precisionStep <=8 and 64 bit (long/double) ranges with precisionStep <=6. /// Otherwise it uses {@linkplain MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the number of terms /// is likely to be high. With precision steps of <=4, this query can be run with one of the BooleanQuery /// rewrite methods without changing BooleanQuery's default max clause count. /// /// How it works /// /// See the publication about panFMP, where this /// algorithm was described (referred to as TrieRangeQuery): ///
Schindler, U, Diepenbroek, M, 2008. /// Generic XML-based Framework for Metadata Portals. /// Computers & Geosciences 34 (12), 1947-1955. /// doi:10.1016/j.cageo.2008.02.023
/// /// A quote from this paper: Because Apache Lucene is a full-text search engine and not a conventional /// database, it cannot handle numerical ranges (eg., field value is inside user defined bounds, even /// dates are numerical values). We have developed an extension to Apache Lucene that stores the /// numerical values in a special string-encoded format with variable precision (all numerical values like /// doubles, longs, and ints are converted to lexicographic sortable string representations and stored /// with different precisions (for a more detailed description of how the values are stored, see {@link /// NumericUtils}). A range is then divided recursively into multiple intervals for searching: /// The center of the range is searched only with the lowest possible precision in the trie, while the /// boundaries are matched more exactly. This reduces the number of terms dramatically. /// /// For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that uses a /// lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the lowest /// precision. Overall, a range could consist of a theoretical maximum of 7*255*2 + 255 = 3825 distinct /// terms (when there is a term for every distinct value of an 8-byte-number in the index and the range /// covers almost all of them; a maximum of 255 distinct values is used because it would always be possible /// to reduce the full 256 values to one term with degraded precision). In practice, we have seen up to /// 300 terms in most cases (index with 500,000 metadata records and a uniform value distribution). /// /// Precision Step: /// You can choose any precisionStep when encoding values. Lower step values mean more precisions and so /// more terms in index (and index gets larger). On the other hand, the maximum number of terms to match /// reduces, which optimized query speed. The formula to calculate the maximum term count is: ///
    /// n = [ (bitsPerValue/precisionStep - 1) * (2 ^ precisionStep - 1 ) * 2 ] + (2 ^ precisionStep - 1 )
    /// 
/// /// (this formula is only correct, when bitsPerValue/precisionStep is an integer; in other cases, the value /// must be rounded up and the last summand must contain the modulo of the division as precision step). /// For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465, and for a precision step of 2, /// n = 31*3*2 + 3 = 189. But the faster search speed is reduced by more seeking in the term enum of the /// index. Because of this, the ideal precisionStep value can only be found out by testing. Important: You /// can index with a lower precision step value and test search speed using a multiple of the original step /// value. /// /// Good values for precisionStep are depending on usage and data type: ///
    ///
  • The default for all data types is 4, which is used, when no precisionStep is given. ///
  • Ideal value in most cases for 64 bit data types (long, double) is 6 or 8. ///
  • Ideal value in most cases for 32 bit data types (int) is 4. ///
  • For low cardinality fields larger precision steps are good. If the cardinality is < 100, it is /// fair to use {@link Integer#MAX_VALUE} (see below). ///
  • Steps >=64 for long/double and >=32 for int/float produces one token per value in the index and /// querying is as slow as a conventional {@link TermRangeQuery}. But it can be used to produce fields, /// that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as precisionStep). /// Using {@link NumericField NumericFields} for sorting is ideal, because building the field cache is much /// faster than with text-only numbers. These fields have one term per value and therefore also work with /// term enumeration for building distinct lists (eg. facets / preselected values to search for). /// Sorting is also possible with range query optimized fields using one of the above precisionSteps. ///
/// /// Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed that /// {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count) took /// about 30-40 secs to complete, {@link TermRangeQuery} in constant score filter rewrite mode took 5 secs /// and executing this class took <100ms to complete (on an Opteron64 machine, 8 bit precision step). This /// query type was developed for a geographic portal, where the performance for eg. bounding boxes or exact /// date/time stamps is important. class LPPAPI NumericRangeQuery : public MultiTermQuery { public: NumericRangeQuery(const String& field, int32_t precisionStep, int32_t valSize, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); virtual ~NumericRangeQuery(); LUCENE_CLASS(NumericRangeQuery); INTERNAL: String field; int32_t precisionStep; int32_t valSize; NumericValue min; NumericValue max; bool minInclusive; bool maxInclusive; public: using MultiTermQuery::toString; /// Factory that creates a NumericRangeFilter, that filters a long range using the given precisionStep. static NumericRangeQueryPtr newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a long range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). static NumericRangeQueryPtr newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int range using the given precisionStep. static NumericRangeQueryPtr newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). 
static NumericRangeQueryPtr newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a double range using the given precisionStep. static NumericRangeQueryPtr newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a double range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). static NumericRangeQueryPtr newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeQuery, that queries a int, long or double range using the given /// precisionStep. You can have half-open ranges (which are in fact <= or >= queries) by setting the min /// or max value to VariantUtils::null(). By setting inclusive to false it will match all documents /// excluding the bounds, with inclusive on the boundaries are hits, too. static NumericRangeQueryPtr newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeQuery, that queries a int, long or double range using the default /// precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). You can have half-open ranges (which /// are in fact <= or >= queries) by setting the min or max value to VariantUtils::null(). By setting /// inclusive to false it will match all documents excluding the bounds, with inclusive on the boundaries /// are hits, too. 
static NumericRangeQueryPtr newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); /// Returns the field name for this query String getField(); /// Returns true if the lower endpoint is inclusive bool includesMin(); /// Returns true if the upper endpoint is inclusive bool includesMax(); /// Returns the lower value of this range query NumericValue getMin(); /// Returns the upper value of this range query NumericValue getMax(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); protected: virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); friend class NumericRangeTermEnum; }; } #endif LucenePlusPlus-rel_3.0.4/include/NumericTokenStream.h000066400000000000000000000131701217574114600226700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICTOKENSTREAM_H #define NUMERICTOKENSTREAM_H #include "TokenStream.h" namespace Lucene { /// This class provides a {@link TokenStream} for indexing numeric values that can be used by {@link NumericRangeQuery} /// or {@link NumericRangeFilter}. /// /// Note that for simple usage, {@link NumericField} is recommended. {@link NumericField} disables norms and term freqs, /// as they are not usually needed during searching. If you need to change these settings, you should use this class. /// /// See {@link NumericField} for capabilities of fields indexed numerically. 
/// /// Here's an example usage, for an int field: /// /// FieldPtr field = newLucene(name, newLucene(precisionStep)->setIntValue(value)); /// field->setOmitNorms(true); /// field->setOmitTermFreqAndPositions(true); /// document->add(field); /// /// For optimal performance, re-use the TokenStream and Field instance for more than one document: /// /// NumericTokenStreamPtr stream = newLucene(precisionStep); /// FieldPtr field = newLucene(name, stream); /// field->setOmitNorms(true); /// field->setOmitTermFreqAndPositions(true); /// DocumentPtr document = newLucene(); /// document->add(field); /// /// for (all documents) /// { /// stream->setIntValue(value); /// writer->addDocument(document); /// } /// /// This stream is not intended to be used in analyzers; it's more for iterating the different precisions during /// indexing a specific numeric value. /// /// NOTE: as token streams are only consumed once the document is added to the index, if you index more than one /// numeric field, use a separate NumericTokenStream * instance for each. /// /// See {@link NumericRangeQuery} for more details on the precisionStep /// parameter as well as how numeric fields work under the hood. class LPPAPI NumericTokenStream : public TokenStream { public: /// Creates a token stream for numeric values using the default precisionStep {@link /// NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, before using set a /// value using the various setValue() methods. NumericTokenStream(); /// Creates a token stream for numeric values with the specified precisionStep. The stream is not yet /// initialized, before using set a value using the various setValue() methods. NumericTokenStream(int32_t precisionStep); /// Creates a token stream for numeric values with the specified precisionStep using the given {@link /// AttributeSource}. The stream is not yet initialized, before using set a value using the various /// setValue() methods. 
NumericTokenStream(AttributeSourcePtr source, int32_t precisionStep); /// Creates a token stream for numeric values with the specified precisionStep using the given {@link /// AttributeFactory}. The stream is not yet initialized, before using set a value using the various /// setValue() methods. NumericTokenStream(AttributeFactoryPtr factory, int32_t precisionStep); virtual ~NumericTokenStream(); LUCENE_CLASS(NumericTokenStream); protected: TermAttributePtr termAtt; TypeAttributePtr typeAtt; PositionIncrementAttributePtr posIncrAtt; int32_t shift; int32_t valSize; // valSize == 0 means not initialized int32_t precisionStep; int64_t value; public: /// The full precision token gets this token type assigned. static const String& TOKEN_TYPE_FULL_PREC(); /// The lower precision tokens gets this token type assigned. static const String& TOKEN_TYPE_LOWER_PREC(); /// Initializes the token stream with the supplied long value. /// @param value the value, for which this TokenStream should enumerate tokens. /// @return this instance, because of this you can use it the following way: /// newLucene(name, newLucene(precisionStep)->setLongValue(value)) NumericTokenStreamPtr setLongValue(int64_t value); /// Initializes the token stream with the supplied int value. /// @param value the value, for which this TokenStream should enumerate tokens. /// @return this instance, because of this you can use it the following way: /// newLucene(name, newLucene(precisionStep)->setIntValue(value)) NumericTokenStreamPtr setIntValue(int32_t value); /// Initializes the token stream with the supplied double value. /// @param value the value, for which this TokenStream should enumerate tokens. 
/// @return this instance, because of this you can use it the following way: /// newLucene(name, newLucene(precisionStep)->setDoubleValue(value)) NumericTokenStreamPtr setDoubleValue(double value); virtual void reset(); virtual bool incrementToken(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/NumericUtils.h000066400000000000000000000236051217574114600215400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICUTILS_H #define NUMERICUTILS_H #include "LuceneObject.h" namespace Lucene { /// This is a helper class to generate prefix-encoded representations for numerical values and supplies converters /// to represent double values as sortable integers/longs. /// /// To quickly execute range queries in Apache Lucene, a range is divided recursively into multiple intervals for /// searching: The center of the range is searched only with the lowest possible precision in the trie, while the /// boundaries are matched more exactly. This reduces the number of terms dramatically. /// /// This class generates terms to achieve this: First the numerical integer values need to be converted to strings. /// For that integer values (32 bit or 64 bit) are made unsigned and the bits are converted to ASCII chars with each /// 7 bit. The resulting string is sortable like the original integer value. Each value is also prefixed (in the /// first char) by the shift value (number of bits removed) used during encoding. /// /// To also index floating point numbers, this class supplies two methods to convert them to integer values by /// changing their bit layout: {@link #doubleToSortableLong}, {@link #doubleToSortableInt}. 
You will have no precision /// loss by converting floating point numbers to integers and back (only that the integer form is not usable). Other /// data types like dates can easily converted to longs or ints (eg. date to long). /// /// For easy usage, the trie algorithm is implemented for indexing inside {@link NumericTokenStream} that can index /// int, long, and double. For querying, {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query /// part for the same data types. /// /// This class can also be used, to generate lexicographically sortable (according {@link std::string#compare}) /// representations of numeric data types for other usages (eg. sorting). class LPPAPI NumericUtils : public LuceneObject { public: virtual ~NumericUtils(); LUCENE_CLASS(NumericUtils); public: /// The default precision step used by {@link NumericField}, {@link NumericTokenStream}, {@link NumericRangeQuery}, /// and {@link NumericRangeFilter} as default. static const int32_t PRECISION_STEP_DEFAULT; /// Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + /// shift in the first character. static const wchar_t SHIFT_START_LONG; /// The maximum term length (used for char[] buffer size) for encoding long values. /// @see #longToPrefixCoded(long,int,char[]) static const int32_t BUF_SIZE_LONG; /// Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + /// shift in the first character. static const wchar_t SHIFT_START_INT; /// The maximum term length (used for char[] buffer size) for encoding int values. /// @see #intToPrefixCoded(int,int,char[]) static const int32_t BUF_SIZE_INT; public: /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by /// {@link NumericTokenStream}. 
/// @param val the numeric value /// @param shift how many bits to strip from the right /// @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG} length /// @return number of chars written to buffer static int32_t longToPrefixCoded(int64_t val, int32_t shift, CharArray buffer); /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by /// {@link LongRangeBuilder}. /// @param val the numeric value /// @param shift how many bits to strip from the right static String longToPrefixCoded(int64_t val, int32_t shift); /// This is a convenience method, that returns prefix coded bits of a long without reducing the precision. /// It can be used to store the full precision value as a stored field in index. /// To decode, use {@link #prefixCodedToLong}. static String longToPrefixCoded(int64_t val); /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by {@link /// NumericTokenStream}. /// @param val the numeric value /// @param shift how many bits to strip from the right /// @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT} length /// @return number of chars written to buffer static int32_t intToPrefixCoded(int32_t val, int32_t shift, CharArray buffer); /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by {@link /// IntRangeBuilder}. /// @param val the numeric value /// @param shift how many bits to strip from the right static String intToPrefixCoded(int32_t val, int32_t shift); /// This is a convenience method, that returns prefix coded bits of an int without reducing the precision. /// It can be used to store the full precision value as a stored field in index. /// To decode, use {@link #prefixCodedToInt}. static String intToPrefixCoded(int32_t val); /// Returns a long from prefixCoded characters. Rightmost bits will be zero for lower precision codes. 
/// This method can be used to decode eg. a stored field. /// @see #longToPrefixCoded(int64_t) static int64_t prefixCodedToLong(const String& prefixCoded); /// Returns an int from prefixCoded characters. Rightmost bits will be zero for lower precision codes. /// This method can be used to decode eg. a stored field. /// @see #intToPrefixCoded(int32_t) static int32_t prefixCodedToInt(const String& prefixCoded); /// Converts a double value to a sortable signed long. The value is converted by getting their IEEE 754 /// floating-point "double format" bit layout and then some bits are swapped, to be able to compare the /// result as int64_t. By this the precision is not reduced, but the value can easily used as a int64_t. /// @see #sortableLongToDouble static int64_t doubleToSortableLong(double val); /// Convenience method: this just returns: longToPrefixCoded(doubleToSortableLong(val)) static String doubleToPrefixCoded(double val); /// Converts a sortable long back to a double. /// @see #doubleToSortableLong static double sortableLongToDouble(int64_t val); /// Convenience method: this just returns: sortableLongToDouble(prefixCodedToLong(val)) static double prefixCodedToDouble(const String& val); /// Splits a int64_t range recursively. You may implement a builder that adds clauses to a {@link BooleanQuery} /// for each call to its {@link LongRangeBuilder#addRange(String,String)} method. /// This method is used by {@link NumericRangeQuery}. static void splitLongRange(LongRangeBuilderPtr builder, int32_t precisionStep, int64_t minBound, int64_t maxBound); /// Splits an int32_t range recursively. You may implement a builder that adds clauses to a {@link BooleanQuery} /// for each call to its {@link IntRangeBuilder#addRange(String,String)} method. /// This method is used by {@link NumericRangeQuery}. static void splitIntRange(IntRangeBuilderPtr builder, int32_t precisionStep, int32_t minBound, int32_t maxBound); /// This helper does the splitting for both 32 and 64 bit. 
static void splitRange(LuceneObjectPtr builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound); /// Helper that delegates to correct range builder static void addRange(LuceneObjectPtr builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift); }; /// Callback for {@link #splitLongRange}. You need to overwrite only one of the methods. /// NOTE: This is a very low-level interface, the method signatures may change in later versions. class LPPAPI LongRangeBuilder : public LuceneObject { public: virtual ~LongRangeBuilder(); public: /// Overwrite this method, if you like to receive the already prefix encoded range bounds. You can directly build /// classical (inclusive) range queries from them. virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); /// Overwrite this method, if you like to receive the raw long range bounds. You can use this for eg. debugging /// purposes (print out range bounds). virtual void addRange(int64_t min, int64_t max, int32_t shift); }; class LPPAPI IntRangeBuilder : public LuceneObject { public: virtual ~IntRangeBuilder(); public: /// Overwrite this method, if you like to receive the already prefix encoded range bounds. You can directly build /// classical range (inclusive) queries from them. virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); /// Overwrite this method, if you like to receive the raw int range bounds. You can use this for eg. debugging /// purposes (print out range bounds). virtual void addRange(int32_t min, int32_t max, int32_t shift); }; } #endif LucenePlusPlus-rel_3.0.4/include/OffsetAttribute.h000066400000000000000000000036271217574114600222310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef OFFSETATTRIBUTE_H #define OFFSETATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// The start and end character offset of a Token. class LPPAPI OffsetAttribute : public Attribute { public: OffsetAttribute(); virtual ~OffsetAttribute(); LUCENE_CLASS(OffsetAttribute); protected: int32_t _startOffset; int32_t _endOffset; public: virtual String toString(); /// Returns this Token's starting offset, the position of the first character corresponding to this token /// in the source text. /// /// Note that the difference between endOffset() and startOffset() may not be equal to termText.length(), /// as the term text may have been altered by a stemmer or some other filter. virtual int32_t startOffset(); /// Set the starting and ending offset. /// @see #startOffset() and #endOffset() virtual void setOffset(int32_t startOffset, int32_t endOffset); /// Returns this Token's ending offset, one greater than the position of the last character corresponding /// to this token in the source text. The length of the token in the source text is (endOffset - startOffset). virtual int32_t endOffset(); virtual void clear(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual void copyTo(AttributePtr target); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/OpenBitSet.h000066400000000000000000000224301217574114600211240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef OPENBITSET_H #define OPENBITSET_H #include "DocIdSet.h" namespace Lucene { /// An "open" BitSet implementation that allows direct access to the array of words storing the bits. /// /// The goals of OpenBitSet are the fastest implementation possible, and maximum code reuse. Extra /// safety and encapsulation may always be built on top, but if that's built in, the cost can never /// be removed (and hence people re-implement their own version in order to get better performance). class LPPAPI OpenBitSet : public DocIdSet { public: /// Constructs an OpenBitSet large enough to hold numBits. OpenBitSet(int64_t numBits = 64); /// Constructs an OpenBitSet from an existing LongArray. /// /// The first 64 bits are in long[0], with bit index 0 at the least significant bit, and bit /// index 63 at the most significant. Given a bit index, the word containing it is long[index/64], /// and it is at bit number index%64 within that word. /// /// numWords are the number of elements in the array that contain set bits (non-zero longs). /// numWords should be <= bits.length(), and any existing words in the array at position >= /// numWords should be zero. OpenBitSet(LongArray bits, int32_t numWords); virtual ~OpenBitSet(); LUCENE_CLASS(OpenBitSet); protected: LongArray bits; int32_t wlen; // number of words (elements) used in the array public: virtual DocIdSetIteratorPtr iterator(); /// This DocIdSet implementation is cacheable. virtual bool isCacheable(); /// Returns the current capacity in bits (1 greater than the index of the last bit) int64_t capacity(); /// Returns the current capacity of this set. Included for compatibility. 
This is *not* /// equal to {@link #cardinality} int64_t size(); /// Returns true if there are no set bits bool isEmpty(); /// Returns the long[] storing the bits LongArray getBits(); /// Sets a new long[] to use as the bit storage void setBits(LongArray bits); /// Gets the number of longs in the array that are in use int32_t getNumWords(); /// Sets the number of longs in the array that are in use void setNumWords(int32_t numWords); /// Returns true or false for the specified bit index. bool get(int32_t index); /// Returns true or false for the specified bit index. /// The index should be less than the OpenBitSet size bool fastGet(int32_t index); /// Returns true or false for the specified bit index bool get(int64_t index); /// Returns true or false for the specified bit index. /// The index should be less than the OpenBitSet size. bool fastGet(int64_t index); /// Returns 1 if the bit is set, 0 if not. /// The index should be less than the OpenBitSet size int32_t getBit(int32_t index); /// Sets a bit, expanding the set size if necessary void set(int64_t index); /// Sets the bit at the specified index. /// The index should be less than the OpenBitSet size. void fastSet(int32_t index); /// Sets the bit at the specified index. /// The index should be less than the OpenBitSet size. void fastSet(int64_t index); /// Sets a range of bits, expanding the set size if necessary /// @param startIndex lower index /// @param endIndex one-past the last bit to set void set(int64_t startIndex, int64_t endIndex); /// Clears a bit. /// The index should be less than the OpenBitSet size. void fastClear(int32_t index); /// Clears a bit. /// The index should be less than the OpenBitSet size. void fastClear(int64_t index); /// Clears a bit, allowing access beyond the current set size without changing the size. void clear(int64_t index); /// Clears a range of bits. Clearing past the end does not change the size of the set. 
/// @param startIndex lower index /// @param endIndex one-past the last bit to clear void clear(int32_t startIndex, int32_t endIndex); /// Clears a range of bits. Clearing past the end does not change the size of the set. /// @param startIndex lower index /// @param endIndex one-past the last bit to clear void clear(int64_t startIndex, int64_t endIndex); /// Sets a bit and returns the previous value. /// The index should be less than the OpenBitSet size. bool getAndSet(int32_t index); /// Sets a bit and returns the previous value. /// The index should be less than the OpenBitSet size. bool getAndSet(int64_t index); /// Flips a bit. /// The index should be less than the OpenBitSet size. void fastFlip(int32_t index); /// Flips a bit. /// The index should be less than the OpenBitSet size. void fastFlip(int64_t index); /// Flips a bit, expanding the set size if necessary void flip(int64_t index); /// Flips a bit and returns the resulting bit value. /// The index should be less than the OpenBitSet size. bool flipAndGet(int32_t index); /// Flips a bit and returns the resulting bit value. /// The index should be less than the OpenBitSet size. bool flipAndGet(int64_t index); /// Flips a range of bits, expanding the set size if necessary /// @param startIndex lower index /// @param endIndex one-past the last bit to flip void flip(int64_t startIndex, int64_t endIndex); /// @return the number of set bits int64_t cardinality(); /// Returns the popcount or cardinality of the intersection of the two sets. /// Neither set is modified. static int64_t intersectionCount(OpenBitSetPtr a, OpenBitSetPtr b); /// Returns the popcount or cardinality of the union of the two sets. /// Neither set is modified. static int64_t unionCount(OpenBitSetPtr a, OpenBitSetPtr b); /// Returns the popcount or cardinality of "a and not b" or "intersection(a, not(b))". /// Neither set is modified. 
static int64_t andNotCount(OpenBitSetPtr a, OpenBitSetPtr b); /// Returns the popcount or cardinality of the exclusive-or of the two sets. /// Neither set is modified. static int64_t xorCount(OpenBitSetPtr a, OpenBitSetPtr b); /// Returns the index of the first set bit starting at the index specified. /// -1 is returned if there are no more set bits. int32_t nextSetBit(int32_t index); /// Returns the index of the first set bit starting at the index specified. /// -1 is returned if there are no more set bits. int64_t nextSetBit(int64_t index); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// this = this AND other void intersect(OpenBitSetPtr other); /// this = this OR other void _union(OpenBitSetPtr other); /// Remove all elements set in other. this = this AND_NOT other void remove(OpenBitSetPtr other); /// this = this XOR other void _xor(OpenBitSetPtr other); /// see {@link intersect} void _and(OpenBitSetPtr other); /// see {@link union} void _or(OpenBitSetPtr other); /// see {@link remove} void andNot(OpenBitSetPtr other); /// Returns true if the sets have any elements in common bool intersects(OpenBitSetPtr other); /// Expand the LongArray with the size given as a number of words (64 bit longs). /// getNumWords() is unchanged by this call. void ensureCapacityWords(int32_t numWords); /// Ensure that the LongArray is big enough to hold numBits, expanding it if necessary. /// getNumWords() is unchanged by this call. void ensureCapacity(int64_t numBits); /// Lowers numWords, the number of words in use, by checking for trailing zero words. void trimTrailingZeros(); /// Returns the number of 64 bit words it would take to hold numBits. 
static int32_t bits2words(int64_t numBits); /// Returns true if both sets have the same bits set virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); protected: int32_t expandingWordNum(int64_t index); }; } #endif LucenePlusPlus-rel_3.0.4/include/OpenBitSetDISI.h000066400000000000000000000044221217574114600215760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef OPENBITSETDISI_H #define OPENBITSETDISI_H #include "OpenBitSet.h" namespace Lucene { class LPPAPI OpenBitSetDISI : public OpenBitSet { public: /// Construct an OpenBitSetDISI with its bits set from the doc ids of the given DocIdSetIterator. /// Also give a maximum size one larger than the largest doc id for which a bit may ever be set on /// this OpenBitSetDISI. OpenBitSetDISI(DocIdSetIteratorPtr disi, int32_t maxSize); /// Construct an OpenBitSetDISI with no bits set, and a given maximum size one larger than the largest /// doc id for which a bit may ever be set on this OpenBitSetDISI. OpenBitSetDISI(int32_t maxSize); virtual ~OpenBitSetDISI(); LUCENE_CLASS(OpenBitSetDISI); public: /// Perform an in-place OR with the doc ids from a given DocIdSetIterator, setting the bit for each /// such doc id. These doc ids should be smaller than the maximum size passed to the constructor. void inPlaceOr(DocIdSetIteratorPtr disi); /// Perform an in-place AND with the doc ids from a given DocIdSetIterator, leaving only the bits set /// for which the doc ids are in common. These doc ids should be smaller than the maximum size passed /// to the constructor. 
void inPlaceAnd(DocIdSetIteratorPtr disi); /// Perform an in-place NOT with the doc ids from a given DocIdSetIterator, clearing all the bits for /// each such doc id. These doc ids should be smaller than the maximum size passed to the constructor. void inPlaceNot(DocIdSetIteratorPtr disi); /// Perform an inplace XOR with the doc ids from a given DocIdSetIterator, flipping all the bits for /// each such doc id. These doc ids should be smaller than the maximum size passed to the constructor. void inPlaceXor(DocIdSetIteratorPtr disi); }; } #endif LucenePlusPlus-rel_3.0.4/include/OpenBitSetIterator.h000066400000000000000000000033141217574114600226360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef OPENBITSETITERATOR_H #define OPENBITSETITERATOR_H #include "DocIdSetIterator.h" namespace Lucene { /// An iterator to iterate over set bits in an OpenBitSet. /// This is faster than nextSetBit() for iterating over the complete set of bits, /// especially when the density of the bits set is high. class LPPAPI OpenBitSetIterator : public DocIdSetIterator { public: OpenBitSetIterator(OpenBitSetPtr bitSet); OpenBitSetIterator(LongArray bits, int32_t numWords); virtual ~OpenBitSetIterator(); LUCENE_CLASS(OpenBitSetIterator); protected: LongArray arr; int32_t words; int32_t i; int64_t word; int32_t wordShift; int32_t indexArray; int32_t curDocId; /// The General Idea: instead of having an array per byte that has the offsets of the /// next set bit, that array could be packed inside a 32 bit integer (8 4 bit numbers). 
/// That should be faster than accessing an array for each index, and the total array /// size is kept smaller (256*sizeof(int32_t))=1K static const int32_t bitlist[]; public: virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); virtual int32_t docID(); protected: /// 64 bit shifts void shift(); }; } #endif LucenePlusPlus-rel_3.0.4/include/OrdFieldSource.h000066400000000000000000000042111217574114600217560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ORDFIELDSOURCE_H #define ORDFIELDSOURCE_H #include "ValueSource.h" namespace Lucene { /// Obtains the ordinal of the field value from the default Lucene {@link FieldCache} using getStringIndex(). /// /// The native lucene index order is used to assign an ordinal value for each field value. /// /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. /// Example: /// If there were only three field values: "apple","banana","pear" then ord("apple")=1, ord("banana")=2, /// ord("pear")=3 /// /// WARNING: ord() depends on the position in an index and can thus change when other documents are inserted /// or deleted, or if a MultiSearcher is used. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU /// per lookup but will not consume double the FieldCache RAM. 
class LPPAPI OrdFieldSource : public ValueSource { public: /// Constructor for a certain field. ///@param field field whose values order is used. OrdFieldSource(const String& field); virtual ~OrdFieldSource(); LUCENE_CLASS(OrdFieldSource); protected: String field; public: virtual String description(); virtual DocValuesPtr getValues(IndexReaderPtr reader); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ParallelMultiSearcher.h000066400000000000000000000034121217574114600233330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PARALLELMULTISEARCHER_H #define PARALLELMULTISEARCHER_H #include "MultiSearcher.h" namespace Lucene { /// Implements parallel search over a set of Searchables. /// /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or /// {@link #search(QueryPtr, FilterPtr, int32_t)} methods. class LPPAPI ParallelMultiSearcher : public MultiSearcher { public: /// Creates a {@link Searchable} which searches searchables. ParallelMultiSearcher(Collection searchables); virtual ~ParallelMultiSearcher(); LUCENE_CLASS(ParallelMultiSearcher); public: /// Executes each {@link Searchable}'s docFreq() in its own thread and waits for each search to /// complete and merge the results back together. virtual int32_t docFreq(TermPtr term); /// A search implementation which executes each {@link Searchable} in its own thread and waits /// for each search to complete and merge the results back together. 
virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n); /// A search implementation allowing sorting which spans a new thread for each Searchable, waits /// for each search to complete and merges the results back together. virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort); }; } #endif LucenePlusPlus-rel_3.0.4/include/ParallelReader.h000066400000000000000000000206021217574114600217660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PARALLELREADER_H #define PARALLELREADER_H #include "IndexReader.h" namespace Lucene { /// An IndexReader which reads multiple, parallel indexes. Each index added must have the same number of /// documents, but typically each contains different fields. Each document contains the union of the fields /// of all documents with the same document number. When searching, matches for a query term are from the /// first index added that has the field. /// /// This is useful, eg., with collections that have large fields which change rarely and small fields that /// change more frequently. The smaller fields may be re-indexed in a new index and both indexes may be /// searched together. /// /// Warning: It is up to you to make sure all indexes are created and modified the same way. For example, /// if you add documents to one index, you need to add the same documents in the same order to the other /// indexes. Failure to do so will result in undefined behavior class LPPAPI ParallelReader : public IndexReader { public: /// Construct a ParallelReader. 
/// @param closeSubReaders indicates whether the subreaders should be closed when this ParallelReader /// is closed ParallelReader(bool closeSubReaders = true); virtual ~ParallelReader(); LUCENE_CLASS(ParallelReader); protected: Collection readers; Collection decrefOnClose; // remember which subreaders to decRef on close bool incRefReaders; MapStringIndexReader fieldToReader; MapIndexReaderSetString readerToFields; Collection storedFieldReaders; int32_t _maxDoc; int32_t _numDocs; bool _hasDeletions; public: /// Add an IndexReader. void add(IndexReaderPtr reader); /// Add an IndexReader whose stored fields will not be returned. This can accelerate search when stored /// fields are only needed from a subset of the IndexReaders. void add(IndexReaderPtr reader, bool ignoreStoredFields); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Tries to reopen the subreaders. /// /// If one or more subreaders could be re-opened (ie. subReader.reopen() returned a new instance != subReader), /// then a new ParallelReader instance is returned, otherwise this instance is returned. /// /// A re-opened instance might share one or more subreaders with the old instance. Index modification /// operations result in undefined behavior when performed before the old instance is closed. /// (see {@link IndexReader#reopen()}). /// /// If subreaders are shared, then the reference count of those readers is increased to ensure that the /// subreaders remain open until the last referring reader is closed. virtual IndexReaderPtr reopen(); /// Returns the number of documents in this index. virtual int32_t numDocs(); /// Returns one greater than the largest possible document number. This may be used to, eg., determine /// how big to allocate an array which will have an element for every document number in an index. 
virtual int32_t maxDoc(); /// Returns true if any documents have been deleted virtual bool hasDeletions(); /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n); /// Get the {@link Document} at the n'th position. virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); /// Return an array of term frequency vectors for the specified document. virtual Collection getTermFreqVectors(int32_t docNumber); /// Return a term frequency vector for the specified document and field. virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays /// of the {@link TermFreqVector}. virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); /// Map all the term vectors for all fields in a Document virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. virtual ByteArray norms(const String& field); /// Reads the byte-encoded normalization factor for the named field of every document. virtual void norms(const String& field, ByteArray norms, int32_t offset); /// Returns an enumeration of all the terms in the index. The enumeration is ordered by /// Term::compareTo(). Each term is greater than all that precede it in the enumeration. /// Note that after calling terms(), {@link TermEnum#next()} must be called on the resulting /// enumeration before calling other methods such as {@link TermEnum#term()}. virtual TermEnumPtr terms(); /// Returns an enumeration of all terms starting at a given term. If the given term does not /// exist, the enumeration is positioned at the first term greater than the supplied term. /// The enumeration is ordered by Term::compareTo(). 
Each term is greater than all that precede /// it in the enumeration. virtual TermEnumPtr terms(TermPtr t); /// Returns the number of documents containing the term t. virtual int32_t docFreq(TermPtr t); /// Returns an enumeration of all the documents which contain term. For each document, the /// document number, the frequency of the term in that document is also provided, for use in /// search scoring. If term is null, then all non-deleted docs are returned with freq=1. /// The enumeration is ordered by document number. Each document number is greater than all /// that precede it in the enumeration. virtual TermDocsPtr termDocs(TermPtr term); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs(); /// Returns an enumeration of all the documents which contain term. virtual TermPositionsPtr termPositions(TermPtr term); /// Returns an unpositioned {@link TermPositions} enumerator. virtual TermPositionsPtr termPositions(); /// Checks recursively if all subreaders are up to date. virtual bool isCurrent(); /// Checks recursively if all subindexes are optimized virtual bool isOptimized(); /// Not implemented. virtual int64_t getVersion(); Collection getSubReaders(); /// Get a list of unique field names that exist in this index and have the specified field option /// information. virtual HashSet getFieldNames(FieldOption fieldOption); protected: IndexReaderPtr doReopen(bool doClone); /// Implements deletion of the document numbered docNum. virtual void doDelete(int32_t docNum); /// Implements actual undeleteAll(). virtual void doUndeleteAll(); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); /// Implements commit. virtual void doCommit(MapStringString commitUserData); /// Implements close. 
virtual void doClose(); friend class ParallelTermEnum; friend class ParallelTermDocs; friend class ParallelTermPositions; }; } #endif LucenePlusPlus-rel_3.0.4/include/Payload.h000066400000000000000000000064761217574114600205150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOAD_H #define PAYLOAD_H #include "LuceneObject.h" namespace Lucene { /// A Payload is metadata that can be stored together with each occurrence of a term. This metadata is stored /// inline in the posting list of the specific term. /// /// To store payloads in the index a {@link TokenStream} has to be used that produces payload data. /// /// Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} to retrieve /// the payloads from the index. class LPPAPI Payload : public LuceneObject { public: /// Creates an empty payload and does not allocate a byte array. Payload(); /// Creates a new payload with the the given array as data. A reference to the passed-in array is held, /// ie. no copy is made. /// @param data the data of this payload Payload(ByteArray data); /// Creates a new payload with the the given array as data. A reference to the passed-in array is held, /// ie. no copy is made. /// @param data the data of this payload /// @param offset the offset in the data byte array /// @param length the length of the data Payload(ByteArray data, int32_t offset, int32_t length); virtual ~Payload(); LUCENE_CLASS(Payload); protected: /// the byte array containing the payload data ByteArray data; /// the offset within the byte array int32_t offset; /// the length of the payload data int32_t _length; public: /// Sets this payloads data. 
A reference to the passed-in array is held, ie. no copy is made. void setData(ByteArray data); /// Sets this payloads data. A reference to the passed-in array is held, ie. no copy is made. void setData(ByteArray data, int32_t offset, int32_t length); /// Returns a reference to the underlying byte array that holds this payloads data. ByteArray getData(); /// Returns the offset in the underlying byte array int32_t getOffset(); /// Returns the length of the payload data. int32_t length(); /// Returns the byte at the given index. uint8_t byteAt(int32_t index); /// Allocates a new byte array, copies the payload data into it and returns it. ByteArray toByteArray(); /// Copies the payload data to a byte array. /// @param target the target byte array /// @param targetOffset the offset in the target byte array void copyTo(ByteArray target, int32_t targetOffset); /// Clones this payload by creating a copy of the underlying byte array. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PayloadAttribute.h000066400000000000000000000026231217574114600223670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADATTRIBUTE_H #define PAYLOADATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// The start and end character offset of a Token. class LPPAPI PayloadAttribute : public Attribute { public: /// Initialize this attribute with no payload. PayloadAttribute(); /// Initialize this attribute with the given payload. 
PayloadAttribute(PayloadPtr payload); virtual ~PayloadAttribute(); LUCENE_CLASS(PayloadAttribute); protected: PayloadPtr payload; public: virtual String toString(); /// Returns this Token's payload. virtual PayloadPtr getPayload(); /// Sets this Token's payload. virtual void setPayload(PayloadPtr payload); virtual void clear(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual void copyTo(AttributePtr target); }; } #endif LucenePlusPlus-rel_3.0.4/include/PayloadFunction.h000066400000000000000000000044541217574114600222150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADFUNCTION_H #define PAYLOADFUNCTION_H #include "LuceneObject.h" namespace Lucene { /// An abstract class that defines a way for Payload*Query instances to transform the cumulative /// effects of payload scores for a document. 
/// /// @see PayloadTermQuery for more information class LPPAPI PayloadFunction : public LuceneObject { protected: PayloadFunction(); public: virtual ~PayloadFunction(); LUCENE_CLASS(PayloadFunction); public: /// Calculate the score up to this point for this doc and field /// @param docId The current doc /// @param field The field /// @param start The start position of the matching Span /// @param end The end position of the matching Span /// @param numPayloadsSeen The number of payloads seen so far /// @param currentScore The current score so far /// @param currentPayloadScore The score for the current payload /// @return The new current Score /// /// @see Spans virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) = 0; /// Calculate the final score for all the payloads seen so far for this doc/field /// @param docId The current doc /// @param field The current field /// @param numPayloadsSeen The total number of payloads seen on this document /// @param payloadScore The raw score for those payloads /// @return The final score for the payloads virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) = 0; /// Return hash code for this object. virtual int32_t hashCode() = 0; /// Return whether two objects are equal virtual bool equals(LuceneObjectPtr other) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/PayloadNearQuery.h000066400000000000000000000065551217574114600223470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADNEARQUERY_H #define PAYLOADNEARQUERY_H #include "SpanNearQuery.h" #include "SpanWeight.h" #include "SpanScorer.h" namespace Lucene { /// This class is very similar to {@link SpanNearQuery} except that it factors in the value of the payloads /// located at each of the positions where the {@link TermSpans} occurs. /// /// In order to take advantage of this, you must override {@link Similarity#scorePayload} which returns 1 /// by default. /// /// Payload scores are aggregated using a pluggable {@link PayloadFunction}. /// /// @see Similarity#scorePayload class LPPAPI PayloadNearQuery : public SpanNearQuery { public: PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder); PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder, PayloadFunctionPtr function); virtual ~PayloadNearQuery(); LUCENE_CLASS(PayloadNearQuery); protected: String fieldName; PayloadFunctionPtr function; public: using SpanNearQuery::toString; virtual WeightPtr createWeight(SearcherPtr searcher); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); friend class PayloadNearSpanWeight; friend class PayloadNearSpanScorer; }; class LPPAPI PayloadNearSpanWeight : public SpanWeight { public: PayloadNearSpanWeight(SpanQueryPtr query, SearcherPtr searcher); virtual ~PayloadNearSpanWeight(); LUCENE_CLASS(PayloadNearSpanWeight); public: virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); }; class LPPAPI PayloadNearSpanScorer : public SpanScorer { public: PayloadNearSpanScorer(SpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms); virtual ~PayloadNearSpanScorer(); LUCENE_CLASS(PayloadNearSpanScorer); public: SpansPtr spans; SimilarityPtr similarity; protected: double payloadScore; int32_t payloadsSeen; 
public: /// Get the payloads associated with all underlying subspans void getPayloads(Collection subSpans); virtual double score(); protected: /// By default, uses the {@link PayloadFunction} to score the payloads, but can be overridden to do /// other things. /// @param payLoads The payloads /// @param start The start position of the span being scored /// @param end The end position of the span being scored /// @see Spans void processPayloads(Collection payLoads, int32_t start, int32_t end); virtual bool setFreqCurrentDoc(); virtual ExplanationPtr explain(int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.4/include/PayloadSpanUtil.h000066400000000000000000000025041217574114600221610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADSPANUTIL_H #define PAYLOADSPANUTIL_H #include "LuceneObject.h" namespace Lucene { /// Experimental class to get set of payloads for most standard Lucene queries. Operates like Highlighter - /// IndexReader should only contain doc of interest, best to use MemoryIndex. class LPPAPI PayloadSpanUtil : public LuceneObject { public: /// @param reader That contains doc with payloads to extract PayloadSpanUtil(IndexReaderPtr reader); virtual ~PayloadSpanUtil(); LUCENE_CLASS(PayloadSpanUtil); protected: IndexReaderPtr reader; public: /// Query should be rewritten for wild/fuzzy support. 
/// @return payloads Collection Collection getPayloadsForQuery(QueryPtr query); protected: void queryToSpanQuery(QueryPtr query, Collection payloads); void getPayloads(Collection payloads, SpanQueryPtr query); }; } #endif LucenePlusPlus-rel_3.0.4/include/PayloadTermQuery.h000066400000000000000000000031501217574114600223550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADTERMQUERY_H #define PAYLOADTERMQUERY_H #include "SpanTermQuery.h" namespace Lucene { /// This class is very similar to {@link SpanTermQuery} except that it factors in the value of the payload /// located at each of the positions where the {@link Term} occurs. /// /// In order to take advantage of this, you must override {@link Similarity#scorePayload(int32_t, const String&, /// int32_t, int32_t, ByteArray, int32_t, int32_t)} which returns 1 by default. /// /// Payload scores are aggregated using a pluggable {@link PayloadFunction}. 
class LPPAPI PayloadTermQuery : public SpanTermQuery { public: PayloadTermQuery(TermPtr term, PayloadFunctionPtr function, bool includeSpanScore = true); virtual ~PayloadTermQuery(); LUCENE_CLASS(PayloadTermQuery); protected: PayloadFunctionPtr function; bool includeSpanScore; public: virtual WeightPtr createWeight(SearcherPtr searcher); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); friend class PayloadTermWeight; friend class PayloadTermSpanScorer; }; } #endif LucenePlusPlus-rel_3.0.4/include/PerFieldAnalyzerWrapper.h000066400000000000000000000060121217574114600236470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PERFIELDANALYZERWRAPPER_H #define PERFIELDANALYZERWRAPPER_H #include "Analyzer.h" namespace Lucene { /// This analyzer is used to facilitate scenarios where different fields require different analysis techniques. /// Use {@link #addAnalyzer} to add a non-default analyzer on a field name basis. /// /// Example usage: /// ///
    /// PerFieldAnalyzerWrapperPtr aWrapper = newLucene(newLucene());
    /// aWrapper->addAnalyzer(L"firstname", newLucene());
    /// aWrapper->addAnalyzer(L"lastname", newLucene());
    /// 
/// /// In this example, StandardAnalyzer will be used for all fields except "firstname" and "lastname", for which /// KeywordAnalyzer will be used. /// /// A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing and query parsing. class LPPAPI PerFieldAnalyzerWrapper : public Analyzer { public: /// Constructs with default analyzer. /// @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use the /// one provided here. PerFieldAnalyzerWrapper(AnalyzerPtr defaultAnalyzer); /// Constructs with default analyzer and a map of analyzers to use for specific fields. /// @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use the one provided here. /// @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields PerFieldAnalyzerWrapper(AnalyzerPtr defaultAnalyzer, MapStringAnalyzer fieldAnalyzers); virtual ~PerFieldAnalyzerWrapper(); LUCENE_CLASS(PerFieldAnalyzerWrapper); protected: AnalyzerPtr defaultAnalyzer; MapStringAnalyzer analyzerMap; public: /// Defines an analyzer to use for the specified field. /// @param fieldName field name requiring a non-default analyzer /// @param analyzer non-default analyzer to use for field void addAnalyzer(const String& fieldName, AnalyzerPtr analyzer); virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); /// Return the positionIncrementGap from the analyzer assigned to fieldName. 
virtual int32_t getPositionIncrementGap(const String& fieldName); /// Return the offsetGap from the analyzer assigned to field virtual int32_t getOffsetGap(FieldablePtr field); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PhrasePositions.h000066400000000000000000000024061217574114600222430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PHRASEPOSITIONS_H #define PHRASEPOSITIONS_H #include "LuceneObject.h" namespace Lucene { /// Position of a term in a document that takes into account the term offset within the phrase. class PhrasePositions : public LuceneObject { public: PhrasePositions(TermPositionsPtr t, int32_t o); virtual ~PhrasePositions(); LUCENE_CLASS(PhrasePositions); public: int32_t doc; // current doc int32_t position; // position in doc int32_t count; // remaining pos in this doc int32_t offset; // position in phrase TermPositionsPtr tp; // stream of positions PhrasePositionsPtr _next; // used to make lists bool repeats; // there's other pp for same term (eg. query="1st word 2nd word"~1) public: bool next(); bool skipTo(int32_t target); void firstPosition(); bool nextPosition(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PhraseQuery.h000066400000000000000000000062611217574114600213640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PHRASEQUERY_H #define PHRASEQUERY_H #include "Query.h" namespace Lucene { /// A Query that matches documents containing a particular sequence of terms. A PhraseQuery is built by /// QueryParser for input like "new york". /// /// This query may be combined with other terms or queries with a {@link BooleanQuery}. class LPPAPI PhraseQuery : public Query { public: /// Constructs an empty phrase query. PhraseQuery(); virtual ~PhraseQuery(); LUCENE_CLASS(PhraseQuery); protected: String field; Collection terms; Collection positions; int32_t maxPosition; int32_t slop; public: using Query::toString; /// Sets the number of other words permitted between words in query phrase. If zero, then this is an /// exact phrase search. For larger values this works like a WITHIN or NEAR operator. /// /// The slop is in fact an edit-distance, where the units correspond to moves of terms in the query phrase /// out of position. For example, to switch the order of two words requires two moves (the first move /// places the words atop one another), so to permit re-orderings of phrases, the slop must be at least two. /// /// More exact matches are scored higher than sloppier matches, thus search results are sorted by exactness. /// /// The slop is zero by default, requiring exact matches. void setSlop(int32_t slop); /// Returns the slop. /// @see #setSlop() int32_t getSlop(); /// Adds a term to the end of the query phrase. /// The relative position of the term is the one immediately after the last term added. void add(TermPtr term); /// Adds a term to the end of the query phrase. /// The relative position of the term within the phrase is specified explicitly. This allows eg. phrases /// with more than one term at the same position or phrases with gaps (eg. in connection with stopwords). void add(TermPtr term, int32_t position); /// Returns the set of terms in this phrase. 
Collection getTerms(); /// Returns the relative positions of terms in this phrase. Collection getPositions(); virtual WeightPtr createWeight(SearcherPtr searcher); virtual void extractTerms(SetTerm terms); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); friend class PhraseWeight; }; } #endif LucenePlusPlus-rel_3.0.4/include/PhraseQueue.h000066400000000000000000000013641217574114600213420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PHRASEQUEUE_H #define PHRASEQUEUE_H #include "PriorityQueue.h" namespace Lucene { class PhraseQueue : public PriorityQueue { public: PhraseQueue(int32_t size); virtual ~PhraseQueue(); LUCENE_CLASS(PhraseQueue); protected: virtual bool lessThan(const PhrasePositionsPtr& first, const PhrasePositionsPtr& second); }; } #endif LucenePlusPlus-rel_3.0.4/include/PhraseScorer.h000066400000000000000000000051451217574114600215140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PHRASESCORER_H #define PHRASESCORER_H #include "Scorer.h" namespace Lucene { /// Scoring functionality for phrase queries. A document is considered matching if it contains the /// phrase-query terms at "valid" positions. 
What "valid positions" are depends on the type of the /// phrase query: for an exact phrase query terms are required to appear in adjacent locations, while /// for a sloppy phrase query some distance between the terms is allowed. The abstract method {@link /// #phraseFreq()} of extending classes is invoked for each document containing all the phrase query /// terms, in order to compute the frequency of the phrase query in that document. A non zero frequency /// means a match. class PhraseScorer : public Scorer { public: PhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, ByteArray norms); virtual ~PhraseScorer(); LUCENE_CLASS(PhraseScorer); protected: WeightPtr weight; ByteArray norms; double value; bool firstTime; bool more; PhraseQueuePtr pq; PhrasePositionsPtr first; PhrasePositionsPtr last; double freq; // phrase frequency in current doc as computed by phraseFreq(). public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); virtual int32_t advance(int32_t target); /// Phrase frequency in current doc as computed by phraseFreq(). double currentFreq(); virtual String toString(); protected: /// Next without initial increment bool doNext(); /// For a document containing all the phrase query terms, compute the frequency of the phrase in /// that document. A non zero frequency means a match. /// Note, that containing all phrase terms does not guarantee a match - they have to be found in /// matching locations. /// @return frequency of the phrase in current doc, 0 if not found. virtual double phraseFreq() = 0; void init(); void sort(); void pqToList(); void firstToLast(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PorterStemFilter.h000066400000000000000000000032051217574114600223610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PORTERSTEMFILTER_H #define PORTERSTEMFILTER_H #include "TokenFilter.h" namespace Lucene { /// Transforms the token stream as per the Porter stemming algorithm. Note: the input to the stemming filter must /// already be in lower case, so you will need to use LowerCaseFilter or LowerCaseTokenizer further down the Tokenizer /// chain in order for this to work properly. /// /// To use this filter with other analyzers, you'll want to write an Analyzer class that sets up the TokenStream chain /// as you want it. To use this with LowerCaseTokenizer, for example, you'd write an analyzer like this: /// ///
    /// class MyAnalyzer : public Analyzer
    /// {
    /// public:
    ///     virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader)
    ///     {
    ///         return newLucene(newLucene(reader));
    ///     }
    /// };
    /// 
class LPPAPI PorterStemFilter : public TokenFilter { public: PorterStemFilter(TokenStreamPtr input); virtual ~PorterStemFilter(); LUCENE_CLASS(PorterStemFilter); protected: PorterStemmerPtr stemmer; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PorterStemmer.h000066400000000000000000000113071217574114600217210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PORTERSTEMMER_H #define PORTERSTEMMER_H #include "LuceneObject.h" namespace Lucene { /// This is the Porter stemming algorithm, coded up as thread-safe ANSI C by the author. /// /// It may be be regarded as canonical, in that it follows the algorithm presented in Porter, 1980, An algorithm /// for suffix stripping, Program, Vol. 14, no. 3, pp 130-137, only differing from it at the points marked DEPARTURE. /// /// See also http://www.tartarus.org/~martin/PorterStemmer /// /// The algorithm as described in the paper could be exactly replicated by adjusting the points of DEPARTURE, but /// this is barely necessary, because (a) the points of DEPARTURE are definitely improvements, and (b) no encoding /// of the Porter stemmer I have seen is anything like as exact as this version, even with the points of DEPARTURE! /// /// Release 2 (the more old-fashioned, non-thread-safe version may be regarded as release 1.) 
class PorterStemmer : public LuceneObject { public: PorterStemmer(); virtual ~PorterStemmer(); LUCENE_CLASS(PorterStemmer); protected: wchar_t* b; // buffer for word to be stemmed int32_t k; // offset to the end of the string int32_t j; // a general offset into the string int32_t i; // initial length of word bool dirty; public: bool stem(CharArray word); /// In stem(b, k), b is a char pointer, and the string to be stemmed is from b[0] to b[k] inclusive. /// Possibly b[k+1] == '\0', but it is not important. The stemmer adjusts the characters b[0] ... b[k] and /// stores the new end-point of the string, k'. Stemming never increases word length, so 0 <= k' <= k. bool stem(wchar_t* b, int32_t k); wchar_t* getResultBuffer(); int32_t getResultLength(); protected: /// Returns true if b[i] is a consonant. ('b' means 'z->b', but here and below we drop 'z->' in comments. bool cons(int32_t i); /// Measures the number of consonant sequences between 0 and j. If c is a consonant sequence and v a vowel /// sequence, and <..> indicates arbitrary presence, /// /// gives 0 /// vc gives 1 /// vcvc gives 2 /// vcvcvc gives 3 /// ... int32_t m(); /// Return true if 0,...j contains a vowel bool vowelinstem(); /// Return true if j,(j-1) contain a double consonant. bool doublec(int32_t j); /// Return true if i-2,i-1,i has the form consonant - vowel - consonant and also if the second c is not w,x or y. /// This is used when trying to restore an e at the end of a short word. /// /// eg. cav(e), lov(e), hop(e), crim(e), but /// snow, box, tray. bool cvc(int32_t i); /// Returns true if 0,...k ends with the string s. bool ends(const wchar_t* s); /// Sets (j+1),...k to the characters in the string s, readjusting k. void setto(const wchar_t* s); void r(const wchar_t* s); /// step1ab() gets rid of plurals and -ed or -ing. eg. 
/// /// caresses -> caress /// ponies -> poni /// ties -> ti /// caress -> caress /// cats -> cat /// /// feed -> feed /// agreed -> agree /// disabled -> disable /// /// matting -> mat /// mating -> mate /// meeting -> meet /// milling -> mill /// messing -> mess /// /// meetings -> meet void step1ab(); /// Turns terminal y to i when there is another vowel in the stem. void step1c(); /// Maps double suffices to single ones. so -ization ( = -ize plus -ation) maps to -ize etc. note that the /// string before the suffix must give m() > 0. void step2(); /// Deals with -ic-, -full, -ness etc. similar strategy to step2. void step3(); /// Takes off -ant, -ence etc., in context vcvc. void step4(); /// Removes a final -e if m() > 1, and changes -ll to -l if m() > 1. void step5(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PositionBasedTermVectorMapper.h000066400000000000000000000057441217574114600250440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef POSITIONBASEDTERMVECTORMAPPER_H #define POSITIONBASEDTERMVECTORMAPPER_H #include "TermVectorMapper.h" namespace Lucene { class LPPAPI PositionBasedTermVectorMapper : public TermVectorMapper { public: PositionBasedTermVectorMapper(bool ignoringOffsets = false); virtual ~PositionBasedTermVectorMapper(); LUCENE_CLASS(PositionBasedTermVectorMapper); protected: MapStringMapIntTermVectorsPositionInfo fieldToTerms; String currentField; /// A Map of Integer and TermVectorsPositionInfo MapIntTermVectorsPositionInfo currentPositions; bool storeOffsets; public: /// Never ignores positions. This mapper doesn't make much sense unless there are positions. 
/// @return false virtual bool isIgnoringPositions(); /// Callback for the TermVectorReader. virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); /// Callback mechanism used by the TermVectorReader. virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); /// Get the mapping between fields and terms, sorted by the comparator /// @return A map between field names and a Map. The sub-Map key is the position as the integer, the value is /// {@link PositionBasedTermVectorMapper}. MapStringMapIntTermVectorsPositionInfo getFieldToTerms(); }; /// Container for a term at a position class LPPAPI TermVectorsPositionInfo : public LuceneObject { public: TermVectorsPositionInfo(int32_t position, bool storeOffsets); virtual ~TermVectorsPositionInfo(); LUCENE_CLASS(TermVectorsPositionInfo); protected: int32_t position; Collection terms; Collection offsets; public: void addTerm(const String& term, TermVectorOffsetInfoPtr info); /// @return The position of the term int32_t getPosition(); /// Note, there may be multiple terms at the same position /// @return A List of Strings Collection getTerms(); /// Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple /// entries since there may be multiple terms at a position. /// @return A List of TermVectorOffsetInfo objects, if offsets are stored. Collection getOffsets(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PositionIncrementAttribute.h000066400000000000000000000050441217574114600244470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef POSITIONINCREMENTATTRIBUTE_H #define POSITIONINCREMENTATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// The positionIncrement determines the position of this token relative to the previous Token in a /// TokenStream, used in phrase searching. /// /// The default value is one. /// /// Some common uses for this are: /// /// Set it to zero to put multiple terms in the same position. This is useful if, eg., a word has multiple /// stems. Searches for phrases including either stem will match. In this case, all but the first stem's /// increment should be set to zero: the increment of the first instance should be one. Repeating a token /// with an increment of zero can also be used to boost the scores of matches on that token. /// /// Set it to values greater than one to inhibit exact phrase matches. If, for example, one does not want /// phrases to match across removed stop words, then one could build a stop word filter that removes stop /// words and also sets the increment to the number of stop words removed before each non-stop word. Then /// exact phrase queries will only match when the terms occur with no intervening stop words. /// /// @see TermPositions class LPPAPI PositionIncrementAttribute : public Attribute { public: PositionIncrementAttribute(); virtual ~PositionIncrementAttribute(); LUCENE_CLASS(PositionIncrementAttribute); protected: int32_t positionIncrement; public: virtual String toString(); /// Set the position increment. The default value is one. /// @param positionIncrement the distance from the prior term virtual void setPositionIncrement(int32_t positionIncrement); /// Returns the position increment of this Token. 
/// @see #setPositionIncrement virtual int32_t getPositionIncrement(); virtual void clear(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual void copyTo(AttributePtr target); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/PositiveScoresOnlyCollector.h000066400000000000000000000022301217574114600245760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef POSITIVESCORESONLYCOLLECTOR_H #define POSITIVESCORESONLYCOLLECTOR_H #include "Collector.h" namespace Lucene { /// A {@link Collector} implementation which wraps another {@link Collector} and makes sure only /// documents with scores > 0 are collected. class LPPAPI PositiveScoresOnlyCollector : public Collector { public: PositiveScoresOnlyCollector(CollectorPtr c); virtual ~PositiveScoresOnlyCollector(); LUCENE_CLASS(PositiveScoresOnlyCollector); protected: CollectorPtr collector; ScorerPtr scorer; public: virtual void collect(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setScorer(ScorerPtr scorer); virtual bool acceptsDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PrefixFilter.h000066400000000000000000000015261217574114600215160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PREFIXFILTER_H #define PREFIXFILTER_H #include "MultiTermQueryWrapperFilter.h" namespace Lucene { /// A Filter that restricts search results to values that have a matching prefix in a given field. class LPPAPI PrefixFilter : public MultiTermQueryWrapperFilter { public: PrefixFilter(TermPtr prefix); virtual ~PrefixFilter(); LUCENE_CLASS(PrefixFilter); public: TermPtr getPrefix(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PrefixQuery.h000066400000000000000000000027761217574114600214060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PREFIXQUERY_H #define PREFIXQUERY_H #include "MultiTermQuery.h" namespace Lucene { /// A Query that matches documents containing terms with a specified prefix. A PrefixQuery is built by /// QueryParser for input like app*. /// /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. class LPPAPI PrefixQuery : public MultiTermQuery { public: /// Constructs a query for terms starting with prefix. PrefixQuery(TermPtr prefix); virtual ~PrefixQuery(); LUCENE_CLASS(PrefixQuery); protected: TermPtr prefix; public: using MultiTermQuery::toString; /// Returns the prefix of this query. TermPtr getPrefix(); /// Prints a user-readable version of this query. 
virtual String toString(const String& field); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); protected: virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/PrefixTermEnum.h000066400000000000000000000022531217574114600220230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PREFIXTERMENUM_H #define PREFIXTERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that match the specified prefix filter term. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than /// all that precede it. class LPPAPI PrefixTermEnum : public FilteredTermEnum { public: PrefixTermEnum(IndexReaderPtr reader, TermPtr prefix); virtual ~PrefixTermEnum(); LUCENE_CLASS(PrefixTermEnum); protected: TermPtr prefix; bool _endEnum; public: virtual double difference(); protected: virtual bool endEnum(); virtual bool termCompare(TermPtr term); TermPtr getPrefixTerm(); }; } #endif LucenePlusPlus-rel_3.0.4/include/PriorityQueue.h000066400000000000000000000167371217574114600217530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PRIORITYQUEUE_H #define PRIORITYQUEUE_H #include "LuceneObject.h" #include "MiscUtils.h" namespace Lucene { /// A PriorityQueue maintains a partial ordering of its elements such that the least element can always /// be found in constant time. Put()'s and pop()'s require log(size) time. /// /// NOTE: This class pre-allocates a full array of length maxSize + 1. template class PriorityQueue : public LuceneObject { public: typedef typename std::vector< TYPE, LuceneAllocator > heap_type; PriorityQueue(int32_t maxSize) { this->_size = 0; this->_maxSize = maxSize; } virtual ~PriorityQueue() { } protected: heap_type heap; int32_t _size; int32_t _maxSize; public: virtual void initialize() { bool empty = heap.empty(); if (empty) { int32_t heapSize = 0; if (_maxSize == 0) { // We allocate 1 extra to avoid if statement in top() heapSize = 2; } else if (_maxSize == INT_MAX) { // Don't wrap heapSize to -1, in this case, which causes a confusing NegativeArraySizeException. // Note that very likely this will simply then hit an OOME, but at least that's more indicative // to caller that this values is too big. We don't +1 in this case, but it's very unlikely in // practice one will actually insert this many objects into the PQ heapSize = INT_MAX; } else { // NOTE: we add +1 because all access to heap is 1-based not 0-based. heap[0] is unused. heapSize = _maxSize + 1; } this->heap.resize(heapSize); } // If sentinel objects are supported, populate the queue with them TYPE sentinel = getSentinelObject(); if (empty && sentinel) { heap[1] = sentinel; for (int32_t i = 2; i < (int32_t)heap.size(); ++i) heap[i] = getSentinelObject(); _size = _maxSize; } } /// Return maximum size of queue int32_t maxSize() { return _maxSize; } /// Adds an Object to a PriorityQueue in log(size) time. If one tries to add more objects /// than maxSize from initialize an {@link IndexOutOfBoundsException} is thrown. 
TYPE add(const TYPE& type) { ++_size; if (_size < 0 || _size >= (int32_t)heap.size()) boost::throw_exception(IndexOutOfBoundsException()); heap[_size] = type; upHeap(); return heap[1]; } /// Adds an Object to a PriorityQueue in log(size) time. It returns the object (if any) that was /// dropped off the heap because it was full. This can be the given parameter (in case it is /// smaller than the full heap's minimum, and couldn't be added), or another object that was /// previously the smallest value in the heap and now has been replaced by a larger one, or null /// if the queue wasn't yet full with maxSize elements. TYPE addOverflow(const TYPE& type) { if (_size < _maxSize) { add(type); return TYPE(); } else if (_size > 0 && !lessThan(type, heap[1])) { TYPE result = heap[1]; heap[1] = type; updateTop(); return result; } else return type; } /// Returns the least element of the PriorityQueue. TYPE top() { // We don't need to check size here: if maxSize is 0, then heap is length 2 array with both // entries null. If size is 0 then heap[1] is already null. return heap[1]; } /// Removes and returns the least element of the PriorityQueue. TYPE pop() { if (_size > 0) { TYPE result = heap[1]; // save first value heap[1] = heap[_size]; // move last to first heap[_size--] = TYPE(); downHeap(); // adjust heap return result; } else return TYPE(); } /// Should be called when the Object at top changes values. TYPE updateTop() { downHeap(); return heap[1]; } /// Returns the number of elements currently stored in the PriorityQueue. int32_t size() const { return _size; } /// Returns whether PriorityQueue is currently empty. bool empty() const { return (_size == 0); } /// Removes all entries from the PriorityQueue. 
void clear() { for (int32_t i = 0; i <= _size; ++i) heap[i] = TYPE(); _size = 0; } protected: void upHeap() { int32_t i = _size; TYPE node = heap[i]; // save bottom node int32_t j = MiscUtils::unsignedShift(i, 1); while (j > 0 && lessThan(node, heap[j])) { heap[i] = heap[j]; // shift parents down i = j; j = MiscUtils::unsignedShift(j, 1); } heap[i] = node; // install saved node } void downHeap() { int32_t i = 1; TYPE node = heap[i]; // save top node int32_t j = i << 1; // find smaller child int32_t k = j + 1; if (k <= _size && lessThan(heap[k], heap[j])) j = k; while (j <= _size && lessThan(heap[j], node)) { heap[i] = heap[j]; // shift up child i = j; j = i << 1; k = j + 1; if (k <= _size && lessThan(heap[k], heap[j])) j = k; } heap[i] = node; // install saved node } /// Determines the ordering of objects in this priority queue. Subclasses must define this one method. virtual bool lessThan(const TYPE& first, const TYPE& second) { return std::less()(first, second); } /// This method can be overridden by extending classes to return a sentinel object which will be used by /// {@link #initialize} to fill the queue, so that the code which uses that queue can always assume it's /// full and only change the top without attempting to insert any new object. /// /// Those sentinel values should always compare worse than any non-sentinel value (ie., {@link #lessThan} /// should always favour the non-sentinel values). virtual TYPE getSentinelObject() { return TYPE(); } }; } #endif LucenePlusPlus-rel_3.0.4/include/Query.h000066400000000000000000000114561217574114600202230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef QUERY_H #define QUERY_H #include "LuceneObject.h" namespace Lucene { /// The abstract base class for queries. /// /// Instantiable subclasses are: /// /// {@link TermQuery} /// {@link MultiTermQuery} /// {@link BooleanQuery} /// {@link WildcardQuery} /// {@link PhraseQuery} /// {@link PrefixQuery} /// {@link MultiPhraseQuery} /// {@link FuzzyQuery} /// {@link TermRangeQuery} /// {@link NumericRangeQuery} /// {@link org.apache.lucene.search.spans.SpanQuery} /// /// A parser for queries is contained in: {@link QueryParser} class LPPAPI Query : public LuceneObject { public: Query(); virtual ~Query(); LUCENE_CLASS(Query); protected: double boost; // query boost factor public: /// Sets the boost for this query clause to b. Documents matching this clause will (in addition to /// the normal weightings) have their score multiplied by b. virtual void setBoost(double b); /// Gets the boost for this clause. Documents matching this clause will (in addition to the normal /// weightings) have their score multiplied by b. The boost is 1.0 by default. virtual double getBoost(); /// Prints a query to a string, with field assumed to be the default field and omitted. /// /// The representation used is one that is supposed to be readable by {@link QueryParser}. However, /// there are the following limitations: /// /// If the query was created by the parser, the printed representation may not be exactly what was /// parsed. For example, characters that need to be escaped will be represented without the required /// backslash. /// /// Some of the more complicated queries (eg. span queries) don't have a representation that can be /// parsed by QueryParser. virtual String toString(const String& field); /// Prints a query to a string. virtual String toString(); /// Constructs an appropriate Weight implementation for this query. /// Only implemented by primitive queries, which re-write to themselves. 
virtual WeightPtr createWeight(SearcherPtr searcher); /// Constructs and initializes a Weight for a top-level query. virtual WeightPtr weight(SearcherPtr searcher); /// Called to re-write queries into primitive queries. For example, a PrefixQuery will be rewritten /// into a BooleanQuery that consists of TermQuerys. virtual QueryPtr rewrite(IndexReaderPtr reader); /// Called when re-writing queries under MultiSearcher. /// /// Create a single query suitable for use by all subsearchers (in 1-1 correspondence with queries). /// This is an optimization of the OR of all queries. We handle the common optimization cases of equal /// queries and overlapping clauses of boolean OR queries (as generated by MultiTermQuery.rewrite()). /// Be careful overriding this method as queries[0] determines which method will be called and is not /// necessarily of the same type as the other queries. virtual QueryPtr combine(Collection queries); /// Adds all terms occurring in this query to the terms set. Only works if this query is in its /// {@link #rewrite rewritten} form. virtual void extractTerms(SetTerm terms); /// Merges the clauses of a set of BooleanQuery's into a single BooleanQuery. /// /// A utility for use by {@link #combine(Query[])} implementations. static QueryPtr mergeBooleanQueries(Collection queries); /// Returns the Similarity implementation to be used for this query. Subclasses may override this method /// to specify their own Similarity implementation, perhaps one that delegates through that of the Searcher. /// By default the Searcher's Similarity implementation is returned. virtual SimilarityPtr getSimilarity(SearcherPtr searcher); /// Returns a clone of this query. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); /// Return given boost value as a string. 
String boostString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/QueryParseError.h000066400000000000000000000046661217574114600222350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSEERROR_H #define QUERYPARSEERROR_H #include "LuceneObject.h" namespace Lucene { /// Utility class to handle query parse errors class QueryParseError : public LuceneObject { public: virtual ~QueryParseError(); LUCENE_CLASS(QueryParseError); public: /// Returns a detailed message for the Error when it is thrown by the token manager to indicate a /// lexical error. /// @param EOFSeen Indicates if EOF caused the lexical error /// @param curLexState Lexical state in which this error occurred /// @param errorLine Line number when the error occurred /// @param errorColumn Column number when the error occurred /// @param errorAfter Prefix that was seen before this error occurred /// @param curChar The offending character static String lexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, const String& errorAfter, wchar_t curChar); /// Generate a parse error message and returns it. /// @param currentToken This is the last token that has been consumed successfully. If this object /// has been created due to a parse error, the token following this token will (therefore) be the first /// error token. /// @param expectedTokenSequences Each entry in this array is an array of integers. Each array of /// integers represents a sequence of tokens (by their ordinal values) that is expected at this point /// of the parse. 
/// @param tokenImage This is a reference to the "tokenImage" array of the generated parser within /// which the parse error occurred. static String parseError(QueryParserTokenPtr currentToken, Collection< Collection > expectedTokenSequences, Collection tokenImage); protected: /// Replaces unprintable characters by their escaped (or unicode escaped) equivalents in the /// given string static String addEscapes(const String& str); }; } #endif LucenePlusPlus-rel_3.0.4/include/QueryParser.h000066400000000000000000000523651217574114600214040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSER_H #define QUERYPARSER_H #include "QueryParserConstants.h" #include "DateTools.h" #include "BooleanClause.h" namespace Lucene { typedef HashMap MapStringResolution; /// The most important method is {@link #parse(const String&)}. /// /// The syntax for query strings is as follows: /// A Query is a series of clauses. /// A clause may be prefixed by: ///
    ///
  • a plus (+) or a minus (-) sign, indicating that the clause is required or prohibited respectively; or ///
  • a term followed by a colon, indicating the field to be searched. This enables one to construct queries /// which search multiple fields. ///
/// /// A clause may be either: ///
    ///
  • a term, indicating all the documents that contain this term; or ///
  • a nested query, enclosed in parentheses. Note that this may be used with a +/- prefix to require any /// of a set of terms. ///
/// /// Thus, in BNF, the query grammar is: ///
    /// Query  ::= ( Clause )*
    /// Clause ::= ["+", "-"] [ ":"] (  | "(" Query ")" )
    /// 
/// /// Examples of appropriately formatted queries can be found in the query syntax documentation. /// /// In {@link TermRangeQuery}s, QueryParser tries to detect date values, eg. /// date:[6/1/2005 TO 6/4/2005] produces a range query that searches for "date" fields between /// 2005-06-01 and 2005-06-04. Note that the format of the accepted input depends on {@link #setLocale(Locale) /// the locale}. /// /// By default a date is converted into a search term using the deprecated {@link DateField} for compatibility /// reasons. To use the new {@link DateTools} to convert dates, a {@link Resolution} has to be set. /// /// The date resolution that shall be used for RangeQueries can be set using {@link #setDateResolution(Resolution)} /// or {@link #setDateResolution(const String&, Resolution)}. The former sets the default date resolution for /// all fields, whereas the latter can be used to set field specific date resolutions. Field specific date /// resolutions take, if set, precedence over the default date resolution. /// /// If you use neither {@link DateField} nor {@link DateTools} in your index, you can create your own query /// parser that inherits QueryParser and overwrites {@link #getRangeQuery(const String&, const String&, /// const String&, bool)} to use a different method for date conversion. /// /// Note that QueryParser is not thread-safe. /// /// NOTE: there is a new QueryParser in contrib, which matches the same syntax as this class, but is more modular, /// enabling substantial customization to how a query is created. /// /// NOTE: You must specify the required {@link Version} compatibility when creating QueryParser: ///
    ///
  • As of 2.9, {@link #setEnablePositionIncrements} is true by default. ///
class LPPAPI QueryParser : public QueryParserConstants, public LuceneObject { public: /// Constructs a query parser. /// @param matchVersion Lucene version to match. /// @param field The default field for query terms. /// @param analyzer Used to find terms in the query text. QueryParser(LuceneVersion::Version matchVersion, const String& field, AnalyzerPtr analyzer); /// Constructor with user supplied QueryParserCharStream. QueryParser(QueryParserCharStreamPtr stream); /// Constructor with generated Token Manager. QueryParser(QueryParserTokenManagerPtr tokenMgr); virtual ~QueryParser(); LUCENE_CLASS(QueryParser); /// The default operator for parsing queries. Use {@link QueryParser#setDefaultOperator} to change it. enum Operator { OR_OPERATOR, AND_OPERATOR }; protected: static const int32_t CONJ_NONE; static const int32_t CONJ_AND; static const int32_t CONJ_OR; static const int32_t MOD_NONE; static const int32_t MOD_NOT; static const int32_t MOD_REQ; /// The actual operator that parser uses to combine query terms Operator _operator; /// Next token. 
int32_t _jj_ntk; QueryParserTokenPtr jj_scanpos; QueryParserTokenPtr jj_lastpos; int32_t jj_la; int32_t jj_gen; Collection jj_la1; static const int32_t jj_la1_0[]; static const int32_t jj_la1_1[]; struct JJCalls; typedef boost::shared_ptr JJCallsPtr; struct JJCalls { JJCalls() { gen = 0; arg = 0; } int32_t gen; QueryParserTokenPtr first; int32_t arg; JJCallsPtr next; }; Collection jj_2_rtns; bool jj_rescan; int32_t jj_gc; Collection< Collection > jj_expentries; Collection jj_expentry; int32_t jj_kind; Collection jj_lasttokens; int32_t jj_endpos; public: bool lowercaseExpandedTerms; RewriteMethodPtr multiTermRewriteMethod; bool allowLeadingWildcard; bool enablePositionIncrements; AnalyzerPtr analyzer; String field; int32_t phraseSlop; double fuzzyMinSim; int32_t fuzzyPrefixLength; std::locale locale; // the default date resolution DateTools::Resolution dateResolution; // maps field names to date resolutions MapStringResolution fieldToDateResolution; // The collator to use when determining range inclusion, for use when constructing RangeQuerys CollatorPtr rangeCollator; /// Generated Token Manager. QueryParserTokenManagerPtr token_source; /// Current token. QueryParserTokenPtr token; /// Next token. QueryParserTokenPtr jj_nt; public: /// Parses a query string, returning a {@link Query}. /// @param query The query string to be parsed. QueryPtr parse(const String& query); /// @return Returns the analyzer. AnalyzerPtr getAnalyzer(); /// @return Returns the field. String getField(); /// Get the minimal similarity for fuzzy queries. double getFuzzyMinSim(); /// Set the minimum similarity for fuzzy queries. Default is 0.5. void setFuzzyMinSim(double fuzzyMinSim); /// Get the prefix length for fuzzy queries. /// @return Returns the fuzzyPrefixLength. int32_t getFuzzyPrefixLength(); /// Set the prefix length for fuzzy queries. Default is 0. /// @param fuzzyPrefixLength The fuzzyPrefixLength to set. 
void setFuzzyPrefixLength(int32_t fuzzyPrefixLength); /// Sets the default slop for phrases. If zero, then exact phrase matches are required. /// Default value is zero. void setPhraseSlop(int32_t phraseSlop); /// Gets the default slop for phrases. int32_t getPhraseSlop(); /// Set to true to allow leading wildcard characters. /// /// When set, * or ? are allowed as the first character of a PrefixQuery and WildcardQuery. /// Note that this can produce very slow queries on big indexes. Default: false. void setAllowLeadingWildcard(bool allowLeadingWildcard); /// @see #setAllowLeadingWildcard(bool) bool getAllowLeadingWildcard(); /// Set to true to enable position increments in result query. /// /// When set, result phrase and multi-phrase queries will be aware of position increments. /// Useful when eg. a StopFilter increases the position increment of the token that follows an /// omitted token. Default: false. void setEnablePositionIncrements(bool enable); /// @see #setEnablePositionIncrements(bool) bool getEnablePositionIncrements(); /// Sets the boolean operator of the QueryParser. In default mode (OR_OPERATOR) terms without /// any modifiers are considered optional: for example capital of Hungary is equal to capital /// OR of OR Hungary. /// In AND_OPERATOR mode terms are considered to be in conjunction: the above mentioned query is /// parsed as capital AND of AND Hungary void setDefaultOperator(Operator op); /// Gets implicit operator setting, which will be either AND_OPERATOR or OR_OPERATOR. Operator getDefaultOperator(); /// Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically lower-cased /// or not. Default is true. void setLowercaseExpandedTerms(bool lowercaseExpandedTerms); /// @see #setLowercaseExpandedTerms(bool) bool getLowercaseExpandedTerms(); /// By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} when /// creating a PrefixQuery, WildcardQuery or RangeQuery. 
This implementation is generally preferable /// because it a) Runs faster b) Does not have the scarcity of terms unduly influence score c) avoids /// any "TooManyClauses" exception. However, if your application really needs to use the old- /// fashioned BooleanQuery expansion rewriting and the above points are not relevant then use this /// to change the rewrite method. void setMultiTermRewriteMethod(RewriteMethodPtr method); /// @see #setMultiTermRewriteMethod RewriteMethodPtr getMultiTermRewriteMethod(); /// Set locale used by date range parsing. void setLocale(std::locale locale); /// Returns current locale, allowing access by subclasses. std::locale getLocale(); /// Sets the default date resolution used by RangeQueries for fields for which no specific date /// resolutions has been set. Field specific resolutions can be set with {@link /// #setDateResolution(const String&, DateTools::Resolution)}. /// @param dateResolution The default date resolution to set void setDateResolution(DateTools::Resolution dateResolution); /// Sets the date resolution used by RangeQueries for a specific field. /// @param fieldName Field for which the date resolution is to be set /// @param dateResolution Date resolution to set void setDateResolution(const String& fieldName, DateTools::Resolution dateResolution); /// Returns the date resolution that is used by RangeQueries for the given field. Returns null, if /// no default or field specific date resolution has been set for the given field. DateTools::Resolution getDateResolution(const String& fieldName); /// Sets the collator used to determine index term inclusion in ranges for RangeQuerys. /// /// WARNING: Setting the rangeCollator to a non-null collator using this method will cause every /// single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending /// on the number of index Terms in this Field, the operation could be very slow. 
/// @param rc The collator to use when constructing RangeQuerys void setRangeCollator(CollatorPtr rc); /// @return the collator used to determine index term inclusion in ranges for RangeQuerys. CollatorPtr getRangeCollator(); /// Command line tool to test QueryParser, using {@link SimpleAnalyzer}. static int main(Collection args); /// Query ::= ( Clause )* /// Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) int32_t Conjunction(); int32_t Modifiers(); /// This makes sure that there is no garbage after the query string virtual QueryPtr TopLevelQuery(const String& field); virtual QueryPtr ParseQuery(const String& field); virtual QueryPtr ParseClause(const String& field); virtual QueryPtr ParseTerm(const String& field); /// Reinitialise. virtual void ReInit(QueryParserCharStreamPtr stream); /// Reinitialise. virtual void ReInit(QueryParserTokenManagerPtr tokenMgr); /// Get the next Token. virtual QueryParserTokenPtr getNextToken(); /// Get the specific Token. virtual QueryParserTokenPtr getToken(int32_t index); /// Generate QueryParserError exception. virtual void generateParseException(); /// Enable tracing. virtual void enable_tracing(); /// Disable tracing. virtual void disable_tracing(); protected: /// Construct query parser with supplied QueryParserCharStream or TokenManager void ConstructParser(QueryParserCharStreamPtr stream, QueryParserTokenManagerPtr tokenMgr); virtual void addClause(Collection clauses, int32_t conj, int32_t mods, QueryPtr q); /// Use the analyzer to get all the tokens, and then build a TermQuery, PhraseQuery, or nothing /// based on the term count. virtual QueryPtr getFieldQuery(const String& field, const String& queryText); /// Base implementation delegates to {@link #getFieldQuery(const String&, const String&)}. /// This method may be overridden, for example, to return a SpanNearQuery instead of a PhraseQuery. 
virtual QueryPtr getFieldQuery(const String& field, const String& queryText, int32_t slop); /// Builds a new TermRangeQuery instance for given min/max parts virtual QueryPtr getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); /// Builds a new BooleanQuery instance /// @param disableCoord disable coord BooleanQueryPtr newBooleanQuery(bool disableCoord); /// Builds a new BooleanClause instance /// @param q sub query /// @param occur how this clause should occur when matching documents /// @return new BooleanClause instance BooleanClausePtr newBooleanClause(QueryPtr q, BooleanClause::Occur occur); /// Builds a new TermQuery instance /// @param term term /// @return new TermQuery instance QueryPtr newTermQuery(TermPtr term); /// Builds a new PhraseQuery instance /// @return new PhraseQuery instance PhraseQueryPtr newPhraseQuery(); /// Builds a new MultiPhraseQuery instance /// @return new MultiPhraseQuery instance MultiPhraseQueryPtr newMultiPhraseQuery(); /// Builds a new PrefixQuery instance /// @param prefix Prefix term /// @return new PrefixQuery instance QueryPtr newPrefixQuery(TermPtr prefix); /// Builds a new FuzzyQuery instance /// @param term Term /// @param minimumSimilarity minimum similarity /// @param prefixLength prefix length /// @return new FuzzyQuery Instance QueryPtr newFuzzyQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength); /// Builds a new TermRangeQuery instance /// @param field Field /// @param part1 min /// @param part2 max /// @param inclusive true if range is inclusive /// @return new TermRangeQuery instance QueryPtr newRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); /// Builds a new MatchAllDocsQuery instance /// @return new MatchAllDocsQuery instance QueryPtr newMatchAllDocsQuery(); /// Builds a new WildcardQuery instance /// @param t wildcard term /// @return new WildcardQuery instance QueryPtr newWildcardQuery(TermPtr term); /// Factory 
method for generating query, given a set of clauses. By default creates a boolean query /// composed of clauses passed in. /// /// Can be overridden by extending classes, to modify query being returned. /// /// @param clauses List that contains {@link BooleanClause} instances to join. /// @return Resulting {@link Query} object. virtual QueryPtr getBooleanQuery(Collection clauses); /// Factory method for generating query, given a set of clauses. By default creates a boolean query /// composed of clauses passed in. /// /// Can be overridden by extending classes, to modify query being returned. /// /// @param clauses List that contains {@link BooleanClause} instances to join. /// @param disableCoord true if coord scoring should be disabled. /// @return Resulting {@link Query} object. virtual QueryPtr getBooleanQuery(Collection clauses, bool disableCoord); /// Factory method for generating a query. Called when parser parses an input term token that contains /// one or more wildcard characters (? and *), but is not a prefix term token (one that has just a /// single * character at the end) /// /// Depending on settings, prefix term may be lower-cased automatically. It will not go through the /// default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard /// templates. /// /// Can be overridden by extending classes, to provide custom handling for wildcard queries, which may /// be necessary due to missing analyzer calls. /// /// @param field Name of the field query will use. /// @param termStr Term token that contains one or more wild card characters (? or *), but is not simple /// prefix term /// @return Resulting {@link Query} built for the term virtual QueryPtr getWildcardQuery(const String& field, const String& termStr); /// Factory method for generating a query (similar to {@link #getWildcardQuery}). 
Called when parser /// parses an input term token that uses prefix notation; that is, contains a single '*' wildcard /// character as its last character. Since this is a special case of generic wildcard term, and such /// a query can be optimized easily, this usually results in a different query object. /// /// Depending on settings, a prefix term may be lower-cased automatically. It will not go through the /// default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard templates. /// /// Can be overridden by extending classes, to provide custom handling for wild card queries, which may be /// necessary due to missing analyzer calls. /// /// @param field Name of the field query will use. /// @param termStr Term token to use for building term for the query (without trailing '*' character) /// @return Resulting {@link Query} built for the term virtual QueryPtr getPrefixQuery(const String& field, const String& termStr); /// Factory method for generating a query (similar to {@link #getWildcardQuery}). Called when parser /// parses an input term token that has the fuzzy suffix (~) appended. /// /// @param field Name of the field query will use. /// @param termStr Term token to use for building term for the query /// @return Resulting {@link Query} built for the term virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity); /// Returns a String where the escape char has been removed, or kept only once if there was a double /// escape. Supports escaped unicode characters, eg. translates \\u0041 to A. String discardEscapeChar(const String& input); /// Returns the numeric value of the hexadecimal character static int32_t hexToInt(wchar_t c); /// Returns a String where those characters that QueryParser expects to be escaped are escaped by /// a preceding \. 
static String escape(const String& s); bool jj_2_1(int32_t xla); bool jj_3R_2(); bool jj_3_1(); bool jj_3R_3(); QueryParserTokenPtr jj_consume_token(int32_t kind); bool jj_scan_token(int32_t kind); int32_t jj_ntk(); void jj_add_error_token(int32_t kind, int32_t pos); void jj_rescan_token(); void jj_save(int32_t index, int32_t xla); }; } #endif LucenePlusPlus-rel_3.0.4/include/QueryParserCharStream.h000066400000000000000000000100421217574114600233400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSERCHARSTREAM_H #define QUERYPARSERCHARSTREAM_H #include "LuceneObject.h" namespace Lucene { /// This interface describes a character stream that maintains line and column number positions of /// the characters. It also has the capability to backup the stream to some extent. An implementation /// of this interface is used in the QueryParserTokenManager. /// /// All the methods except backup can be implemented in any fashion. backup needs to be implemented /// correctly for the correct operation of the lexer. Rest of the methods are all used to get information /// like line number, column number and the String that constitutes a token and are not used by the lexer. /// Hence their implementation won't affect the generated lexer's operation. class LPPAPI QueryParserCharStream { public: LUCENE_INTERFACE(QueryParserCharStream); public: /// Returns the next character from the selected input. The method of selecting the input is the /// responsibility of the class implementing this interface. virtual wchar_t readChar() = 0; /// Returns the column position of the character last read. 
/// @deprecated /// @see #getEndColumn virtual int32_t getColumn() = 0; /// Returns the line number of the character last read. /// @deprecated /// @see #getEndLine virtual int32_t getLine() = 0; /// Returns the column number of the last character for current token (being matched after the last /// call to BeginToken). virtual int32_t getEndColumn() = 0; /// Returns the line number of the last character for current token (being matched after the last call /// to BeginToken). virtual int32_t getEndLine() = 0; /// Returns the column number of the first character for current token (being matched after the last /// call to BeginToken). virtual int32_t getBeginColumn() = 0; /// Returns the line number of the first character for current token (being matched after the last call /// to BeginToken). virtual int32_t getBeginLine() = 0; /// Backs up the input stream by amount steps. Lexer calls this method if it had already read some /// characters, but could not use them to match a (longer) token. So, they will be used again as the /// prefix of the next token and it is the implementation's's responsibility to do this right. virtual void backup(int32_t amount) = 0; /// Returns the next character that marks the beginning of the next token. All characters must remain /// in the buffer between two successive calls to this method to implement backup correctly. virtual wchar_t BeginToken() = 0; /// Returns a string made up of characters from the marked token beginning to the current buffer position. /// Implementations have the choice of returning anything that they want to. For example, for efficiency, /// one might decide to just return null, which is a valid implementation. virtual String GetImage() = 0; /// Returns an array of characters that make up the suffix of length for the currently matched token. /// This is used to build up the matched string for use in actions in the case of MORE. 
virtual CharArray GetSuffix(int32_t length) = 0; /// The lexer calls this function to indicate that it is done with the stream and hence implementations /// can free any resources held by this class. Again, the body of this function can be just empty and it /// will not affect the lexer's operation. virtual void Done() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/QueryParserConstants.h000066400000000000000000000037311217574114600232720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSERCONSTANTS_H #define QUERYPARSERCONSTANTS_H #include "LuceneObject.h" namespace Lucene { /// Token literal values and constants. class LPPAPI QueryParserConstants { protected: QueryParserConstants(); public: virtual ~QueryParserConstants(); LUCENE_INTERFACE(QueryParserConstants); public: enum RegularExpressionId { _EOF = 0, _NUM_CHAR = 1, _ESCAPED_CHAR = 2, _TERM_START_CHAR = 3, _TERM_CHAR = 4, _WHITESPACE = 5, _QUOTED_CHAR = 6, AND = 8, OR = 9, NOT = 10, PLUS = 11, MINUS = 12, LPAREN = 13, RPAREN = 14, COLON = 15, STAR = 16, CARAT = 17, QUOTED = 18, TERM = 19, FUZZY_SLOP = 20, PREFIXTERM = 21, WILDTERM = 22, RANGEIN_START = 23, RANGEEX_START = 24, NUMBER = 25, RANGEIN_TO = 26, RANGEIN_END = 27, RANGEIN_QUOTED = 28, RANGEIN_GOOP = 29, RANGEEX_TO = 30, RANGEEX_END = 31, RANGEEX_QUOTED = 32, RANGEEX_GOOP = 33 }; enum LexicalState { Boost = 0, RangeEx = 1, RangeIn = 2, DEFAULT = 3 }; /// Literal token values. static Collection tokenImage; protected: /// Literal token values. 
static const wchar_t* _tokenImage[]; }; } #endif LucenePlusPlus-rel_3.0.4/include/QueryParserToken.h000066400000000000000000000057631217574114600224050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSERTOKEN_H #define QUERYPARSERTOKEN_H #include "LuceneObject.h" namespace Lucene { /// Describes the input token stream. class LPPAPI QueryParserToken : public LuceneObject { public: /// Constructs a new token for the specified Image and Kind. QueryParserToken(int32_t kind = 0, const String& image = EmptyString); virtual ~QueryParserToken(); LUCENE_CLASS(QueryParserToken); public: /// An integer that describes the kind of this token. int32_t kind; /// The line number of the first character of this Token. int32_t beginLine; /// The column number of the first character of this Token. int32_t beginColumn; /// The line number of the last character of this Token. int32_t endLine; /// The column number of the last character of this Token. int32_t endColumn; /// The string image of the token. String image; /// A reference to the next regular (non-special) token from the input stream. If this is the last /// token from the input stream, or if the token manager has not read tokens beyond this one, this /// field is set to null. This is true only if this token is also a regular token. Otherwise, see /// below for a description of the contents of this field. QueryParserTokenPtr next; /// This field is used to access special tokens that occur prior to this token, but after the /// immediately preceding regular (non-special) token. If there are no such special tokens, this /// field is set to null. 
When there are more than one such special token, this field refers to the /// last of these special tokens, which in turn refers to the next previous special token through /// its specialToken field, and so on until the first special token (whose specialToken field is /// null). The next fields of special tokens refer to other special tokens that immediately follow /// it (without an intervening regular token). If there is no such token, this field is null. QueryParserTokenPtr specialToken; public: /// Returns the image. virtual String toString(); /// Returns a new Token object, by default. However, if you want, you can create and return subclass /// objects based on the value of ofKind. Simply add the cases to the switch for all those special /// cases. static QueryParserTokenPtr newToken(int32_t ofKind, const String& image = EmptyString); }; } #endif LucenePlusPlus-rel_3.0.4/include/QueryParserTokenManager.h000066400000000000000000000102151217574114600236640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSERTOKENMANAGER_H #define QUERYPARSERTOKENMANAGER_H #include "QueryParserConstants.h" namespace Lucene { /// Token Manager. class LPPAPI QueryParserTokenManager : public QueryParserConstants, public LuceneObject { public: QueryParserTokenManager(QueryParserCharStreamPtr stream); QueryParserTokenManager(QueryParserCharStreamPtr stream, int32_t lexState); virtual ~QueryParserTokenManager(); LUCENE_CLASS(QueryParserTokenManager); public: /// Debug output. 
InfoStreamPtr debugStream; protected: static const int64_t jjbitVec0[]; static const int64_t jjbitVec1[]; static const int64_t jjbitVec3[]; static const int64_t jjbitVec4[]; static const int32_t jjnextStates[]; /// Token literal values. static const wchar_t* jjstrLiteralImages[]; /// Lexer state names. static const wchar_t* lexStateNames[]; /// Lex State array. static const int32_t jjnewLexState[]; static const int64_t jjtoToken[]; static const int64_t jjtoSkip[]; int32_t curLexState; int32_t defaultLexState; int32_t jjnewStateCnt; int32_t jjround; int32_t jjmatchedPos; int32_t jjmatchedKind; QueryParserCharStreamPtr input_stream; IntArray jjrounds; IntArray jjstateSet; wchar_t curChar; public: /// Set debug output. void setDebugStream(InfoStreamPtr debugStream); /// Reinitialise parser. void ReInit(QueryParserCharStreamPtr stream); /// Reinitialise parser. void ReInit(QueryParserCharStreamPtr stream, int32_t lexState); /// Switch to specified lex state. void SwitchTo(int32_t lexState); /// Get the next Token. 
QueryParserTokenPtr getNextToken(); protected: int32_t jjStopStringLiteralDfa_3(int32_t pos, int64_t active0); int32_t jjStartNfa_3(int32_t pos, int64_t active0); int32_t jjStopAtPos(int32_t pos, int32_t kind); int32_t jjMoveStringLiteralDfa0_3(); int32_t jjStartNfaWithStates_3(int32_t pos, int32_t kind, int32_t state); int32_t jjMoveNfa_3(int32_t startState, int32_t curPos); int32_t jjStopStringLiteralDfa_1(int32_t pos, int64_t active0); int32_t jjStartNfa_1(int32_t pos, int64_t active0); int32_t jjMoveStringLiteralDfa0_1(); int32_t jjMoveStringLiteralDfa1_1(int64_t active0); int32_t jjStartNfaWithStates_1(int32_t pos, int32_t kind, int32_t state); int32_t jjMoveNfa_1(int32_t startState, int32_t curPos); int32_t jjMoveStringLiteralDfa0_0(); int32_t jjMoveNfa_0(int32_t startState, int32_t curPos); int32_t jjStopStringLiteralDfa_2(int32_t pos, int64_t active0); int32_t jjStartNfa_2(int32_t pos, int64_t active0); int32_t jjMoveStringLiteralDfa0_2(); int32_t jjMoveStringLiteralDfa1_2(int64_t active0); int32_t jjStartNfaWithStates_2(int32_t pos, int32_t kind, int32_t state); int32_t jjMoveNfa_2(int32_t startState, int32_t curPos); static bool jjCanMove_0(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); static bool jjCanMove_1(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); static bool jjCanMove_2(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); void ReInitRounds(); QueryParserTokenPtr jjFillToken(); void jjCheckNAdd(int32_t state); void jjAddStates(int32_t start, int32_t end); void jjCheckNAddTwoStates(int32_t state1, int32_t state2); void jjCheckNAddStates(int32_t start, int32_t end); }; } #endif LucenePlusPlus-rel_3.0.4/include/QueryTermVector.h000066400000000000000000000025121217574114600222270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYTERMVECTOR_H #define QUERYTERMVECTOR_H #include "TermFreqVector.h" namespace Lucene { class LPPAPI QueryTermVector : public TermFreqVector, public LuceneObject { public: /// @param queryTerms The original list of terms from the query, can contain duplicates QueryTermVector(Collection queryTerms); QueryTermVector(const String& queryString, AnalyzerPtr analyzer); virtual ~QueryTermVector(); LUCENE_CLASS(QueryTermVector); protected: Collection terms; Collection termFreqs; public: virtual String toString(); int32_t size(); Collection getTerms(); Collection getTermFrequencies(); int32_t indexOf(const String& term); Collection indexesOf(Collection terms, int32_t start, int32_t length); protected: void processTerms(Collection queryTerms); }; } #endif LucenePlusPlus-rel_3.0.4/include/QueryWrapperFilter.h000066400000000000000000000026371217574114600227330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYWRAPPERFILTER_H #define QUERYWRAPPERFILTER_H #include "Filter.h" namespace Lucene { /// Constrains search results to only match those which also match a provided query. /// /// This could be used, for example, with a {@link TermRangeQuery} on a suitably formatted date field to /// implement date filtering. One could re-use a single QueryFilter that matches, eg., only documents /// modified within the last week. The QueryFilter and TermRangeQuery would only need to be reconstructed /// once per day. 
class LPPAPI QueryWrapperFilter : public Filter { public: /// Constructs a filter which only matches documents matching query. QueryWrapperFilter(QueryPtr query); virtual ~QueryWrapperFilter(); LUCENE_CLASS(QueryWrapperFilter); protected: QueryPtr query; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); virtual String toString(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/RAMDirectory.h000066400000000000000000000061651217574114600214230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RAMDIRECTORY_H #define RAMDIRECTORY_H #include "Directory.h" namespace Lucene { /// A memory-resident {@link Directory} implementation. Locking implementation is by default the /// {@link SingleInstanceLockFactory} but can be changed with {@link #setLockFactory}. /// Lock acquisition sequence: RAMDirectory, then RAMFile class LPPAPI RAMDirectory : public Directory { public: /// Constructs an empty {@link Directory}. RAMDirectory(); /// Creates a new RAMDirectory instance from a different Directory implementation. /// This can be used to load a disk-based index into memory. /// /// This should be used only with indices that can fit into memory. /// /// Note that the resulting RAMDirectory instance is fully independent from the /// original Directory (it is a complete copy). Any subsequent changes to the /// original Directory will not be visible in the RAMDirectory instance. 
/// @param dir a Directory value RAMDirectory(DirectoryPtr dir); RAMDirectory(DirectoryPtr dir, bool closeDir); virtual ~RAMDirectory(); LUCENE_CLASS(RAMDirectory); INTERNAL: int64_t _sizeInBytes; MapStringRAMFile fileMap; protected: DirectoryWeakPtr _dirSource; bool copyDirectory; bool closeDir; public: virtual void initialize(); /// Returns an array of strings, one for each file in the directory. virtual HashSet listAll(); /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name); /// Returns the time the named file was last modified. virtual uint64_t fileModified(const String& name); /// Set the modified time of an existing file to now. virtual void touchFile(const String& name); /// Returns the length of a file in the directory. virtual int64_t fileLength(const String& name); /// Return total size in bytes of all files in this directory. /// This is currently quantized to RAMOutputStream::BUFFER_SIZE. int64_t sizeInBytes(); /// Removes an existing file in the directory. virtual void deleteFile(const String& name); /// Creates a new, empty file in the directory with the given name. /// Returns a stream writing this file. virtual IndexOutputPtr createOutput(const String& name); /// Returns a stream reading an existing file. virtual IndexInputPtr openInput(const String& name); /// Closes the store. virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/RAMFile.h000066400000000000000000000032741217574114600203340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef RAMFILE_H #define RAMFILE_H #include "LuceneObject.h" namespace Lucene { /// File used as buffer in RAMDirectory class LPPAPI RAMFile : public LuceneObject { public: RAMFile(); // File used as buffer, in no RAMDirectory RAMFile(RAMDirectoryPtr directory); virtual ~RAMFile(); LUCENE_CLASS(RAMFile); INTERNAL: int64_t length; RAMDirectoryWeakPtr _directory; protected: Collection buffers; int64_t sizeInBytes; /// This is publicly modifiable via Directory.touchFile(), so direct access not supported int64_t lastModified; public: /// For non-stream access from thread that might be concurrent with writing. int64_t getLength(); void setLength(int64_t length); /// For non-stream access from thread that might be concurrent with writing int64_t getLastModified(); void setLastModified(int64_t lastModified); int64_t getSizeInBytes(); ByteArray addBuffer(int32_t size); ByteArray getBuffer(int32_t index); int32_t numBuffers(); protected: /// Allocate a new buffer. Subclasses can allocate differently. virtual ByteArray newBuffer(int32_t size); }; } #endif LucenePlusPlus-rel_3.0.4/include/RAMInputStream.h000066400000000000000000000043761217574114600217340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RAMINPUTSTREAM_H #define RAMINPUTSTREAM_H #include "IndexInput.h" namespace Lucene { /// A memory-resident {@link IndexInput} implementation. 
class RAMInputStream : public IndexInput { public: RAMInputStream(); RAMInputStream(RAMFilePtr f); virtual ~RAMInputStream(); LUCENE_CLASS(RAMInputStream); public: static const int32_t BUFFER_SIZE; protected: RAMFilePtr file; int64_t _length; ByteArray currentBuffer; int32_t currentBufferIndex; int32_t bufferPosition; int64_t bufferStart; int32_t bufferLength; public: /// Closes the stream to further operations. virtual void close(); /// The number of bytes in the file. virtual int64_t length(); /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*,int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); protected: void switchCurrentBuffer(bool enforceEOF); }; } #endif LucenePlusPlus-rel_3.0.4/include/RAMOutputStream.h000066400000000000000000000045541217574114600221330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef RAMOUTPUTSTREAM_H #define RAMOUTPUTSTREAM_H #include "IndexOutput.h" namespace Lucene { /// A memory-resident {@link IndexOutput} implementation. class RAMOutputStream : public IndexOutput { public: /// Construct an empty output buffer. RAMOutputStream(); RAMOutputStream(RAMFilePtr f); virtual ~RAMOutputStream(); LUCENE_CLASS(RAMOutputStream); public: static const int32_t BUFFER_SIZE; protected: RAMFilePtr file; ByteArray currentBuffer; int32_t currentBufferIndex; int32_t bufferPosition; int64_t bufferStart; int32_t bufferLength; public: /// Copy the current contents of this buffer to the named output. void writeTo(IndexOutputPtr out); /// Resets this to an empty file. void reset(); /// Closes this stream to further operations. virtual void close(); /// Sets current position in this file, where the next write will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// The number of bytes in the file. virtual int64_t length(); /// Writes a single byte. /// @see IndexInput#readByte() virtual void writeByte(uint8_t b); /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); /// Forces any buffered output to be written. virtual void flush(); /// Returns the current position in this file, where the next write will occur. virtual int64_t getFilePointer(); /// Returns byte usage of all buffers. int64_t sizeInBytes(); protected: void switchCurrentBuffer(); void setFileLength(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Random.h000066400000000000000000000016171217574114600203340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RANDOM_H #define RANDOM_H #include "LuceneObject.h" namespace Lucene { /// Utility class to generate a stream of pseudorandom numbers. class LPPAPI Random : public LuceneObject { public: Random(); Random(int64_t seed); virtual ~Random(); protected: int64_t seed; public: void setSeed(int64_t seed); int32_t nextInt(int32_t limit = INT_MAX); double nextDouble(); protected: int32_t next(int32_t bits); }; } #endif LucenePlusPlus-rel_3.0.4/include/RawPostingList.h000066400000000000000000000024001217574114600220340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RAWPOSTINGLIST_H #define RAWPOSTINGLIST_H #include "LuceneObject.h" namespace Lucene { /// This is the base class for an in-memory posting list, keyed by a Token. {@link TermsHash} maintains a /// hash table holding one instance of this per unique Token. Consumers of TermsHash ({@link TermsHashConsumer}) /// must subclass this class with its own concrete class. FreqProxTermsWriterPostingList is a private inner /// class used for the freq/prox postings, and TermVectorsTermsWriterPostingList is a private inner class used /// to hold TermVectors postings. 
class RawPostingList : public LuceneObject { public: RawPostingList(); virtual ~RawPostingList(); LUCENE_CLASS(RawPostingList); public: static const int32_t BYTES_SIZE; int32_t textStart; int32_t intStart; int32_t byteStart; }; } #endif LucenePlusPlus-rel_3.0.4/include/ReadOnlyDirectoryReader.h000066400000000000000000000025441217574114600236410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef READONLYDIRECTORYREADER_H #define READONLYDIRECTORYREADER_H #include "DirectoryReader.h" namespace Lucene { class ReadOnlyDirectoryReader : public DirectoryReader { public: ReadOnlyDirectoryReader(DirectoryPtr directory, SegmentInfosPtr sis, IndexDeletionPolicyPtr deletionPolicy, int32_t termInfosIndexDivisor); ReadOnlyDirectoryReader(DirectoryPtr directory, SegmentInfosPtr infos, Collection oldReaders, Collection oldStarts, MapStringByteArray oldNormsCache, bool doClone, int32_t termInfosIndexDivisor); ReadOnlyDirectoryReader(IndexWriterPtr writer, SegmentInfosPtr infos, int32_t termInfosIndexDivisor); virtual ~ReadOnlyDirectoryReader(); LUCENE_CLASS(ReadOnlyDirectoryReader); public: /// Tries to acquire the WriteLock on this directory. this method is only valid if this /// IndexReader is directory owner. virtual void acquireWriteLock(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ReadOnlySegmentReader.h000066400000000000000000000014221217574114600232710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef READONLYSEGMENTREADER_H #define READONLYSEGMENTREADER_H #include "SegmentReader.h" namespace Lucene { class ReadOnlySegmentReader : public SegmentReader { public: virtual ~ReadOnlySegmentReader(); LUCENE_CLASS(ReadOnlySegmentReader); public: static void noWrite(); virtual void acquireWriteLock(); virtual bool isDeleted(int32_t n); }; } #endif LucenePlusPlus-rel_3.0.4/include/Reader.h000066400000000000000000000034341217574114600203150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef READER_H #define READER_H #include "LuceneObject.h" namespace Lucene { /// Abstract class for reading character streams. class LPPAPI Reader : public LuceneObject { protected: Reader(); public: virtual ~Reader(); LUCENE_CLASS(Reader); public: static const int32_t READER_EOF; /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length) = 0; /// Skip characters. virtual int64_t skip(int64_t n); /// Close the stream. virtual void close() = 0; /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Mark the present position in the stream. Subsequent calls to reset() will attempt to reposition the /// stream to this point. virtual void mark(int32_t readAheadLimit); /// Reset the stream. If the stream has been marked, then attempt to reposition it at the mark. If the stream /// has not been marked, then attempt to reset it in some way appropriate to the particular stream, for example /// by repositioning it to its starting point. 
virtual void reset(); /// The number of bytes in the stream. virtual int64_t length(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ReaderUtil.h000066400000000000000000000031741217574114600211540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef READERUTIL_H #define READERUTIL_H #include "LuceneObject.h" namespace Lucene { /// Common util methods for dealing with {@link IndexReader}s. class LPPAPI ReaderUtil : public LuceneObject { public: virtual ~ReaderUtil(); LUCENE_CLASS(ReaderUtil); public: /// Gathers sub-readers from reader into a List. static void gatherSubReaders(Collection allSubReaders, IndexReaderPtr reader); /// Returns sub IndexReader that contains the given document id. /// /// @param doc Id of document /// @param reader Parent reader /// @return Sub reader of parent which contains the specified doc id static IndexReaderPtr subReader(int32_t doc, IndexReaderPtr reader); /// Returns sub-reader subIndex from reader. /// /// @param reader Parent reader /// @param subIndex Index of desired sub reader /// @return The subreader at subIndex static IndexReaderPtr subReader(IndexReaderPtr reader, int32_t subIndex); /// Returns index of the searcher/reader for document n in the array used to construct this /// searcher/reader. static int32_t subIndex(int32_t n, Collection docStarts); }; } #endif LucenePlusPlus-rel_3.0.4/include/ReqExclScorer.h000066400000000000000000000037301217574114600216330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef REQEXCLSCORER_H #define REQEXCLSCORER_H #include "Scorer.h" namespace Lucene { /// A Scorer for queries with a required subscorer and an excluding (prohibited) sub DocIdSetIterator. /// This Scorer implements {@link Scorer#skipTo(int32_t)}, and it uses the skipTo() on the given scorers. class ReqExclScorer : public Scorer { public: /// Construct a ReqExclScorer. /// @param reqScorer The scorer that must match, except where /// @param exclDisi indicates exclusion. ReqExclScorer(ScorerPtr reqScorer, DocIdSetIteratorPtr exclDisi); virtual ~ReqExclScorer(); LUCENE_CLASS(ReqExclScorer); protected: ScorerPtr reqScorer; DocIdSetIteratorPtr exclDisi; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} /// is called the first time. /// @return The score of the required scorer. virtual double score(); virtual int32_t advance(int32_t target); protected: /// Advance to non excluded doc. /// /// On entry: ///
    ///
  • reqScorer != null, ///
  • exclScorer != null, ///
  • reqScorer was advanced once via next() or skipTo() and reqScorer.doc() may still be excluded. ///
/// Advances reqScorer a non excluded required doc, if any. /// @return true iff there is a non excluded required doc. int32_t toNonExcluded(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ReqOptSumScorer.h000066400000000000000000000026311217574114600221660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef REQOPTSUMSCORER_H #define REQOPTSUMSCORER_H #include "Scorer.h" namespace Lucene { /// A Scorer for queries with a required part and an optional part. Delays skipTo() on the optional part /// until a score() is needed. This Scorer implements {@link Scorer#skipTo(int32_t)}. class ReqOptSumScorer : public Scorer { public: ReqOptSumScorer(ScorerPtr reqScorer, ScorerPtr optScorer); virtual ~ReqOptSumScorer(); LUCENE_CLASS(ReqOptSumScorer); protected: ScorerPtr reqScorer; ScorerPtr optScorer; public: virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); virtual int32_t docID(); /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} /// is called the first time. /// @return The score of the required scorer, eventually increased by the score of the optional scorer when /// it also matches the current document. virtual double score(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ReusableStringReader.h000066400000000000000000000022751217574114600231710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef REUSABLESTRINGREADER_H #define REUSABLESTRINGREADER_H #include "Reader.h" namespace Lucene { /// Used by DocumentsWriter to implemented a StringReader that can be reset to a new string; we use this /// when tokenizing the string value from a Field. class ReusableStringReader : public Reader { public: ReusableStringReader(); virtual ~ReusableStringReader(); LUCENE_CLASS(ReusableStringReader); public: int32_t upto; int32_t left; String s; public: virtual void init(const String& s); using Reader::read; /// Read characters into a portion of an array. virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); /// Close the stream. virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ReverseOrdFieldSource.h000066400000000000000000000041021217574114600233110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef REVERSEORDFIELDSOURCE_H #define REVERSEORDFIELDSOURCE_H #include "ValueSource.h" namespace Lucene { /// Obtains the ordinal of the field value from the default Lucene {@link FieldCache} using getStringIndex() /// and reverses the order. /// /// The native lucene index order is used to assign an ordinal value for each field value. /// /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. 
Example /// of reverse ordinal (rord): /// /// If there were only three field values: "apple","banana","pear" then rord("apple")=3, rord("banana")=2, /// ord("pear")=1 /// /// WARNING: rord() depends on the position in an index and can thus change when other documents are inserted /// or deleted, or if a MultiSearcher is used. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. class LPPAPI ReverseOrdFieldSource : public ValueSource { public: /// Constructor for a certain field. /// @param field field whose values reverse order is used. ReverseOrdFieldSource(const String& field); virtual ~ReverseOrdFieldSource(); LUCENE_CLASS(ReverseOrdFieldSource); protected: String field; public: virtual String description(); virtual DocValuesPtr getValues(IndexReaderPtr reader); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ScoreCachingWrappingScorer.h000066400000000000000000000036021217574114600243260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SCORECACHINGWRAPPINGSCORER_H #define SCORECACHINGWRAPPINGSCORER_H #include "Scorer.h" namespace Lucene { /// A {@link Scorer} which wraps another scorer and caches the score of the current document. Successive /// calls to {@link #score()} will return the same result and will not invoke the wrapped Scorer's score() /// method, unless the current document has changed. 
/// /// This class might be useful due to the changes done to the {@link Collector} interface, in which the /// score is not computed for a document by default, only if the collector requests it. Some collectors /// may need to use the score in several places, however all they have in hand is a {@link Scorer} object, /// and might end up computing the score of a document more than once. class LPPAPI ScoreCachingWrappingScorer : public Scorer { public: /// Creates a new instance by wrapping the given scorer. ScoreCachingWrappingScorer(ScorerPtr scorer); virtual ~ScoreCachingWrappingScorer(); LUCENE_CLASS(ScoreCachingWrappingScorer); protected: ScorerWeakPtr _scorer; int32_t curDoc; double curScore; public: SimilarityPtr getSimilarity(); virtual double score(); virtual int32_t docID(); virtual int32_t nextDoc(); virtual void score(CollectorPtr collector); virtual int32_t advance(int32_t target); protected: virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); }; } #endif LucenePlusPlus-rel_3.0.4/include/ScoreDoc.h000066400000000000000000000016701217574114600206140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SCOREDOC_H #define SCOREDOC_H #include "LuceneObject.h" namespace Lucene { /// Expert: Returned by low-level search implementations. /// @see TopDocs class LPPAPI ScoreDoc : public LuceneObject { public: ScoreDoc(int32_t doc, double score); virtual ~ScoreDoc(); LUCENE_CLASS(ScoreDoc); public: /// The score of this document for the query. double score; /// A hit document's number. 
/// @see Searcher#doc(int32_t) int32_t doc; public: virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Scorer.h000066400000000000000000000050321217574114600203440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SCORER_H #define SCORER_H #include "DocIdSetIterator.h" namespace Lucene { /// Common scoring functionality for different types of queries. /// /// A Scorer iterates over documents matching a query in increasing order of doc Id. /// /// Document scores are computed using a given Similarity implementation. /// /// NOTE: The values NEGATIVE_INFINITY and POSITIVE_INFINITY are not valid scores. Certain collectors /// (eg {@link TopScoreDocCollector}) will not properly collect hits with these scores. class LPPAPI Scorer : public DocIdSetIterator { public: /// Constructs a Scorer. /// @param similarity The Similarity implementation used by this scorer. Scorer(SimilarityPtr similarity); virtual ~Scorer(); LUCENE_CLASS(Scorer); protected: SimilarityPtr similarity; public: /// Returns the Similarity implementation used by this scorer. SimilarityPtr getSimilarity(); /// Scores and collects all matching documents. /// @param collector The collector to which all matching documents are passed. virtual void score(CollectorPtr collector); /// Returns the score of the current document matching the query. Initially invalid, until {@link /// #nextDoc()} or {@link #advance(int32_t)} is called the first time, or when called from within /// {@link Collector#collect}. virtual double score() = 0; protected: /// Collects matching documents in a range. Hook for optimization. 
/// Note, firstDocID is added to ensure that {@link #nextDoc()} was called before this method. /// /// @param collector The collector to which all matching documents are passed. /// @param max Do not score documents past this. /// @param firstDocID The first document ID (ensures {@link #nextDoc()} is called before this method. /// @return true if more matching documents may remain. virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); friend class BooleanScorer; friend class ScoreCachingWrappingScorer; }; } #endif LucenePlusPlus-rel_3.0.4/include/ScorerDocQueue.h000066400000000000000000000053721217574114600220060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SCORERDOCQUEUE_H #define SCORERDOCQUEUE_H #include "LuceneObject.h" namespace Lucene { /// A ScorerDocQueue maintains a partial ordering of its Scorers such that the least Scorer can always be /// found in constant time. Put()'s and pop()'s require log(size) time. The ordering is by Scorer::doc(). class LPPAPI ScorerDocQueue : public LuceneObject { public: ScorerDocQueue(int32_t maxSize); virtual ~ScorerDocQueue(); LUCENE_CLASS(ScorerDocQueue); protected: Collection heap; int32_t maxSize; int32_t _size; HeapedScorerDocPtr topHSD; // same as heap[1], only for speed public: /// Adds a Scorer to a ScorerDocQueue in log(size) time. If one tries to add more Scorers than maxSize /// ArrayIndexOutOfBound exception is thrown. void put(ScorerPtr scorer); /// Adds a Scorer to the ScorerDocQueue in log(size) time if either the ScorerDocQueue is not full, or /// not lessThan(scorer, top()). /// @return true if scorer is added, false otherwise. 
bool insert(ScorerPtr scorer); /// Returns the least Scorer of the ScorerDocQueue in constant time. Should not be used when the queue /// is empty. ScorerPtr top(); /// Returns document number of the least Scorer of the ScorerDocQueue in constant time. /// Should not be used when the queue is empty. int32_t topDoc(); double topScore(); bool topNextAndAdjustElsePop(); bool topSkipToAndAdjustElsePop(int32_t target); /// Removes and returns the least scorer of the ScorerDocQueue in log(size) time. Should not be used /// when the queue is empty. ScorerPtr pop(); /// Should be called when the scorer at top changes doc() value. void adjustTop(); /// Returns the number of scorers currently stored in the ScorerDocQueue. int32_t size(); /// Removes all entries from the ScorerDocQueue. void clear(); protected: bool checkAdjustElsePop(bool cond); /// Removes the least scorer of the ScorerDocQueue in log(size) time. Should not be used when the /// queue is empty. void popNoResult(); void upHeap(); void downHeap(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Searchable.h000066400000000000000000000126251217574114600211460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEARCHABLE_H #define SEARCHABLE_H #include "LuceneObject.h" namespace Lucene { /// The interface for search implementations. /// /// Searchable is the abstract network protocol for searching. Implementations provide search over a single /// index, over multiple indices, and over indices on remote servers. 
/// /// Queries, filters and sort criteria are designed to be compact so that they may be efficiently passed to a /// remote index, with only the top-scoring hits being returned, rather than every matching hit. /// /// NOTE: this interface is kept public for convenience. Since it is not expected to be implemented directly, /// it may be changed unexpectedly between releases. class LPPAPI Searchable { public: LUCENE_INTERFACE(Searchable); public: /// Lower-level search API. /// /// {@link Collector#collect(int32_t)} is called for every document. Collector-based access to remote /// indexes is discouraged. /// /// Applications should only use this if they need all of the matching documents. The high-level search /// API ({@link Searcher#search(QueryPtr, int32_t)}) is usually more efficient, as it skips non-high-scoring /// hits. /// /// @param weight To match documents /// @param filter If non-null, used to permit documents to be collected. /// @param collector To receive hits virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr collector) = 0; /// Frees resources associated with this Searcher. Be careful not to call this method while you are still /// using objects that reference this Searchable. virtual void close() = 0; /// Returns the number of documents containing term. /// @see IndexReader#docFreq(TermPtr) virtual int32_t docFreq(TermPtr term) = 0; /// For each term in the terms array, calculates the number of documents containing term. Returns an array /// with these document frequencies. Used to minimize number of remote calls. virtual Collection docFreqs(Collection terms) = 0; /// Returns one greater than the largest possible document number. /// @see IndexReader#maxDoc() virtual int32_t maxDoc() = 0; /// Low-level search implementation. Finds the top n hits for query, applying filter if non-null. 
/// Applications should usually call {@link Searcher#search(QueryPtr, int32_t)} or {@link /// Searcher#search(QueryPtr, FilterPtr, int32_t)} instead. virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n) = 0; /// Returns the stored fields of document i. /// @see IndexReader#document(int32_t) virtual DocumentPtr doc(int32_t n) = 0; /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine what /// {@link Field}s to load and how they should be loaded. /// /// NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the /// lazy {@link Field} is loaded an exception may be thrown. If you want the value of a lazy {@link Field} /// to be available after closing you must explicitly load it or fetch the Document again with a new loader. /// /// @param n Get the document at the n'th position /// @param fieldSelector The {@link FieldSelector} to use to determine what Fields should be loaded on the /// Document. May be null, in which case all Fields will be loaded. /// @return The stored fields of the {@link Document} at the n'th position /// /// @see IndexReader#document(int32_t, FieldSelectorPtr) /// @see Fieldable /// @see FieldSelector /// @see SetBasedFieldSelector /// @see LoadFirstFieldSelector virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector) = 0; /// Called to re-write queries into primitive queries. virtual QueryPtr rewrite(QueryPtr query) = 0; /// Low-level implementation method. Returns an Explanation that describes how doc scored against weight. /// /// This is intended to be used in developing Similarity implementations, and for good performance, should /// not be displayed with every hit. Computing an explanation is as expensive as executing the query over /// the entire index. /// /// Applications should call {@link Searcher#explain(QueryPtr, int32_t)}. 
virtual ExplanationPtr explain(WeightPtr weight, int32_t doc) = 0; /// Low-level search implementation with arbitrary sorting. Finds the top n hits for query, applying filter /// if non-null, and sorting the hits by the criteria in sort. /// /// Applications should usually call {@link Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr)} instead. virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/Searcher.h000066400000000000000000000113341217574114600206450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEARCHER_H #define SEARCHER_H #include "Searchable.h" namespace Lucene { /// An abstract base class for search implementations. Implements the main search methods. /// /// Note that you can only access hits from a Searcher as long as it is not yet closed, otherwise an IO /// exception will be thrown. class LPPAPI Searcher : public Searchable, public LuceneObject { public: Searcher(); virtual ~Searcher(); LUCENE_CLASS(Searcher); protected: /// The Similarity implementation used by this searcher. SimilarityPtr similarity; public: /// Search implementation with arbitrary sorting. Finds the top n hits for query, applying filter if /// non-null, and sorting the hits by the criteria in sort. /// /// NOTE: this does not compute scores by default; use {@link IndexSearcher#setDefaultFieldSortScoring} /// to enable scoring. virtual TopFieldDocsPtr search(QueryPtr query, FilterPtr filter, int32_t n, SortPtr sort); /// Lower-level search API. /// /// {@link Collector#collect(int32_t)} is called for every matching document. 
/// /// Applications should only use this if they need all of the matching documents. The high-level /// search API ({@link Searcher#search(QueryPtr, int32_t)}) is usually more efficient, as it skips /// non-high-scoring hits. /// /// Note: The score passed to this method is a raw score. In other words, the score will not necessarily /// be a double whose value is between 0 and 1. virtual void search(QueryPtr query, CollectorPtr results); /// Lower-level search API. /// /// {@link Collector#collect(int32_t)} is called for every matching document. Collector-based access to /// remote indexes is discouraged. /// /// Applications should only use this if they need all of the matching documents. The high-level search /// API ({@link Searcher#search(QueryPtr, FilterPtr, int32_t)}) is usually more efficient, as it skips /// non-high-scoring hits. /// /// @param query To match documents /// @param filter If non-null, used to permit documents to be collected. /// @param results To receive hits virtual void search(QueryPtr query, FilterPtr filter, CollectorPtr results); /// Finds the top n hits for query, applying filter if non-null. virtual TopDocsPtr search(QueryPtr query, FilterPtr filter, int32_t n); /// Finds the top n hits for query. virtual TopDocsPtr search(QueryPtr query, int32_t n); /// Returns an Explanation that describes how doc scored against query. /// /// This is intended to be used in developing Similarity implementations, and for good performance, /// should not be displayed with every hit. Computing an explanation is as expensive as executing the /// query over the entire index. virtual ExplanationPtr explain(QueryPtr query, int32_t doc); /// Set the Similarity implementation used by this Searcher. virtual void setSimilarity(SimilarityPtr similarity); /// Return the Similarity implementation used by this Searcher. /// /// This defaults to the current value of {@link Similarity#getDefault()}. 
virtual SimilarityPtr getSimilarity(); virtual Collection docFreqs(Collection terms); virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr results) = 0; virtual void close() = 0; virtual int32_t docFreq(TermPtr term) = 0; virtual int32_t maxDoc() = 0; virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n) = 0; virtual DocumentPtr doc(int32_t n) = 0; virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector) = 0; virtual QueryPtr rewrite(QueryPtr query) = 0; virtual ExplanationPtr explain(WeightPtr weight, int32_t doc) = 0; virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) = 0; protected: /// Creates a weight for query. /// @return New weight virtual WeightPtr createWeight(QueryPtr query); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentInfo.h000066400000000000000000000162111217574114600213260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTINFO_H #define SEGMENTINFO_H #include "LuceneObject.h" namespace Lucene { /// Information about a segment such as it's name, directory, and files /// related to the segment. class LPPAPI SegmentInfo : public LuceneObject { public: SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir); SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir, bool isCompoundFile, bool hasSingleNormFile); SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir, bool isCompoundFile, bool hasSingleNormFile, int32_t docStoreOffset, const String& docStoreSegment, bool docStoreIsCompoundFile, bool hasProx); /// Construct a new SegmentInfo instance by reading a previously saved SegmentInfo from input. 
/// @param dir directory to load from. /// @param format format of the segments info file. /// @param input input handle to read segment info from. SegmentInfo(DirectoryPtr dir, int32_t format, IndexInputPtr input); virtual ~SegmentInfo(); LUCENE_CLASS(SegmentInfo); public: static const int32_t NO; // no norms; no deletes; static const int32_t YES; // have norms; have deletes; static const int32_t CHECK_DIR; // must check dir to see if there are norms/deletions static const int32_t WITHOUT_GEN; // a file name that has no GEN in it. protected: // true if this is a segments file written before lock-less commits (2.1) bool preLockless; // current generation of del file; NO if there are no deletes; CHECK_DIR if it's a pre-2.1 segment // (and we must check filesystem); YES or higher if there are deletes at generation N int64_t delGen; // current generation of each field's norm file. If this array is null, for lockLess this means no // separate norms. For preLockLess this means we must check filesystem. If this array is not null, // its values mean: NO says this field has no separate norms; CHECK_DIR says it is a preLockLess // segment and filesystem must be checked; >= YES says this field has separate norms with the // specified generation Collection normGen; // NO if it is not; YES if it is; CHECK_DIR if it's pre-2.1 (ie, must check file system to see if // .cfs and .nrm exist) uint8_t isCompoundFile; // true if this segment maintains norms in a single file; false otherwise this is currently false for // segments populated by DocumentWriter and true for newly created merged segments (both compound and // non compound). 
bool hasSingleNormFile; // cached list of files that this segment uses in the Directory HashSet _files; // total byte size of all of our files (computed on demand) int64_t _sizeInBytes; // if this segment shares stored fields & vectors, this offset is where in that file this segment's // docs begin int32_t docStoreOffset; // name used to derive fields/vectors file we share with other segments String docStoreSegment; // whether doc store files are stored in compound file (*.cfx) bool docStoreIsCompoundFile; // How many deleted docs in this segment, or -1 if not yet known (if it's an older index) int32_t delCount; // True if this segment has any fields with omitTermFreqAndPositions == false bool hasProx; MapStringString diagnostics; public: String name; // unique name in dir int32_t docCount; // number of docs in seg DirectoryPtr dir; // where segment resides public: /// Copy everything from src SegmentInfo into our instance. void reset(SegmentInfoPtr src); void setDiagnostics(MapStringString diagnostics); MapStringString getDiagnostics(); void setNumFields(int32_t numFields); /// Returns total size in bytes of all of files used by this segment. int64_t sizeInBytes(); bool hasDeletions(); void advanceDelGen(); void clearDelGen(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); String getDelFileName(); /// Returns true if this field for this segment has saved a separate norms file (__N.sX). /// @param fieldNumber the field index to check bool hasSeparateNorms(int32_t fieldNumber); /// Returns true if any fields in this segment have separate norms. bool hasSeparateNorms(); /// Increment the generation count for the norms file for this field. /// @param fieldIndex field whose norm file will be rewritten void advanceNormGen(int32_t fieldIndex); /// Get the file name for the norms file for this field. /// @param number field index String getNormFileName(int32_t number); /// Mark whether this segment is stored as a compound file. 
/// @param isCompoundFile true if this is a compound file; else, false void setUseCompoundFile(bool isCompoundFile); /// Returns true if this segment is stored as a compound file; else, false. bool getUseCompoundFile(); int32_t getDelCount(); void setDelCount(int32_t delCount); int32_t getDocStoreOffset(); bool getDocStoreIsCompoundFile(); void setDocStoreIsCompoundFile(bool v); String getDocStoreSegment(); void setDocStoreOffset(int32_t offset); void setDocStore(int32_t offset, const String& segment, bool isCompoundFile); /// Save this segment's info. void write(IndexOutputPtr output); void setHasProx(bool hasProx); bool getHasProx(); /// Return all files referenced by this SegmentInfo. The returns List is a locally cached List so /// you should not modify it. HashSet files(); /// Used for debugging. String segString(DirectoryPtr dir); /// We consider another SegmentInfo instance equal if it has the same dir and same name. virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); protected: void addIfExists(HashSet files, const String& fileName); /// Called whenever any change is made that affects which files this segment has. void clearFiles(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentInfoCollection.h000066400000000000000000000025661217574114600233520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTINFOCOLLECTION_H #define SEGMENTINFOCOLLECTION_H #include "LuceneObject.h" namespace Lucene { /// A collection of SegmentInfo objects to be used as a base class for {@link SegmentInfos} class LPPAPI SegmentInfoCollection : public LuceneObject { public: SegmentInfoCollection(); virtual ~SegmentInfoCollection(); LUCENE_CLASS(SegmentInfoCollection); protected: Collection segmentInfos; public: int32_t size(); bool empty(); void clear(); void add(SegmentInfoPtr info); void add(int32_t pos, SegmentInfoPtr info); void addAll(SegmentInfoCollectionPtr segmentInfos); bool equals(SegmentInfoCollectionPtr other); int32_t find(SegmentInfoPtr info); bool contains(SegmentInfoPtr info); void remove(int32_t pos); void remove(int32_t start, int32_t end); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentInfos.h000066400000000000000000000210071217574114600215100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTINFOS_H #define SEGMENTINFOS_H #include "SegmentInfoCollection.h" namespace Lucene { /// A collection of SegmentInfo objects with methods for operating on those segments in relation to the file system. class LPPAPI SegmentInfos : public SegmentInfoCollection { public: SegmentInfos(); virtual ~SegmentInfos(); LUCENE_CLASS(SegmentInfos); public: /// The file format version, a negative number. Works since counter, the old 1st entry, is always >= 0 static const int32_t FORMAT; /// This format adds details used for lockless commits. 
It differs slightly from the previous format in that file names /// are never re-used (write once). Instead, each file is written to the next generation. For example, segments_1, /// segments_2, etc. This allows us to not use a commit lock. /// See fileformats for details. static const int32_t FORMAT_LOCKLESS; /// This format adds a "hasSingleNormFile" flag into each segment info. static const int32_t FORMAT_SINGLE_NORM_FILE; /// This format allows multiple segments to share a single vectors and stored fields file. static const int32_t FORMAT_SHARED_DOC_STORE; /// This format adds a checksum at the end of the file to ensure all bytes were successfully written. static const int32_t FORMAT_CHECKSUM; /// This format adds the deletion count for each segment. This way IndexWriter can efficiently report numDocs(). static const int32_t FORMAT_DEL_COUNT; /// This format adds the boolean hasProx to record if any fields in the segment store prox information (ie, have /// omitTermFreqAndPositions == false) static const int32_t FORMAT_HAS_PROX; /// This format adds optional commit userData storage. static const int32_t FORMAT_USER_DATA; /// This format adds optional per-segment string diagnostics storage, and switches userData to Map static const int32_t FORMAT_DIAGNOSTICS; /// This must always point to the most recent file format. static const int32_t CURRENT_FORMAT; int32_t counter; // used to name new segments private: /// Advanced configuration of retry logic in loading segments_N file. static int32_t defaultGenFileRetryCount; static int32_t defaultGenFileRetryPauseMsec; static int32_t defaultGenLookaheadCount; /// Counts how often the index has been changed by adding or deleting docs. /// Starting with the current time in milliseconds forces to create unique version numbers. 
int64_t version; int64_t generation; // generation of the "segments_N" for the next commit int64_t lastGeneration; // generation of the "segments_N" file we last successfully read // or wrote; this is normally the same as generation except if // there was an exception that had interrupted a commit MapStringString userData; // Opaque map that user can specify during IndexWriter::commit static MapStringString singletonUserData; static InfoStreamPtr infoStream; ChecksumIndexOutputPtr pendingSegnOutput; public: SegmentInfoPtr info(int32_t i); String getCurrentSegmentFileName(); String getNextSegmentFileName(); /// Read a particular segmentFileName. Note that this may throw an IOException if a commit is in process. void read(DirectoryPtr directory, const String& segmentFileName); /// This version of read uses the retry logic (for lock-less commits) to find the right segments file to load. void read(DirectoryPtr directory); /// Returns a copy of this instance, also copying each SegmentInfo. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Version number when this SegmentInfos was generated. int64_t getVersion(); int64_t getGeneration(); int64_t getLastGeneration(); /// Returns a new SegmentInfos containing the SegmentInfo instances in the specified range first (inclusive) to /// last (exclusive), so total number of segments returned is last-first. SegmentInfosPtr range(int32_t first, int32_t last); /// Carry over generation numbers from another SegmentInfos. void updateGeneration(SegmentInfosPtr other); void rollbackCommit(DirectoryPtr dir); /// Call this to start a commit. This writes the new segments file, but writes an invalid checksum at the end, so /// that it is not visible to readers. Once this is called you must call. /// {@link #finishCommit} to complete the commit or /// {@link #rollbackCommit} to abort it. 
void prepareCommit(DirectoryPtr dir); /// Returns all file names referenced by SegmentInfo instances matching the provided Directory (ie files associated /// with any "external" segments are skipped). The returned collection is recomputed on each invocation. HashSet files(DirectoryPtr dir, bool includeSegmentsFile); void finishCommit(DirectoryPtr dir); /// Writes & syncs to the Directory dir, taking care to remove the segments file on exception. void commit(DirectoryPtr dir); String segString(DirectoryPtr directory); MapStringString getUserData(); void setUserData(MapStringString data); /// Replaces all segments in this instance, but keeps generation, version, counter so that future commits remain /// write once. void replace(SegmentInfosPtr other); bool hasExternalSegments(DirectoryPtr dir); static int64_t getCurrentSegmentGeneration(HashSet files); static int64_t getCurrentSegmentGeneration(DirectoryPtr directory); static String getCurrentSegmentFileName(HashSet files); static String getCurrentSegmentFileName(DirectoryPtr directory); static int64_t generationFromSegmentsFileName(const String& fileName); /// Current version number from segments file. static int64_t readCurrentVersion(DirectoryPtr directory); /// Returns userData from latest segments file. static MapStringString readCurrentUserData(DirectoryPtr directory); /// If non-null, information about retries when loading the segments file will be printed to this. static void setInfoStream(InfoStreamPtr infoStream); /// Set how many times to try loading the segments.gen file contents to determine current segment generation. This file /// is only referenced when the primary method (listing the directory) fails. static void setDefaultGenFileRetryCount(int32_t count); /// @see #setDefaultGenFileRetryCount static int32_t getDefaultGenFileRetryCount(); /// Set how many milliseconds to pause in between attempts to load the segments.gen file. 
static void setDefaultGenFileRetryPauseMsec(int32_t msec); /// @see #setDefaultGenFileRetryPauseMsec static int32_t getDefaultGenFileRetryPauseMsec(); /// Set how many times to try incrementing the gen when loading the segments file. This only runs if the primary /// (listing directory) and secondary (opening segments.gen file) methods fail to find the segments file. static void setDefaultGenLookaheadCount(int32_t count); /// @see #setDefaultGenLookaheadCount static int32_t getDefaultGenLookahedCount(); /// @see #setInfoStream static InfoStreamPtr getInfoStream(); static void message(const String& message); protected: void write(DirectoryPtr directory); friend class FindSegmentsFile; }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentMergeInfo.h000066400000000000000000000022451217574114600223100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTMERGEINFO_H #define SEGMENTMERGEINFO_H #include "Term.h" namespace Lucene { class SegmentMergeInfo : public LuceneObject { public: SegmentMergeInfo(int32_t b, TermEnumPtr te, IndexReaderPtr r); virtual ~SegmentMergeInfo(); LUCENE_CLASS(SegmentMergeInfo); protected: TermPositionsPtr postings; // use getPositions() Collection docMap; // use getDocMap() public: TermPtr term; int32_t base; int32_t ord; // the position of the segment in a MultiReader TermEnumPtr termEnum; IndexReaderWeakPtr _reader; int32_t delCount; public: Collection getDocMap(); TermPositionsPtr getPositions(); bool next(); void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentMergeQueue.h000066400000000000000000000015161217574114600225010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTMERGEQUEUE_H #define SEGMENTMERGEQUEUE_H #include "PriorityQueue.h" namespace Lucene { class SegmentMergeQueue : public PriorityQueue { public: SegmentMergeQueue(int32_t size); virtual ~SegmentMergeQueue(); LUCENE_CLASS(SegmentMergeQueue); public: void close(); protected: virtual bool lessThan(const SegmentMergeInfoPtr& first, const SegmentMergeInfoPtr& second); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentMerger.h000066400000000000000000000142101217574114600216510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTMERGER_H #define SEGMENTMERGER_H #include "LuceneObject.h" namespace Lucene { /// The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, into a single /// Segment. After adding the appropriate readers, call the merge method to combine the segments. /// /// If the compoundFile flag is set, then the segments will be merged into a compound file. /// @see #merge /// @see #add class SegmentMerger : public LuceneObject { public: SegmentMerger(DirectoryPtr dir, const String& name); SegmentMerger(IndexWriterPtr writer, const String& name, OneMergePtr merge); virtual ~SegmentMerger(); LUCENE_CLASS(SegmentMerger); protected: DirectoryPtr directory; String segment; int32_t termIndexInterval; Collection readers; FieldInfosPtr fieldInfos; int32_t mergedDocs; CheckAbortPtr checkAbort; /// Whether we should merge doc stores (stored fields and vectors files). When all segments we /// are merging already share the same doc store files, we don't need to merge the doc stores. bool mergeDocStores; /// Maximum number of contiguous documents to bulk-copy when merging stored fields static const int32_t MAX_RAW_MERGE_DOCS; Collection matchingSegmentReaders; Collection rawDocLengths; Collection rawDocLengths2; SegmentMergeQueuePtr queue; bool omitTermFreqAndPositions; ByteArray payloadBuffer; Collection< Collection > docMaps; Collection delCounts; public: /// norms header placeholder static const uint8_t NORMS_HEADER[]; static const int32_t NORMS_HEADER_LENGTH; public: bool hasProx(); /// Add an IndexReader to the collection of readers that are to be merged void add(IndexReaderPtr reader); /// @param i The index of the reader to return /// @return The i'th reader to be merged IndexReaderPtr segmentReader(int32_t i); /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor. 
/// @return The number of documents that were merged int32_t merge(); /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor. /// @param mergeDocStores if false, we will not merge the stored fields nor vectors files /// @return The number of documents that were merged int32_t merge(bool mergeDocStores); /// close all IndexReaders that have been added. Should not be called before merge(). void closeReaders(); HashSet getMergedFiles(); HashSet createCompoundFile(const String& fileName); /// @return The number of documents in all of the readers int32_t mergeFields(); Collection< Collection > getDocMaps(); Collection getDelCounts(); protected: void addIndexed(IndexReaderPtr reader, FieldInfosPtr fInfos, HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions); void setMatchingSegmentReaders(); int32_t copyFieldsWithDeletions(FieldsWriterPtr fieldsWriter, IndexReaderPtr reader, FieldsReaderPtr matchingFieldsReader); int32_t copyFieldsNoDeletions(FieldsWriterPtr fieldsWriter, IndexReaderPtr reader, FieldsReaderPtr matchingFieldsReader); /// Merge the TermVectors from each of the segments into the new one. void mergeVectors(); void copyVectorsWithDeletions(TermVectorsWriterPtr termVectorsWriter, TermVectorsReaderPtr matchingVectorsReader, IndexReaderPtr reader); void copyVectorsNoDeletions(TermVectorsWriterPtr termVectorsWriter, TermVectorsReaderPtr matchingVectorsReader, IndexReaderPtr reader); void mergeTerms(); void mergeTermInfos(FormatPostingsFieldsConsumerPtr consumer); /// Process postings from multiple segments all positioned on the same term. Writes out merged entries /// into freqOutput and the proxOutput streams. 
/// @param smis array of segments /// @param n number of cells in the array actually occupied /// @return number of documents across all segments where this term was found int32_t appendPostings(FormatPostingsTermsConsumerPtr termsConsumer, Collection smis, int32_t n); void mergeNorms(); }; class CheckAbort : public LuceneObject { public: CheckAbort(OneMergePtr merge, DirectoryPtr dir); virtual ~CheckAbort(); LUCENE_CLASS(CheckAbort); protected: double workCount; OneMergePtr merge; DirectoryWeakPtr _dir; public: /// Records the fact that roughly units amount of work have been done since this method was last called. /// When adding time-consuming code into SegmentMerger, you should test different values for units to /// ensure that the time in between calls to merge.checkAborted is up to ~ 1 second. virtual void work(double units); }; class CheckAbortNull : public CheckAbort { public: CheckAbortNull(); virtual ~CheckAbortNull(); LUCENE_CLASS(CheckAbortNull); public: /// do nothing virtual void work(double units); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentReader.h000066400000000000000000000216501217574114600216400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTREADER_H #define SEGMENTREADER_H #include "IndexReader.h" #include "CloseableThreadLocal.h" namespace Lucene { class LPPAPI SegmentReader : public IndexReader { public: SegmentReader(); virtual ~SegmentReader(); LUCENE_CLASS(SegmentReader); protected: bool readOnly; INTERNAL: BitVectorPtr deletedDocs; SegmentReaderRefPtr deletedDocsRef; CoreReadersPtr core; FieldsReaderLocalPtr fieldsReaderLocal; SegmentInfoPtr rollbackSegmentInfo; CloseableThreadLocal termVectorsLocal; FieldInfosPtr fieldInfos(); /// Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. /// @return TermVectorsReader TermVectorsReaderPtr getTermVectorsReader(); TermVectorsReaderPtr getTermVectorsReaderOrig(); FieldsReaderPtr getFieldsReader(); MapStringNorm _norms; private: SegmentInfoPtr si; int32_t readBufferSize; bool deletedDocsDirty; bool normsDirty; int32_t pendingDeleteCount; bool rollbackHasChanges; bool rollbackDeletedDocsDirty; bool rollbackNormsDirty; int32_t rollbackPendingDeleteCount; // optionally used for the .nrm file shared by multiple norms IndexInputPtr singleNormStream; SegmentReaderRefPtr singleNormRef; public: virtual void initialize(); using IndexReader::document; using IndexReader::termPositions; static SegmentReaderPtr get(bool readOnly, SegmentInfoPtr si, int32_t termInfosIndexDivisor); static SegmentReaderPtr get(bool readOnly, DirectoryPtr dir, SegmentInfoPtr si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor); void openDocStores(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual LuceneObjectPtr clone(bool openReadOnly, LuceneObjectPtr other = LuceneObjectPtr()); SegmentReaderPtr reopenSegment(SegmentInfoPtr si, bool doClone, bool openReadOnly); static bool hasDeletions(SegmentInfoPtr si); /// Returns true if any documents have been deleted virtual bool hasDeletions(); static bool 
usesCompoundFile(SegmentInfoPtr si); static bool hasSeparateNorms(SegmentInfoPtr si); HashSet files(); /// Returns an enumeration of all the terms in the index. virtual TermEnumPtr terms(); /// Returns an enumeration of all terms starting at a given term. virtual TermEnumPtr terms(TermPtr t); /// Get the {@link Document} at the n'th position. virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n); /// Returns an enumeration of all the documents which contain term. virtual TermDocsPtr termDocs(TermPtr term); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs(); /// Returns an unpositioned {@link TermPositions} enumerator. virtual TermPositionsPtr termPositions(); /// Returns the number of documents containing the term t. virtual int32_t docFreq(TermPtr t); /// Returns the number of documents in this index. virtual int32_t numDocs(); /// Returns one greater than the largest possible document number. virtual int32_t maxDoc(); /// Get a list of unique field names that exist in this index and have the specified field option information. virtual HashSet getFieldNames(FieldOption fieldOption); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. virtual ByteArray norms(const String& field); /// Read norms into a pre-allocated array. virtual void norms(const String& field, ByteArray norms, int32_t offset); bool termsIndexLoaded(); /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run, sharing a /// segment that's still being merged. 
This method is not thread safe, and relies on the synchronization in IndexWriter void loadTermsIndex(int32_t termsIndexDivisor); bool normsClosed(); // for testing only bool normsClosed(const String& field); // for testing only /// Return a term frequency vector for the specified document and field. The vector returned contains term /// numbers and frequencies for all terms in the specified field of this document, if the field had /// storeTermVector flag set. If the flag was not set, the method returns null. virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays /// of the {@link TermFreqVector}. virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); /// Map all the term vectors for all fields in a Document virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); /// Return an array of term frequency vectors for the specified document. The array contains a vector for /// each vectorized field in the document. Each vector vector contains term numbers and frequencies for all /// terms in a given vectorized field. If no such fields existed, the method returns null. virtual Collection getTermFreqVectors(int32_t docNumber); /// Return the name of the segment this reader is reading. String getSegmentName(); /// Return the SegmentInfo of the segment this reader is reading. SegmentInfoPtr getSegmentInfo(); void setSegmentInfo(SegmentInfoPtr info); void startCommit(); void rollbackCommit(); /// Returns the directory this index resides in. virtual DirectoryPtr directory(); /// This is necessary so that cloned SegmentReaders (which share the underlying postings data) /// will map to the same entry in the FieldCache. virtual LuceneObjectPtr getFieldCacheKey(); virtual LuceneObjectPtr getDeletesCacheKey(); /// Returns the number of unique terms (across all fields) in this reader. 
virtual int64_t getUniqueTermCount(); static SegmentReaderPtr getOnlySegmentReader(DirectoryPtr dir); static SegmentReaderPtr getOnlySegmentReader(IndexReaderPtr reader); virtual int32_t getTermInfosIndexDivisor(); protected: bool checkDeletedCounts(); void loadDeletedDocs(); /// Clones the norm bytes. May be overridden by subclasses. /// @param bytes Byte array to clone /// @return New BitVector virtual ByteArray cloneNormBytes(ByteArray bytes); /// Clones the deleteDocs BitVector. May be overridden by subclasses. /// @param bv BitVector to clone /// @return New BitVector virtual BitVectorPtr cloneDeletedDocs(BitVectorPtr bv); /// Implements commit. virtual void doCommit(MapStringString commitUserData); virtual void commitChanges(MapStringString commitUserData); /// Implements close. virtual void doClose(); /// Implements deletion of the document numbered docNum. /// Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. virtual void doDelete(int32_t docNum); /// Implements actual undeleteAll() in subclass. virtual void doUndeleteAll(); /// can return null if norms aren't stored ByteArray getNorms(const String& field); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); void openNorms(DirectoryPtr cfsDir, int32_t readBufferSize); friend class ReaderPool; friend class IndexWriter; friend class Norm; }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentTermDocs.h000066400000000000000000000050241217574114600221530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMDOCS_H #define SEGMENTTERMDOCS_H #include "TermPositions.h" namespace Lucene { class SegmentTermDocs : public TermPositions, public LuceneObject { public: SegmentTermDocs(SegmentReaderPtr parent); virtual ~SegmentTermDocs(); LUCENE_CLASS(SegmentTermDocs); protected: SegmentReaderWeakPtr _parent; IndexInputPtr _freqStream; int32_t count; int32_t df; BitVectorPtr deletedDocs; int32_t _doc; int32_t _freq; int32_t skipInterval; int32_t maxSkipLevels; DefaultSkipListReaderPtr skipListReader; int64_t freqBasePointer; int64_t proxBasePointer; int64_t skipPointer; bool haveSkipped; bool currentFieldStoresPayloads; bool currentFieldOmitTermFreqAndPositions; public: /// Sets this to the data for a term. virtual void seek(TermPtr term); /// Sets this to the data for the current term in a {@link TermEnum}. virtual void seek(TermEnumPtr termEnum); virtual void seek(TermInfoPtr ti, TermPtr term); virtual void close(); /// Returns the current document number. virtual int32_t doc(); /// Returns the frequency of the term within the current document. virtual int32_t freq(); /// Moves to the next pair in the enumeration. virtual bool next(); /// Optimized implementation. virtual int32_t read(Collection docs, Collection freqs); /// Optimized implementation. virtual bool skipTo(int32_t target); /// Used for testing virtual IndexInputPtr freqStream(); virtual void freqStream(IndexInputPtr freqStream); protected: virtual void skippingDoc(); virtual int32_t readNoTf(Collection docs, Collection freqs, int32_t length); /// Overridden by SegmentTermPositions to skip in prox stream. 
virtual void skipProx(int64_t proxPointer, int32_t payloadLength); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentTermEnum.h000066400000000000000000000057411217574114600221750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMENUM_H #define SEGMENTTERMENUM_H #include "TermEnum.h" namespace Lucene { class SegmentTermEnum : public TermEnum { public: SegmentTermEnum(); SegmentTermEnum(IndexInputPtr i, FieldInfosPtr fis, bool isi); virtual ~SegmentTermEnum(); LUCENE_CLASS(SegmentTermEnum); protected: IndexInputPtr input; TermBufferPtr termBuffer; TermBufferPtr prevBuffer; TermBufferPtr scanBuffer; // used for scanning TermInfoPtr _termInfo; int32_t format; bool isIndex; int32_t formatM1SkipInterval; public: FieldInfosPtr fieldInfos; int64_t size; int64_t position; int64_t indexPointer; int32_t indexInterval; int32_t skipInterval; int32_t maxSkipLevels; public: virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); void seek(int64_t pointer, int64_t p, TermPtr t, TermInfoPtr ti); /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Optimized scan, without allocating new terms. Return number of invocations to next(). int32_t scanTo(TermPtr term); /// Returns the current Term in the enumeration. /// Initially invalid, valid after next() called for the first time. virtual TermPtr term(); /// Returns the previous Term enumerated. Initially null. TermPtr prev(); /// Returns the current TermInfo in the enumeration. /// Initially invalid, valid after next() called for the first time. TermInfoPtr termInfo(); /// Sets the argument to the current TermInfo in the enumeration. 
/// Initially invalid, valid after next() called for the first time. void termInfo(TermInfoPtr ti); /// Returns the docFreq of the current Term in the enumeration. /// Initially invalid, valid after next() called for the first time. virtual int32_t docFreq(); /// Returns the freqPointer from the current TermInfo in the enumeration. /// Initially invalid, valid after next() called for the first time. int64_t freqPointer(); /// Returns the proxPointer from the current TermInfo in the enumeration. /// Initially invalid, valid after next() called for the first time. int64_t proxPointer(); /// Closes the enumeration to further activity, freeing resources. virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentTermPositionVector.h000066400000000000000000000033401217574114600242510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMPOSITIONVECTOR_H #define SEGMENTTERMPOSITIONVECTOR_H #include "SegmentTermVector.h" namespace Lucene { class SegmentTermPositionVector : public SegmentTermVector { public: SegmentTermPositionVector(const String& field, Collection terms, Collection termFreqs, Collection< Collection > positions, Collection< Collection > offsets); virtual ~SegmentTermPositionVector(); LUCENE_CLASS(SegmentTermPositionVector); protected: Collection< Collection > positions; Collection< Collection > offsets; protected: static const Collection EMPTY_TERM_POS(); public: /// Returns an array of TermVectorOffsetInfo in which the term is found. 
/// @param index The position in the array to get the offsets from /// @return An array of TermVectorOffsetInfo objects or the empty list virtual Collection getOffsets(int32_t index); /// Returns an array of positions in which the term is found. /// Terms are identified by the index at which its number appears in the term String array obtained from the indexOf method. virtual Collection getTermPositions(int32_t index); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentTermPositions.h000066400000000000000000000056421217574114600232600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMPOSITIONS_H #define SEGMENTTERMPOSITIONS_H #include "SegmentTermDocs.h" namespace Lucene { class SegmentTermPositions : public SegmentTermDocs { public: SegmentTermPositions(SegmentReaderPtr parent); virtual ~SegmentTermPositions(); LUCENE_CLASS(SegmentTermPositions); protected: IndexInputPtr proxStream; int32_t proxCount; int32_t position; /// The current payload length int32_t payloadLength; /// Indicates whether the payload of the current position has been read from the proxStream yet bool needToLoadPayload; // these variables are being used to remember information for a lazy skip int64_t lazySkipPointer; int32_t lazySkipProxCount; public: using SegmentTermDocs::seek; virtual void seek(TermInfoPtr ti, TermPtr term); virtual void close(); /// Returns next position in the current document. virtual int32_t nextPosition(); /// Moves to the next pair in the enumeration. virtual bool next(); /// Not supported virtual int32_t read(Collection docs, Collection freqs); /// Returns the length of the payload at the current term position. 
virtual int32_t getPayloadLength(); /// Returns the payload data at the current term position. virtual ByteArray getPayload(ByteArray data, int32_t offset); /// Checks if a payload can be loaded at this position. virtual bool isPayloadAvailable(); protected: int32_t readDeltaPosition(); virtual void skippingDoc(); virtual void skipProx(int64_t proxPointer, int32_t payloadLength); virtual void skipPositions(int32_t n); virtual void skipPayload(); /// It is not always necessary to move the prox pointer to a new document after the freq pointer has /// been moved. Consider for example a phrase query with two terms: the freq pointer for term 1 has to /// move to document x to answer the question if the term occurs in that document. But only if term 2 /// also matches document x, the positions have to be read to figure out if term 1 and term 2 appear next /// to each other in document x and thus satisfy the query. So we move the prox pointer lazily to the /// document as soon as positions are requested. virtual void lazySkip(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentTermVector.h000066400000000000000000000033771217574114600225360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMVECTOR_H #define SEGMENTTERMVECTOR_H #include "TermPositionVector.h" namespace Lucene { class SegmentTermVector : public TermPositionVector, public LuceneObject { public: SegmentTermVector(const String& field, Collection terms, Collection termFreqs); virtual ~SegmentTermVector(); LUCENE_CLASS(SegmentTermVector); protected: String field; Collection terms; Collection termFreqs; public: /// @return The number of the field this vector is associated with virtual String getField(); virtual String toString(); /// @return The number of terms in the term vector. virtual int32_t size(); /// @return An Array of term texts in ascending order. virtual Collection getTerms(); /// @return Array of term frequencies. virtual Collection getTermFrequencies(); /// Return an index in the term numbers array returned from getTerms at which the term with the /// specified term appears. virtual int32_t indexOf(const String& term); /// Just like indexOf(int) but searches for a number of terms at the same time. virtual Collection indexesOf(Collection termNumbers, int32_t start, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.4/include/SegmentWriteState.h000066400000000000000000000023321217574114600225250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTWRITESTATE_H #define SEGMENTWRITESTATE_H #include "LuceneObject.h" namespace Lucene { class SegmentWriteState : public LuceneObject { public: SegmentWriteState(DocumentsWriterPtr docWriter, DirectoryPtr directory, const String& segmentName, const String& docStoreSegmentName, int32_t numDocs, int32_t numDocsInStore, int32_t termIndexInterval); virtual ~SegmentWriteState(); LUCENE_CLASS(SegmentWriteState); public: DocumentsWriterWeakPtr _docWriter; DirectoryPtr directory; String segmentName; String docStoreSegmentName; int32_t numDocs; int32_t termIndexInterval; int32_t numDocsInStore; HashSet flushedFiles; public: String segmentFileName(const String& ext); }; } #endif LucenePlusPlus-rel_3.0.4/include/SerialMergeScheduler.h000066400000000000000000000020571217574114600231510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SERIALMERGESCHEDULER_H #define SERIALMERGESCHEDULER_H #include "MergeScheduler.h" namespace Lucene { /// A {@link MergeScheduler} that simply does each merge sequentially, using the current thread. class LPPAPI SerialMergeScheduler : public MergeScheduler { public: virtual ~SerialMergeScheduler(); LUCENE_CLASS(SerialMergeScheduler); public: /// Just do the merges in sequence. We do this "synchronized" so that even if the application is using /// multiple threads, only one merge may run at a time. virtual void merge(IndexWriterPtr writer); /// Close this MergeScheduler. 
virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Set.h000066400000000000000000000073101217574114600176430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SET_H #define SET_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle set based collections that can be safely copied and shared template < class TYPE, class LESS = std::less > class Set : public LuceneSync { public: typedef Set this_type; typedef std::set< TYPE, LESS, LuceneAllocator > set_type; typedef typename set_type::iterator iterator; typedef typename set_type::const_iterator const_iterator; typedef TYPE value_type; virtual ~Set() { } protected: boost::shared_ptr setContainer; public: static this_type newInstance() { this_type instance; instance.setContainer = Lucene::newInstance(); return instance; } template static this_type newInstance(ITER first, ITER last) { this_type instance; instance.setContainer = Lucene::newInstance(first, last); return instance; } void reset() { setContainer.reset(); } int32_t size() const { return (int32_t)setContainer->size(); } bool empty() const { return setContainer->empty(); } void clear() { setContainer->clear(); } iterator begin() { return setContainer->begin(); } iterator end() { return setContainer->end(); } const_iterator begin() const { return setContainer->begin(); } const_iterator end() const { return setContainer->end(); } bool add(const TYPE& type) { return setContainer->insert(type).second; } template void addAll(ITER first, ITER last) { setContainer->insert(first, last); } bool remove(const TYPE& type) { return (setContainer->erase(type) > 0); } iterator find(const TYPE& type) { return 
setContainer->find(type); } bool contains(const TYPE& type) const { return (setContainer->find(type) != setContainer->end()); } bool equals(const this_type& other) const { return equals(other, std::equal_to()); } template bool equals(const this_type& other, PRED comp) const { if (setContainer->size() != other.setContainer->size()) return false; return std::equal(setContainer->begin(), setContainer->end(), other.setContainer->begin(), comp); } void swap(this_type& other) { setContainer.swap(other->setContainer); } operator bool() const { return setContainer; } bool operator! () const { return !setContainer; } bool operator== (const this_type& other) { return (setContainer == other.setContainer); } bool operator!= (const this_type& other) { return (setContainer != other.setContainer); } }; } #endif LucenePlusPlus-rel_3.0.4/include/SetBasedFieldSelector.h000066400000000000000000000034721217574114600232540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SETBASEDFIELDSELECTOR_H #define SETBASEDFIELDSELECTOR_H #include "FieldSelector.h" namespace Lucene { /// Declare what fields to load normally and what fields to load lazily class LPPAPI SetBasedFieldSelector : public FieldSelector { public: /// Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. /// If both are null, the Document will not have any {@link Field} on it. /// @param fieldsToLoad A Set of {@link String} field names to load. May be empty, but not null /// @param lazyFieldsToLoad A Set of {@link String} field names to load lazily. 
May be empty, but not null SetBasedFieldSelector(HashSet fieldsToLoad, HashSet lazyFieldsToLoad); virtual ~SetBasedFieldSelector(); LUCENE_CLASS(SetBasedFieldSelector); protected: HashSet fieldsToLoad; HashSet lazyFieldsToLoad; public: /// Indicate whether to load the field with the given name or not. If the {@link Field#name()} is not in /// either of the initializing Sets, then {@link FieldSelectorResult#NO_LOAD} is returned. If a Field name /// is in both fieldsToLoad and lazyFieldsToLoad, lazy has precedence. /// @param fieldName The {@link Field} name to check /// @return The {@link FieldSelectorResult} virtual FieldSelectorResult accept(const String& fieldName); }; } #endif LucenePlusPlus-rel_3.0.4/include/Similarity.h000066400000000000000000001026131217574114600212400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMILARITY_H #define SIMILARITY_H #include "Explanation.h" namespace Lucene { /// Scoring API. /// /// Similarity defines the components of Lucene scoring. Overriding computation of these components is /// a convenient way to alter Lucene scoring. /// /// Suggested reading: /// Introduction To Information Retrieval, Chapter 6. /// /// The following describes how Lucene scoring evolves from underlying information retrieval models to /// (efficient) implementation. We first brief on VSM Score, then derive from it Lucene's Conceptual Scoring /// Formula, from which, finally, evolves Lucene's Practical Scoring Function (the latter is connected directly /// with Lucene classes and methods). 
/// /// Lucene combines Boolean model (BM) of /// Information Retrieval with Vector Space Model /// (VSM) of Information Retrieval - documents "approved" by BM are scored by VSM. /// /// In VSM, documents and queries are represented as weighted vectors in a multi-dimensional space, where each /// distinct index term is a dimension, and weights are Tf-idf /// values. /// /// VSM does not require weights to be Tf-idf values, but Tf-idf values are believed to produce search results /// of high quality, and so Lucene is using Tf-idf. Tf and Idf are described in more detail below, but for now, /// for completion, let's just say that for given term t and document (or query) x, Tf(t,x) varies with the /// number of occurrences of term t in x (when one increases so does the other) and idf(t) similarly varies with /// the inverse of the number of index documents containing term t. /// /// VSM score of document d for query q is the Cosine /// Similarity of the weighted query vectors V(q) and V(d): /// ///
 
/// /// /// ///
/// /// ///
/// /// /// /// /// ///
/// cosine-similarity(q,d)   =   /// /// /// /// /// ///
V(q) · V(d)
–––––––––
|V(q)| |V(d)|
///
///
///
///
VSM Score
///
///
 
/// /// Where V(q) · V(d) is the dot product of the /// weighted vectors, and |V(q)| and |V(d)| are their /// Euclidean norms. /// /// Note: the above equation can be viewed as the dot product of the normalized weighted vectors, in the sense /// that dividing V(q) by its euclidean norm is normalizing it to a unit vector. /// /// Lucene refines VSM score for both search quality and usability: ///
    ///
  • Normalizing V(d) to the unit vector is known to be problematic in that it removes all document length /// information. For some documents removing this info is probably ok, eg. a document made by duplicating a /// certain paragraph 10 times, especially if that paragraph is made of distinct terms. But for a document which /// contains no duplicated paragraphs, this might be wrong. To avoid this problem, a different document length /// normalization factor is used, which normalizes to a vector equal to or larger than the unit vector: /// doc-len-norm(d). ///
  • ///
  • At indexing, users can specify that certain documents are more important than others, by assigning a /// document boost. For this, the score of each document is also multiplied by its boost value doc-boost(d). ///
  • ///
  • Lucene is field based, hence each query term applies to a single field, document length normalization /// is by the length of the certain field, and in addition to document boost there are also document fields /// boosts. ///
  • ///
  • The same field can be added to a document during indexing several times, and so the boost of that field /// is the multiplication of the boosts of the separate additions (or parts) of that field within the document. ///
  • ///
  • At search time users can specify boosts to each query, sub-query, and each query term, hence the /// contribution of a query term to the score of a document is multiplied by the boost of that query term /// query-boost(q). ///
  • ///
  • A document may match a multi term query without containing all the terms of that query (this is correct /// for some of the queries), and users can further reward documents matching more query terms through a /// coordination factor, which is usually larger when more terms are matched: coord-factor(q,d). ///
  • ///
/// /// Under the simplifying assumption of a single field in the index, we get Lucene's Conceptual scoring formula: /// ///
 
/// /// /// ///
/// /// ///
/// /// /// /// /// /// ///
/// score(q,d)   =   /// coord-factor(q,d) ·   /// query-boost(q) ·   /// /// /// /// /// ///
V(q) · V(d)
–––––––––
|V(q)|
///
///   ·   doc-len-norm(d) ///   ·   doc-boost(d) ///
///
///
///
Lucene Conceptual Scoring Formula
///
///
 
/// /// The conceptual formula is a simplification in the sense that (1) terms and documents are fielded and (2) /// boosts are usually per query term rather than per query. /// /// We now describe how Lucene implements this conceptual scoring formula, and derive from it Lucene's Practical /// Scoring Function. /// /// For efficient score computation some scoring components are computed and aggregated in advance: ///
    ///
  • Query-boost for the query (actually for each query term) is known when search starts. ///
  • ///
  • Query Euclidean norm |V(q)| can be computed when search starts, as it is independent of the document /// being scored. From search optimization perspective, it is a valid question why bother to normalize the /// query at all, because all scored documents will be multiplied by the same |V(q)|, and hence documents ranks /// (their order by score) will not be affected by this normalization. There are two good reasons to keep this /// normalization: ///
      ///
    • Recall that Cosine Similarity can be used /// find how similar two documents are. One can use Lucene for eg. clustering, and use a document as a query to /// compute its similarity to other documents. In this use case it is important that the score of document d3 /// for query d1 is comparable to the score of document d3 for query d2. In other words, scores of a document for /// two distinct queries should be comparable. There are other applications that may require this. And this is /// exactly what normalizing the query vector V(q) provides: comparability (to a certain extent) of two or more /// queries. ///
    • ///
    • Applying query normalization on the scores helps to keep the scores around the unit vector, hence preventing /// loss of score data because of floating point precision limitations. ///
    • ///
    ///
  • ///
  • Document length norm doc-len-norm(d) and document boost doc-boost(d) are known at indexing time. They are /// computed in advance and their multiplication is saved as a single value in the index: norm(d). (In the equations /// below, norm(t in d) means norm(field(t) in doc d) where field(t) is the field associated with term t.) ///
  • ///
/// /// Lucene's Practical Scoring Function is derived from the above. The color codes demonstrate how it relates to /// those of the conceptual formula: /// /// /// /// ///
/// /// ///
/// /// /// /// /// /// /// /// /// /// /// ///
/// score(q,d)   =   /// coord(q,d)  ·  /// queryNorm(q)  ·  /// /// /// /// ( /// tf(t in d)  ·  /// idf(t)2  ·  /// t.getBoost() ·  /// norm(t,d) /// ) ///
t in q
///
///
///
Lucene Practical Scoring Function
///
/// /// where ///
    ///
  1. /// /// tf(t in d) /// correlates to the term's frequency, defined as the number of times term t appears in the currently /// scored document d. Documents that have more occurrences of a given term receive a higher score. /// Note that tf(t in q) is assumed to be 1 and therefore it does not appear in this equation, /// However if a query contains twice the same term, there will be two term-queries with that same term /// and hence the computation would still be correct (although not very efficient). /// The default computation for tf(t in d) in {@link DefaultSimilarity#tf(float) DefaultSimilarity} is: /// ///
     
    /// /// /// /// /// ///
    /// {@link DefaultSimilarity#tf(float) tf(t in d)}   =   /// /// frequency½ ///
    ///
     
    ///
  2. /// ///
  3. /// /// idf(t) stands for Inverse Document Frequency. This value correlates to the inverse of docFreq /// (the number of documents in which the term t appears). This means rarer terms give higher contribution /// to the total score. idf(t) appears for t in both the query and the document, hence it is squared in /// the equation. The default computation for idf(t) in {@link DefaultSimilarity#idf(int, int) DefaultSimilarity} is: /// ///
     
    /// /// /// /// /// /// /// ///
    /// {@link DefaultSimilarity#idf(int, int) idf(t)}  =   /// /// 1 + log ( /// /// /// /// /// ///
    numDocs
    –––––––––
    docFreq+1
    ///
    /// ) ///
    ///
     
    ///
  4. /// ///
  5. /// /// coord(q,d) /// is a score factor based on how many of the query terms are found in the specified document. Typically, a /// document that contains more of the query's terms will receive a higher score than another document with /// fewer query terms. This is a search time factor computed in {@link #coord(int, int) coord(q,d)} by the /// Similarity in effect at search time. ///
     
    ///
  6. /// ///
  7. /// /// queryNorm(q) /// /// is a normalizing factor used to make scores between queries comparable. This factor does not affect /// document ranking (since all ranked documents are multiplied by the same factor), but rather just attempts /// to make scores from different queries (or even different indexes) comparable. This is a search time /// factor computed by the Similarity in effect at search time. /// /// The default computation in {@link DefaultSimilarity#queryNorm(float) DefaultSimilarity} /// produces a Euclidean norm: ///
     
    /// /// /// /// /// ///
    /// queryNorm(q)   =   /// {@link DefaultSimilarity#queryNorm(float) queryNorm(sumOfSquaredWeights)} ///   =   /// /// /// /// /// ///
    1
    /// –––––––––––––– ///
    sumOfSquaredWeights½
    ///
    ///
     
    /// /// The sum of squared weights (of the query terms) is computed by the query {@link Weight} object. For example, /// a {@link BooleanQuery boolean query} computes this value as: /// ///
     
    /// /// /// /// /// /// /// /// /// /// /// ///
    /// {@link Weight#sumOfSquaredWeights() sumOfSquaredWeights}   =   /// {@link Query#getBoost() q.getBoost()} 2 ///  ·  /// /// /// /// ( /// idf(t)  ·  /// t.getBoost() /// ) 2 ///
    t in q
    ///
     
    /// ///
  8. /// ///
  9. /// /// t.getBoost() /// is a search time boost of term t in the query q as specified in the query text or as set by application /// calls to {@link Query#setBoost(float) setBoost()}. Notice that there is really no direct API for accessing /// a boost of one term in a multi term query, but rather multi terms are represented in a query as multi /// {@link TermQuery TermQuery} objects, and so the boost of a term in the query is accessible by calling /// the sub-query {@link Query#getBoost() getBoost()}. ///
     
    ///
  10. /// ///
  11. /// /// norm(t,d) encapsulates a few (indexing time) boost and length factors: /// ///
      ///
    • Document boost - set by calling /// {@link Document#setBoost(float) doc.setBoost()} /// before adding the document to the index. ///
    • ///
    • Field boost - set by calling /// {@link Fieldable#setBoost(float) field.setBoost()} /// before adding the field to a document. ///
    • ///
    • {@link #lengthNorm(String, int) lengthNorm(field)} - computed when the document is added to /// the index in accordance with the number of tokens of this field in the document, so that shorter fields /// contribute more to the score. LengthNorm is computed by the Similarity class in effect at indexing. ///
    • ///
    /// /// When a document is added to the index, all the above factors are multiplied. /// If the document has multiple fields with the same name, all their boosts are multiplied together: /// ///
     
    /// /// /// /// /// /// /// /// /// /// /// ///
    /// norm(t,d)   =   /// {@link Document#getBoost() doc.getBoost()} ///  ·  /// {@link #lengthNorm(String, int) lengthNorm(field)} ///  ·  /// /// /// /// {@link Fieldable#getBoost() f.getBoost}() ///
    field f in d named as t
    ///
     
    /// However the resulted norm value is {@link #encodeNorm(float) encoded} as a single byte before being stored. /// At search time, the norm byte value is read from the index {@link Directory directory} and {@link /// #decodeNorm(byte) decoded} back to a float norm value. This encoding/decoding, while reducing index size, /// comes with the price of precision loss - it is not guaranteed that decode(encode(x)) = x. For instance, /// decode(encode(0.89)) = 0.75. ///
     
    /// Compression of norm values to a single byte saves memory at search time, because once a field is referenced /// at search time, its norms - for all documents - are maintained in memory. ///
     
    /// The rationale supporting such lossy compression of norm values is that given the difficulty (and inaccuracy) /// of users to express their true information need by a query, only big differences matter. ///
     
    /// Last, note that search time is too late to modify this norm part of scoring, eg. by using a different /// {@link Similarity} for search. ///
     
    ///
  12. ///
/// /// @see #setDefault(SimilarityPtr) /// @see IndexWriter#setSimilarity(SimilarityPtr) /// @see Searcher#setSimilarity(SimilarityPtr) class LPPAPI Similarity : public LuceneObject { public: Similarity(); virtual ~Similarity(); LUCENE_CLASS(Similarity); protected: static const int32_t NO_DOC_ID_PROVIDED; protected: static const Collection NORM_TABLE(); public: /// Return the default Similarity implementation used by indexing and search code. /// This is initially an instance of {@link DefaultSimilarity}. /// @see Searcher#setSimilarity(SimilarityPtr) /// @see IndexWriter#setSimilarity(SimilarityPtr) static SimilarityPtr getDefault(); /// Decodes a normalization factor stored in an index. /// @see #encodeNorm(double) static double decodeNorm(uint8_t b); /// Returns a table for decoding normalization bytes. /// @see #encodeNorm(double) static const Collection getNormDecoder(); /// Compute the normalization value for a field, given the accumulated state of term processing for this /// field (see {@link FieldInvertState}). /// /// Implementations should calculate a float value based on the field state and then return that value. /// /// For backward compatibility this method by default calls {@link #lengthNorm(String, int32_t)} passing /// {@link FieldInvertState#getLength()} as the second argument, and then multiplies this value by {@link /// FieldInvertState#getBoost()}. /// /// @param field Field name /// @param state Current processing state for this field /// @return The calculated float norm virtual double computeNorm(const String& fieldName, FieldInvertStatePtr state); /// Computes the normalization value for a field given the total number of terms contained in a field. /// These values, together with field boosts, are stored in an index and multiplied into scores for hits /// on each field by the search code. 
/// /// Matches in longer fields are less precise, so implementations of this method usually return smaller /// values when numTokens is large, and larger values when numTokens is small. /// /// Note that the return values are computed under {@link IndexWriter#addDocument(DocumentPtr)} and then /// stored using {@link #encodeNorm(double)}. Thus they have limited precision, and documents must be /// re-indexed if this method is altered. /// /// @param fieldName The name of the field /// @param numTokens The total number of tokens contained in fields named fieldName of doc. /// @return A normalization factor for hits on this field of this document /// @see Field#setBoost(double) virtual double lengthNorm(const String& fieldName, int32_t numTokens) = 0; /// Computes the normalization value for a query given the sum of the squared weights of each of the query /// terms. This value is multiplied into the weight of each query term. While the classic query /// normalization factor is computed as 1/sqrt(sumOfSquaredWeights), other implementations might completely /// ignore sumOfSquaredWeights (ie return 1). /// /// This does not affect ranking, but the default implementation does make scores from different queries /// more comparable than they would be by eliminating the magnitude of the Query vector as a factor in the /// score. /// /// @param sumOfSquaredWeights The sum of the squares of query term weights /// @return a normalization factor for query weights virtual double queryNorm(double sumOfSquaredWeights) = 0; /// Encodes a normalization factor for storage in an index. /// /// The encoding uses a three-bit mantissa, a five-bit exponent, and the zero-exponent point at 15, thus /// representing values from around 7x10^9 to 2x10^-9 with about one significant decimal digit of accuracy. /// Zero is also represented. Negative numbers are rounded up to zero. Values too large to represent /// are rounded down to the largest representable value. 
Positive values too small to represent are rounded /// up to the smallest positive representable value. /// /// @see Field#setBoost(double) static uint8_t encodeNorm(double f); /// Computes a score factor based on a term or phrase's frequency in a document. This value is multiplied /// by the {@link #idf(int32_t, int32_t)} factor for each term in the query and these products are then /// summed to form the initial score for a document. /// /// Terms and phrases repeated in a document indicate the topic of the document, so implementations of this /// method usually return larger values when freq is large, and smaller values when freq is small. /// /// The default implementation calls {@link #tf(double)}. /// /// @param freq The frequency of a term within a document /// @return A score factor based on a term's within-document frequency virtual double tf(int32_t freq); /// Computes the amount of a sloppy phrase match, based on an edit distance. This value is summed for /// each sloppy phrase match in a document to form the frequency that is passed to {@link #tf(double)}. /// /// A phrase match with a small edit distance to a document passage more closely matches the document, so /// implementations of this method usually return larger values when the edit distance is small and /// smaller values when it is large. /// /// @see PhraseQuery#setSlop(int32_t) /// @param distance The edit distance of this sloppy phrase match /// @return The frequency increment for this match virtual double sloppyFreq(int32_t distance) = 0; /// Computes a score factor based on a term or phrase's frequency in a document. This value is multiplied /// by the {@link #idf(int32_t, int32_t)} factor for each term in the query and these products are then /// summed to form the initial score for a document. 
/// /// Terms and phrases repeated in a document indicate the topic of the document, so implementations of this /// method usually return larger values when freq is large, and smaller values when freq is small. /// /// @param freq The frequency of a term within a document /// @return A score factor based on a term's within-document frequency virtual double tf(double freq) = 0; /// Computes a score factor for a simple term and returns an explanation for that score factor. /// /// The default implementation uses: ///
        /// idf(searcher->docFreq(term), searcher->maxDoc());
        /// 
/// /// Note that {@link Searcher#maxDoc()} is used instead of {@link IndexReader#numDocs() IndexReader#numDocs()} /// because also {@link Searcher#docFreq(TermPtr)} is used, and when the latter is inaccurate, so is {@link /// Searcher#maxDoc()}, and in the same direction. In addition, {@link Searcher#maxDoc()} is more efficient /// to compute. /// /// @param term The term in question /// @param searcher The document collection being searched /// @return An IDFExplain object that includes both an idf score factor and an explanation for the term. virtual IDFExplanationPtr idfExplain(TermPtr term, SearcherPtr searcher); /// Computes a score factor for a phrase. /// /// The default implementation sums the idf factor for each term in the phrase. /// /// @param terms The terms in the phrase /// @param searcher The document collection being searched /// @return An IDFExplain object that includes both an idf score factor for the phrase and an explanation /// for each term. virtual IDFExplanationPtr idfExplain(Collection terms, SearcherPtr searcher); /// Computes a score factor based on a term's document frequency (the number of documents which contain the /// term). This value is multiplied by the {@link #tf(int32_t)} factor for each term in the query and these /// products are then summed to form the initial score for a document. /// /// Terms that occur in fewer documents are better indicators of topic, so implementations of this method /// usually return larger values for rare terms, and smaller values for common terms. /// /// @param docFreq The number of documents which contain the term /// @param numDocs The total number of documents in the collection /// @return A score factor based on the term's document frequency virtual double idf(int32_t docFreq, int32_t numDocs) = 0; /// Computes a score factor based on the fraction of all query terms that a document contains. This value /// is multiplied into scores. 
/// /// The presence of a large portion of the query terms indicates a better match with the query, so /// implementations of this method usually return larger values when the ratio between these parameters is /// large and smaller values when the ratio between them is small. /// /// @param overlap The number of query terms matched in the document /// @param maxOverlap The total number of terms in the query /// @return A score factor based on term overlap with the query virtual double coord(int32_t overlap, int32_t maxOverlap) = 0; /// Calculate a scoring factor based on the data in the payload. Overriding implementations are responsible /// for interpreting what is in the payload. Lucene makes no assumptions about what is in the byte array. /// /// The default implementation returns 1. /// /// @param docId The docId currently being scored. If this value is {@link #NO_DOC_ID_PROVIDED}, then it /// should be assumed that the PayloadQuery implementation does not provide document information /// @param fieldName The fieldName of the term this payload belongs to /// @param start The start position of the payload /// @param end The end position of the payload /// @param payload The payload byte array to be scored /// @param offset The offset into the payload array /// @param length The length in the array /// @return An implementation dependent float to be used as a scoring factor virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.4/include/SimilarityDelegator.h000066400000000000000000000030051217574114600230620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SIMILARITYDELEGATOR_H #define SIMILARITYDELEGATOR_H #include "Similarity.h" namespace Lucene { /// Delegating scoring implementation. Useful in {@link Query#getSimilarity(Searcher)} implementations, /// to override only certain methods of a Searcher's Similarity implementation. class LPPAPI SimilarityDelegator : public Similarity { public: SimilarityDelegator(SimilarityPtr delegee); virtual ~SimilarityDelegator(); LUCENE_CLASS(SimilarityDelegator); protected: SimilarityPtr delegee; public: virtual double computeNorm(const String& field, FieldInvertStatePtr state); virtual double lengthNorm(const String& fieldName, int32_t numTokens); virtual double queryNorm(double sumOfSquaredWeights); virtual double tf(double freq); virtual double sloppyFreq(int32_t distance); virtual double idf(int32_t docFreq, int32_t numDocs); virtual double coord(int32_t overlap, int32_t maxOverlap); virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.4/include/SimpleAnalyzer.h000066400000000000000000000015741217574114600220550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEANALYZER_H #define SIMPLEANALYZER_H #include "Analyzer.h" namespace Lucene { /// An {@link Analyzer} that filters {@link LetterTokenizer} with {@link LowerCaseFilter} class LPPAPI SimpleAnalyzer : public Analyzer { public: virtual ~SimpleAnalyzer(); LUCENE_CLASS(SimpleAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/SimpleFSDirectory.h000066400000000000000000000031361217574114600224610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEFSDIRECTORY_H #define SIMPLEFSDIRECTORY_H #include "FSDirectory.h" namespace Lucene { /// A straightforward implementation of {@link FSDirectory} using std::ofstream and std::ifstream. class LPPAPI SimpleFSDirectory : public FSDirectory { public: /// Create a new SimpleFSDirectory for the named location and {@link NativeFSLockFactory}. /// @param path the path of the directory. /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) SimpleFSDirectory(const String& path, LockFactoryPtr lockFactory = LockFactoryPtr()); virtual ~SimpleFSDirectory(); LUCENE_CLASS(SimpleFSDirectory); public: /// Creates an IndexOutput for the file with the given name. virtual IndexOutputPtr createOutput(const String& name); /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory implementation may ignore the buffer size. 
virtual IndexInputPtr openInput(const String& name); /// Creates an IndexInput for the file with the given name. virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); }; } #endif LucenePlusPlus-rel_3.0.4/include/SimpleFSLockFactory.h000066400000000000000000000030371217574114600227350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEFSLOCKFACTORY_H #define SIMPLEFSLOCKFACTORY_H #include "FSLockFactory.h" #include "Lock.h" namespace Lucene { /// Implements {@link LockFactory} using {@link File#createNewFile()}. /// @see LockFactory class LPPAPI SimpleFSLockFactory : public FSLockFactory { public: /// Create a SimpleFSLockFactory instance, with null (unset) lock directory. When you pass this factory /// to a {@link FSDirectory} subclass, the lock directory is automatically set to the directory itself. /// Be sure to create one instance for each directory your create! SimpleFSLockFactory(); /// Instantiate using the provided directory name. /// @param lockDir where lock files should be created. SimpleFSLockFactory(const String& lockDir); virtual ~SimpleFSLockFactory(); LUCENE_CLASS(SimpleFSLockFactory); public: /// Return a new Lock instance identified by lockName. virtual LockPtr makeLock(const String& lockName); /// Attempt to clear (forcefully unlock and remove) the specified lock. virtual void clearLock(const String& lockName); }; } #endif LucenePlusPlus-rel_3.0.4/include/SimpleLRUCache.h000066400000000000000000000051321217574114600216500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLELRUCACHE_H #define SIMPLELRUCACHE_H #include #include "LuceneObject.h" namespace Lucene { /// General purpose LRU cache map. /// Accessing an entry will keep the entry cached. {@link #get(const KEY&)} and /// {@link #put(const KEY&, const VALUE&)} results in an access to the corresponding entry. template class SimpleLRUCache : public LuceneObject { public: typedef std::pair key_value; typedef std::list< key_value > key_list; typedef typename key_list::const_iterator const_iterator; typedef boost::unordered_map< KEY, typename key_list::iterator, HASH, EQUAL, LuceneAllocator< std::pair > > map_type; typedef typename map_type::const_iterator map_iterator; SimpleLRUCache(int32_t cacheSize) { this->cacheSize = cacheSize; } virtual ~SimpleLRUCache() { } protected: int32_t cacheSize; key_list cacheList; map_type cacheMap; public: void put(const KEY& key, const VALUE& value) { cacheList.push_front(std::make_pair(key, value)); cacheMap[key] = cacheList.begin(); if ((int32_t)cacheList.size() > cacheSize) { cacheMap.erase(cacheList.back().first); cacheList.pop_back(); } } VALUE get(const KEY& key) { map_iterator find = cacheMap.find(key); if (find == cacheMap.end()) return VALUE(); VALUE value(find->second->second); cacheList.erase(find->second); cacheList.push_front(std::make_pair(key, value)); cacheMap[key] = cacheList.begin(); return value; } bool contains(const KEY& key) const { return (cacheMap.find(key) != cacheMap.end()); } int32_t size() const { return (int32_t)cacheList.size(); } const_iterator begin() const { return cacheList.begin(); } const_iterator end() const { return cacheList.end(); } }; }; #endif 
LucenePlusPlus-rel_3.0.4/include/SingleInstanceLockFactory.h000066400000000000000000000032211217574114600241540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SINGLEINSTANCELOCKFACTORY_H #define SINGLEINSTANCELOCKFACTORY_H #include "LockFactory.h" namespace Lucene { /// Implements {@link LockFactory} for a single in-process instance, meaning all /// locking will take place through this one instance. Only use this {@link LockFactory} /// when you are certain all IndexReaders and IndexWriters for a given index are running /// against a single shared in-process Directory instance. This is currently the /// default locking for RAMDirectory. /// @see LockFactory class LPPAPI SingleInstanceLockFactory : public LockFactory { public: SingleInstanceLockFactory(); virtual ~SingleInstanceLockFactory(); LUCENE_CLASS(SingleInstanceLockFactory); protected: HashSet locks; public: /// Return a new Lock instance identified by lockName. /// @param lockName name of the lock to be created. virtual LockPtr makeLock(const String& lockName); /// Attempt to clear (forcefully unlock and remove) the /// specified lock. Only call this at a time when you are /// certain this lock is no longer in use. /// @param lockName name of the lock to be cleared. virtual void clearLock(const String& lockName); }; } #endif LucenePlusPlus-rel_3.0.4/include/SingleTermEnum.h000066400000000000000000000022131217574114600220030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SINGLETERMENUM_H #define SINGLETERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating a single term. /// /// This can be used by {@link MultiTermQuery}s that need only visit one term, but want to preserve /// MultiTermQuery semantics such as {@link MultiTermQuery#rewriteMethod}. class LPPAPI SingleTermEnum : public FilteredTermEnum { public: SingleTermEnum(IndexReaderPtr reader, TermPtr singleTerm); virtual ~SingleTermEnum(); LUCENE_CLASS(SingleTermEnum); protected: TermPtr singleTerm; bool _endEnum; public: virtual double difference(); protected: virtual bool endEnum(); virtual bool termCompare(TermPtr term); }; } #endif LucenePlusPlus-rel_3.0.4/include/SloppyPhraseScorer.h000066400000000000000000000072751217574114600227310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SLOPPYPHRASESCORER_H #define SLOPPYPHRASESCORER_H #include "PhraseScorer.h" namespace Lucene { class SloppyPhraseScorer : public PhraseScorer { public: SloppyPhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, int32_t slop, ByteArray norms); virtual ~SloppyPhraseScorer(); LUCENE_CLASS(SloppyPhraseScorer); protected: int32_t slop; Collection repeats; Collection tmpPos; // for flipping repeating pps bool checkedRepeats; public: /// Score a candidate doc for all slop-valid position-combinations (matches) encountered while /// traversing/hopping the PhrasePositions. 
The score contribution of a match depends on the distance: /// - highest score for distance=0 (exact match). /// - score gets lower as distance gets higher. /// Example: for query "a b"~2, a document "x a b a y" can be scored twice: once for "a b" (distance=0), /// and once for "b a" (distance=2). /// Possibly not all valid combinations are encountered, because for efficiency we always propagate the /// least PhrasePosition. This allows to base on PriorityQueue and move forward faster. /// As result, for example, document "a b c b a" would score differently for queries "a b c"~4 and /// "c b a"~4, although they really are equivalent. Similarly, for doc "a b c b a f g", query "c b"~2 /// would get same score as "g f"~2, although "c b"~2 could be matched twice. We may want to fix this /// in the future (currently not, for performance reasons). virtual double phraseFreq(); protected: /// Flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back. /// Assumes: pp!=pp2, pp2 in pq, pp not in pq. Called only when there are repeating pps. PhrasePositionsPtr flip(PhrasePositionsPtr pp, PhrasePositionsPtr pp2); /// Init PhrasePositions in place. /// There is a one time initialization for this scorer: /// - Put in repeats[] each pp that has another pp with same position in the doc. /// - Also mark each such pp by pp.repeats = true. /// Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient. /// In particular, this allows to score queries with no repetitions with no overhead due to this computation. /// - Example 1 - query with no repetitions: "ho my"~2 /// - Example 2 - query with repetitions: "ho my my"~2 /// - Example 3 - query with repetitions: "my ho my"~2 /// Init per doc with repeats in query, includes propagating some repeating pp's to avoid false phrase detection. /// @return end (max position), or -1 if any term ran out (ie. 
done) int32_t initPhrasePositions(); /// We disallow two pp's to have the same TermPosition, thereby verifying multiple occurrences in the query /// of the same word would go elsewhere in the matched doc. /// @return null if differ (i.e. valid) otherwise return the higher offset PhrasePositions out of the first /// two PPs found to not differ. PhrasePositionsPtr termPositionsDiffer(PhrasePositionsPtr pp); }; } #endif LucenePlusPlus-rel_3.0.4/include/SmallDouble.h000066400000000000000000000021151217574114600213110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SMALLDOUBLE_H #define SMALLDOUBLE_H #include "LuceneObject.h" namespace Lucene { /// Floating point numbers smaller than 32 bits. class SmallDouble : public LuceneObject { public: virtual ~SmallDouble(); LUCENE_CLASS(SmallDouble); public: /// Converts a floating point number to an 8 bit float. /// Values less than zero are all mapped to zero. /// Values are truncated (rounded down) to the nearest 8 bit value. /// Values between zero and the smallest representable value are rounded up. static uint8_t doubleToByte(double f); /// Converts an 8 bit floating point number to a double. static double byteToDouble(uint8_t b); }; } #endif LucenePlusPlus-rel_3.0.4/include/SnapshotDeletionPolicy.h000066400000000000000000000040701217574114600235530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SNAPSHOTDELETIONPOLICY_H #define SNAPSHOTDELETIONPOLICY_H #include "IndexDeletionPolicy.h" namespace Lucene { class LPPAPI SnapshotDeletionPolicy : public IndexDeletionPolicy { public: SnapshotDeletionPolicy(IndexDeletionPolicyPtr primary); virtual ~SnapshotDeletionPolicy(); LUCENE_CLASS(SnapshotDeletionPolicy); protected: IndexCommitPtr lastCommit; IndexDeletionPolicyPtr primary; String _snapshot; public: /// This is called once when a writer is first instantiated to give the policy a chance to remove old /// commit points. virtual void onInit(Collection commits); /// This is called each time the writer completed a commit. This gives the policy a chance to remove /// old commit points with each commit. virtual void onCommit(Collection commits); /// Take a snapshot of the most recent commit to the index. You must call release() to free this snapshot. /// Note that while the snapshot is held, the files it references will not be deleted, which will consume /// additional disk space in your index. If you take a snapshot at a particularly bad time (say just before /// you call optimize()) then in the worst case this could consume an extra 1X of your total index size, /// until you release the snapshot. virtual IndexCommitPtr snapshot(); /// Release the currently held snapshot. virtual void release(); protected: Collection wrapCommits(Collection commits); friend class MyCommitPoint; }; } #endif LucenePlusPlus-rel_3.0.4/include/Sort.h000066400000000000000000000123421217574114600200400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SORT_H #define SORT_H #include "LuceneObject.h" namespace Lucene { /// Encapsulates sort criteria for returned hits. /// /// The fields used to determine sort order must be carefully chosen. Documents must contain a single term /// in such a field, and the value of the term should indicate the document's relative position in a given /// sort order. The field must be indexed, but should not be tokenized, and does not need to be stored /// (unless you happen to want it back with the rest of your document data). In other words: /// ///
    /// document->add(newLucene(L"byNumber", StringUtils::toString(x), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
    /// 
/// /// Valid Types of Values /// /// There are four possible kinds of term values which may be put into sorting fields: Integers, Longs, Doubles, /// or Strings. Unless {@link SortField SortField} objects are specified, the type of value in the field is /// determined by parsing the first term in the field. /// /// Integer term values should contain only digits and an optional preceding negative sign. Values must be base /// 10 and in the range INT_MIN and INT_MAX inclusive. Documents which should appear first in the sort should /// have low value integers, later documents high values (ie. the documents should be numbered 1..n where 1 is /// the first and n the last). /// /// Long term values should contain only digits and an optional preceding negative sign. Values must be base 10 /// and in the range LLONG_MIN and LLONG_MAX inclusive. Documents which should appear first in the sort should /// have low value integers, later documents high values. /// /// Double term values should conform to values accepted by Double (except that NaN and Infinity are not /// supported). Documents which should appear first in the sort should have low values, later documents high /// values. /// /// String term values can contain any valid String, but should not be tokenized. The values are sorted according /// to their comparable natural order. Note that using this type of term value has higher memory requirements /// than the other two types. /// /// Object Reuse /// /// One of these objects can be used multiple times and the sort order changed between usages. /// This class is thread safe. /// /// Memory Usage /// /// Sorting uses of caches of term values maintained by the internal HitQueue(s). The cache is static and /// contains an integer or double array of length IndexReader::maxDoc() for each field name for which a sort is /// performed. In other words, the size of the cache in bytes is: /// ///
    /// 4 * IndexReader::maxDoc() * (# of different fields actually used to sort)
    /// 
/// /// For String fields, the cache is larger: in addition to the above array, the value of every term in the /// field is kept in memory. If there are many unique terms in the field, this could be quite large. /// /// Note that the size of the cache is not affected by how many fields are in the index and might be used to /// sort - only by the ones actually used to sort a result set. class LPPAPI Sort : public LuceneObject { public: /// Sorts by computed relevance. This is the same sort criteria as calling {@link /// Searcher#search(QueryPtr, int32_t) Searcher#search()} without a sort criteria, only with slightly more /// overhead. Sort(); /// Sorts by the criteria in the given SortField. Sort(SortFieldPtr field); /// Sorts in succession by the criteria in each SortField. Sort(Collection fields); virtual ~Sort(); LUCENE_CLASS(Sort); public: /// Internal representation of the sort criteria Collection fields; public: /// Represents sorting by computed relevance. Using this sort criteria returns the same results as calling /// {@link Searcher#search(QueryPtr, int32_t) Searcher#search()} without a sort criteria, only with slightly /// more overhead. static SortPtr RELEVANCE(); /// Represents sorting by index order. static SortPtr INDEXORDER(); /// Sets the sort to the given criteria. void setSort(SortFieldPtr field); /// Sets the sort to the given criteria in succession. void setSort(Collection fields); /// Representation of the sort criteria. /// @return Array of SortField objects used in this sort criteria Collection getSort(); virtual String toString(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SortField.h000066400000000000000000000155551217574114600210150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SORTFIELD_H #define SORTFIELD_H #include "LuceneObject.h" namespace Lucene { /// Stores information about how to sort documents by terms in an individual field. Fields must be indexed /// in order to sort by them. class LPPAPI SortField : public LuceneObject { public: /// Creates a sort by terms in the given field with the type of term values explicitly given. /// @param field Name of field to sort by. Can be null if type is SCORE or DOC. /// @param type Type of values in the terms. /// @param reverse True if natural order should be reversed. SortField(const String& field, int32_t type, bool reverse = false); /// Creates a sort, possibly in reverse, by terms in the given field, parsed to numeric values using a /// custom {@link Parser}. /// @param field Name of field to sort by /// @param parser Instance of a {@link Parser}, which must subclass one of the existing numeric parsers from /// {@link FieldCache}. Sort type is inferred by testing which numeric parser the parser subclasses. /// @param reverse True if natural order should be reversed. SortField(const String& field, ParserPtr parser, bool reverse = false); /// Creates a sort, possibly in reverse, by terms in the given field sorted according to the given locale. /// @param field Name of field to sort by, cannot be null. /// @param locale Locale of values in the field. /// @param reverse True if natural order should be reversed. SortField(const String& field, const std::locale& locale, bool reverse = false); /// Creates a sort, possibly in reverse, with a custom comparison function. /// @param field Name of field to sort by; cannot be null. /// @param comparator Returns a comparator for sorting hits. /// @param reverse True if natural order should be reversed. 
SortField(const String& field, FieldComparatorSourcePtr comparator, bool reverse = false); virtual ~SortField(); LUCENE_CLASS(SortField); public: /// Sort by document score (relevancy). Sort values are Double and higher values are at the front. static const int32_t SCORE; /// Sort by document number (index order). Sort values are Integer and lower values are at the front. static const int32_t DOC; /// Sort using term values as Strings. Sort values are String and lower values are at the front. static const int32_t STRING; /// Sort using term values as Integers. Sort values are Integer and lower values are at the front. static const int32_t INT; /// Sort using term values as Floats. Sort values are Float and lower values are at the front. static const int32_t FLOAT; /// Sort using term values as Longs. Sort values are Long and lower values are at the front. static const int32_t LONG; /// Sort using term values as Doubles. Sort values are Double and lower values are at the front. static const int32_t DOUBLE; /// Sort using term values as Shorts. Sort values are Short and lower values are at the front. static const int32_t SHORT; /// Sort using a custom Comparator. Sort values are any ComparableValue and sorting is done according /// to natural order. static const int32_t CUSTOM; /// Sort using term values as Bytes. Sort values are Byte and lower values are at the front. static const int32_t BYTE; /// Sort using term values as Strings, but comparing by value (using String::compare) for all comparisons. /// This is typically slower than {@link #STRING}, which uses ordinals to do the sorting. static const int32_t STRING_VAL; INTERNAL: bool reverse; // defaults to natural order String field; int32_t type; // defaults to determining type dynamically localePtr locale; // defaults to "natural order" (no Locale) ParserPtr parser; private: /// Used for CUSTOM sort FieldComparatorSourcePtr comparatorSource; public: /// Represents sorting by document score (relevancy). 
static SortFieldPtr FIELD_SCORE(); /// Represents sorting by document number (index order). static SortFieldPtr FIELD_DOC(); /// Returns the name of the field. Could return null if the sort is by SCORE or DOC. /// @return Name of field, possibly null. String getField(); /// Returns the type of contents in the field. /// @return One of the constants SCORE, DOC, STRING, INT or DOUBLE. int32_t getType(); /// Returns the Locale by which term values are interpreted. localePtr getLocale(); /// Returns the instance of a {@link FieldCache} parser that fits to the given sort type. May return null /// if no parser was specified. Sorting is using the default parser then. /// @return An instance of a parser, or null. ParserPtr getParser(); /// Returns whether the sort should be reversed. /// @return True if natural order should be reversed. bool getReverse(); /// Returns the {@link FieldComparatorSource} used for custom sorting FieldComparatorSourcePtr getComparatorSource(); virtual String toString(); /// Returns true if other is equal to this. If a {@link FieldComparatorSource} or {@link Parser} was provided, /// it must properly implement equals (unless a singleton is always used). virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); /// Returns the {@link FieldComparator} to use for sorting. /// @param numHits number of top hits the queue will store /// @param sortPos position of this SortField within {@link Sort}. The comparator is primary if sortPos == 0, /// secondary if sortPos == 1, etc. Some comparators can optimize themselves when they are the primary sort. 
/// @return {@link FieldComparator} to use when sorting FieldComparatorPtr getComparator(int32_t numHits, int32_t sortPos); protected: /// Sets field and type, and ensures field is not NULL unless type is SCORE or DOC void initFieldType(const String& field, int32_t type); }; } #endif LucenePlusPlus-rel_3.0.4/include/SortedTermVectorMapper.h000066400000000000000000000046411217574114600235340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SORTEDTERMVECTORMAPPER_H #define SORTEDTERMVECTORMAPPER_H #include #include "TermVectorMapper.h" namespace Lucene { /// Store a sorted collection of {@link TermVectorEntry}s. Collects all term information into a single, /// sorted set. /// /// NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/ /// positions you will not know what Fields they correlate with. 
/// /// This is not thread-safe class LPPAPI SortedTermVectorMapper : public TermVectorMapper { public: /// @param comparator A Comparator for sorting {@link TermVectorEntry}s SortedTermVectorMapper(TermVectorEntryComparator comparator); SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator); virtual ~SortedTermVectorMapper(); LUCENE_CLASS(SortedTermVectorMapper); protected: Collection currentSet; MapStringTermVectorEntry termToTVE; bool storeOffsets; bool storePositions; TermVectorEntryComparator comparator; public: static const wchar_t* ALL; public: /// Map the Term Vector information into your own structure virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); /// The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed /// into the constructor. /// /// This set will be empty until after the mapping process takes place. /// /// @return The sorted set of {@link TermVectorEntry}. Collection getTermVectorEntrySet(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SortedVIntList.h000066400000000000000000000065621217574114600220150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SORTEDVINTLIST_H #define SORTEDVINTLIST_H #include "DocIdSet.h" namespace Lucene { /// Stores and iterate on sorted integers in compressed form in RAM. 
/// /// The code for compressing the differences between ascending integers was borrowed from {@link IndexInput} /// and {@link IndexOutput}. /// /// NOTE: this class assumes the stored integers are doc Ids (hence why it extends {@link DocIdSet}). Therefore /// its {@link #iterator()} assumes {@link DocIdSetIterator#NO_MORE_DOCS} can be used as sentinel. If you /// intend to use this value, then make sure it's not used during search flow. class LPPAPI SortedVIntList : public DocIdSet { public: /// Create a SortedVIntList from all elements of an array of integers. /// @param sortedInts A sorted array of non negative integers. SortedVIntList(Collection sortedInts); /// Create a SortedVIntList from an array of integers. /// @param sortedInts A sorted array of non negative integers. /// @param inputSize The number of integers to be used from the array. SortedVIntList(Collection sortedInts, int32_t inputSize); /// Create a SortedVIntList from a BitSet. /// @param bits A bit set representing a set of integers. SortedVIntList(BitSetPtr bits); /// Create a SortedVIntList from an OpenBitSet. /// @param bits A bit set representing a set of integers. SortedVIntList(OpenBitSetPtr bits); /// Create a SortedVIntList. /// @param docIdSetIterator An iterator providing document numbers as a set of integers. /// This DocIdSetIterator is iterated completely when this constructor is called and it must provide the /// integers in non decreasing order. SortedVIntList(DocIdSetIteratorPtr docIdSetIterator); virtual ~SortedVIntList(); LUCENE_CLASS(SortedVIntList); public: /// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, a SortedVIntList representing the /// index numbers of the set bits will be smaller than that BitSet. 
static const int32_t BITS2VINTLIST_SIZE; protected: static const int32_t VB1; static const int32_t BIT_SHIFT; static const int32_t MAX_BYTES_PER_INT; int32_t _size; ByteArray bytes; int32_t lastBytePos; int32_t lastInt; public: /// @return The total number of sorted integers. int32_t size(); /// @return The size of the byte array storing the compressed sorted integers. int32_t getByteSize(); /// This DocIdSet implementation is cacheable. virtual bool isCacheable(); /// @return An iterator over the sorted integers. virtual DocIdSetIteratorPtr iterator(); protected: void initBytes(); void addInt(int32_t nextInt); friend class SortedDocIdSetIterator; }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanFilter.h000066400000000000000000000030461217574114600211610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANFILTER_H #define SPANFILTER_H #include "Filter.h" namespace Lucene { /// Abstract base class providing a mechanism to restrict searches to a subset of an index and also maintains /// and returns position information. /// /// This is useful if you want to compare the positions from a SpanQuery with the positions of items in a filter. /// For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, and /// then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could /// then compare position information for post processing. 
class LPPAPI SpanFilter : public Filter { public: virtual ~SpanFilter(); LUCENE_CLASS(SpanFilter); public: /// Returns a SpanFilterResult with true for documents which should be permitted in search results, and /// false for those that should not and Spans for where the true docs match. /// @param reader The {@link IndexReader} to load position and DocIdSet information from /// @return A {@link SpanFilterResult} virtual SpanFilterResultPtr bitSpans(IndexReaderPtr reader) = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanFilterResult.h000066400000000000000000000043001217574114600223520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANFILTERRESULT_H #define SPANFILTERRESULT_H #include "LuceneObject.h" namespace Lucene { /// The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery class LPPAPI SpanFilterResult : public LuceneObject { public: /// @param docIdSet The DocIdSet for the Filter /// @param positions A List of {@link PositionInfo} objects SpanFilterResult(DocIdSetPtr docIdSet, Collection positions); virtual ~SpanFilterResult(); LUCENE_CLASS(SpanFilterResult); protected: DocIdSetPtr docIdSet; Collection positions; // Spans spans public: /// The first entry in the array corresponds to the first "on" bit. Entries are increasing by /// document order. 
/// @return A List of PositionInfo objects Collection getPositions(); /// Returns the docIdSet DocIdSetPtr getDocIdSet(); }; class LPPAPI PositionInfo : public LuceneObject { public: PositionInfo(int32_t doc); virtual ~PositionInfo(); LUCENE_CLASS(PositionInfo); protected: int32_t doc; Collection positions; public: void addPosition(int32_t start, int32_t end); int32_t getDoc(); Collection getPositions(); }; class LPPAPI StartEnd : public LuceneObject { public: StartEnd(int32_t start, int32_t end); virtual ~StartEnd(); LUCENE_CLASS(StartEnd); protected: int32_t start; int32_t end; public: /// @return The end position of this match int32_t getEnd(); /// @return The start position of this match int32_t getStart(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanFirstQuery.h000066400000000000000000000031631217574114600220510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANFIRSTQUERY_H #define SPANFIRSTQUERY_H #include "SpanQuery.h" #include "Spans.h" namespace Lucene { /// Matches spans near the beginning of a field. class LPPAPI SpanFirstQuery : public SpanQuery { public: /// Construct a SpanFirstQuery matching spans in match whose end position is less than or equal to end. SpanFirstQuery(SpanQueryPtr match, int32_t end); virtual ~SpanFirstQuery(); LUCENE_CLASS(SpanFirstQuery); protected: SpanQueryPtr match; int32_t end; public: using SpanQuery::toString; /// Return the SpanQuery whose matches are filtered. SpanQueryPtr getMatch(); /// Return the maximum end position permitted in a match. 
int32_t getEnd(); virtual String getField(); virtual String toString(const String& field); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual void extractTerms(SetTerm terms); virtual SpansPtr getSpans(IndexReaderPtr reader); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); friend class FirstSpans; }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanNearQuery.h000066400000000000000000000041331217574114600216450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANNEARQUERY_H #define SPANNEARQUERY_H #include "SpanQuery.h" namespace Lucene { /// Matches spans which are near one another. One can specify slop, the maximum number of intervening /// unmatched positions, as well as whether matches are required to be in-order. class LPPAPI SpanNearQuery : public SpanQuery { public: /// Construct a SpanNearQuery. Matches spans matching a span from each clause, with up to slop total /// unmatched positions between them. * When inOrder is true, the spans from each clause must be /// ordered as in clauses. SpanNearQuery(Collection clauses, int32_t slop, bool inOrder, bool collectPayloads = true); virtual ~SpanNearQuery(); LUCENE_CLASS(SpanNearQuery); protected: Collection clauses; int32_t slop; bool inOrder; String field; bool collectPayloads; public: using SpanQuery::toString; /// Return the clauses whose spans are matched. Collection getClauses(); /// Return the maximum number of intervening unmatched positions permitted. int32_t getSlop(); /// Return true if matches are required to be in-order. 
bool isInOrder(); virtual String getField(); virtual void extractTerms(SetTerm terms); virtual String toString(const String& field); virtual SpansPtr getSpans(IndexReaderPtr reader); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanNotQuery.h000066400000000000000000000031421217574114600215170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANNOTQUERY_H #define SPANNOTQUERY_H #include "SpanQuery.h" namespace Lucene { /// Removes matches which overlap with another SpanQuery. class LPPAPI SpanNotQuery : public SpanQuery { public: /// Construct a SpanNotQuery matching spans from include which have no overlap with spans from exclude. SpanNotQuery(SpanQueryPtr include, SpanQueryPtr exclude); virtual ~SpanNotQuery(); LUCENE_CLASS(SpanNotQuery); protected: SpanQueryPtr include; SpanQueryPtr exclude; public: using SpanQuery::toString; /// Return the SpanQuery whose matches are filtered. SpanQueryPtr getInclude(); /// Return the SpanQuery whose matches must not overlap those returned. 
SpanQueryPtr getExclude(); virtual String getField(); virtual void extractTerms(SetTerm terms); virtual String toString(const String& field); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual SpansPtr getSpans(IndexReaderPtr reader); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanOrQuery.h000066400000000000000000000027011217574114600213370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANORQUERY_H #define SPANORQUERY_H #include "SpanQuery.h" namespace Lucene { /// Matches the union of its clauses. class LPPAPI SpanOrQuery : public SpanQuery { public: /// Construct a SpanOrQuery merging the provided clauses. SpanOrQuery(Collection clauses); virtual ~SpanOrQuery(); LUCENE_CLASS(SpanOrQuery); protected: Collection clauses; String field; public: using SpanQuery::toString; /// Return the clauses whose spans are matched. Collection getClauses(); virtual String getField(); virtual void extractTerms(SetTerm terms); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual QueryPtr rewrite(IndexReaderPtr reader); virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual SpansPtr getSpans(IndexReaderPtr reader); friend class OrSpans; }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanQuery.h000066400000000000000000000017031217574114600210370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANQUERY_H #define SPANQUERY_H #include "Query.h" namespace Lucene { /// Base class for span-based queries. class LPPAPI SpanQuery : public Query { public: virtual ~SpanQuery(); LUCENE_CLASS(SpanQuery); public: /// Returns the matches for this query in an index. Used internally to search for spans. virtual SpansPtr getSpans(IndexReaderPtr reader) = 0; /// Returns the name of the field matched by this query. virtual String getField() = 0; virtual WeightPtr createWeight(SearcherPtr searcher); }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanQueryFilter.h000066400000000000000000000032721217574114600222100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANQUERYFILTER_H #define SPANQUERYFILTER_H #include "SpanFilter.h" namespace Lucene { /// Constrains search results to only match those which also match a provided query. Also provides position /// information about where each document matches at the cost of extra space compared with the /// QueryWrapperFilter. There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. /// Namely, the position information for each matching document is stored. /// /// This filter does not cache. See the {@link CachingSpanFilter} for a wrapper that caches. class LPPAPI SpanQueryFilter : public SpanFilter { public: /// Constructs a filter which only matches documents matching query. /// @param query The {@link SpanQuery} to use as the basis for the Filter. 
SpanQueryFilter(SpanQueryPtr query = SpanQueryPtr()); virtual ~SpanQueryFilter(); LUCENE_CLASS(SpanQueryFilter); protected: SpanQueryPtr query; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); virtual SpanFilterResultPtr bitSpans(IndexReaderPtr reader); SpanQueryPtr getQuery(); virtual String toString(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanScorer.h000066400000000000000000000026011217574114600211650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANSCORER_H #define SPANSCORER_H #include "Scorer.h" namespace Lucene { /// Public for extension only. class LPPAPI SpanScorer : public Scorer { public: SpanScorer(SpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms); virtual ~SpanScorer(); LUCENE_CLASS(SpanScorer); protected: SpansPtr spans; WeightPtr weight; ByteArray norms; double value; bool more; int32_t doc; double freq; public: virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual double score(); protected: virtual bool setFreqCurrentDoc(); /// This method is no longer an official member of {@link Scorer}, but it is needed by SpanWeight /// to build an explanation. virtual ExplanationPtr explain(int32_t doc); friend class SpanWeight; friend class PayloadNearSpanWeight; }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanTermQuery.h000066400000000000000000000024301217574114600216650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANTERMQUERY_H #define SPANTERMQUERY_H #include "SpanQuery.h" namespace Lucene { /// Matches spans containing a term. class LPPAPI SpanTermQuery : public SpanQuery { public: /// Construct a SpanTermQuery matching the named term's spans. SpanTermQuery(TermPtr term); virtual ~SpanTermQuery(); LUCENE_CLASS(SpanTermQuery); protected: TermPtr term; public: using SpanQuery::toString; /// Return the term whose spans are matched. TermPtr getTerm(); virtual String getField(); virtual void extractTerms(SetTerm terms); virtual String toString(const String& field); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual SpansPtr getSpans(IndexReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/SpanWeight.h000066400000000000000000000025511217574114600211630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SPANWEIGHT_H #define SPANWEIGHT_H #include "Weight.h" namespace Lucene { /// Public for use by other weight implementations class LPPAPI SpanWeight : public Weight { public: SpanWeight(SpanQueryPtr query, SearcherPtr searcher); virtual ~SpanWeight(); LUCENE_CLASS(SpanWeight); protected: SimilarityPtr similarity; double value; double idf; double queryNorm; double queryWeight; SetTerm terms; SpanQueryPtr query; IDFExplanationPtr idfExp; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); friend class PayloadNearSpanScorer; friend class PayloadTermSpanScorer; }; } #endif LucenePlusPlus-rel_3.0.4/include/Spans.h000066400000000000000000000063321217574114600201770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANS_H #define SPANS_H #include "LuceneObject.h" namespace Lucene { /// An enumeration of span matches. Used to implement span searching. Each span represents a range of term /// positions within a document. Matches are enumerated in order, by increasing document number, within that /// by increasing start position and finally by increasing end position. class LPPAPI Spans : public LuceneObject { public: virtual ~Spans(); LUCENE_CLASS(Spans); public: /// Move to the next match, returning true if any such exists. 
virtual bool next() = 0; /// Skips to the first match beyond the current, whose document number is greater than or equal to target. /// /// Returns true if there is such a match. /// /// Behaves as if written: ///
        /// bool skipTo(int32_t target)
        /// {
        ///     do
        ///     {
        ///         if (!next())
        ///             return false;
        ///     }
        ///     while (target > doc());
        ///     return true;
        /// }
        /// 
/// Most implementations are considerably more efficient than that. virtual bool skipTo(int32_t target) = 0; /// Returns the document number of the current match. Initially invalid. virtual int32_t doc() = 0; /// Returns the start position of the current match. Initially invalid. virtual int32_t start() = 0; /// Returns the end position of the current match. Initially invalid. virtual int32_t end() = 0; /// Returns the payload data for the current span. This is invalid until {@link #next()} is called for the /// first time. This method must not be called more than once after each call of {@link #next()}. However, /// most payloads are loaded lazily, so if the payload data for the current position is not needed, this /// method may not be called at all for performance reasons. An ordered SpanQuery does not lazy load, so /// if you have payloads in your index and you do not want ordered SpanNearQuerys to collect payloads, you /// can disable collection with a constructor option. /// /// Note that the return type is a collection, thus the ordering should not be relied upon. /// /// @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable /// is false virtual Collection getPayload() = 0; /// Checks if a payload can be loaded at this position. /// /// Payloads can only be loaded once per call to {@link #next()}. /// /// @return true if there is a payload available at this position that can be loaded virtual bool isPayloadAvailable() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/StandardAnalyzer.h000066400000000000000000000070001217574114600223520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef STANDARDANALYZER_H #define STANDARDANALYZER_H #include "Analyzer.h" namespace Lucene { /// Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link LowerCaseFilter} and {@link StopFilter}, using /// a list of English stop words. /// /// You must specify the required {@link Version} compatibility when creating StandardAnalyzer: /// ///
    ///
  • As of 2.9, StopFilter preserves position increments ///
  • As of 2.4, Tokens incorrectly identified as acronyms are corrected ///
class LPPAPI StandardAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}). /// @param matchVersion Lucene version to match. StandardAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. /// @param matchVersion Lucene version to match. /// @param stopWords stop words StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords); /// Builds an analyzer with the stop words from the given file. /// @see WordlistLoader#getWordSet(const String&, const String&) /// @param matchVersion Lucene version to match. /// @param stopwords File to read stop words from. StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords); /// Builds an analyzer with the stop words from the given reader. /// @see WordlistLoader#getWordSet(ReaderPtr, const String&) /// @param matchVersion Lucene version to match. /// @param stopwords Reader to read stop words from. StandardAnalyzer(LuceneVersion::Version matchVersion, ReaderPtr stopwords); virtual ~StandardAnalyzer(); LUCENE_CLASS(StandardAnalyzer); public: /// Default maximum allowed token length static const int32_t DEFAULT_MAX_TOKEN_LENGTH; protected: HashSet stopSet; /// Specifies whether deprecated acronyms should be replaced with HOST type. bool replaceInvalidAcronym; bool enableStopPositionIncrements; LuceneVersion::Version matchVersion; int32_t maxTokenLength; protected: /// Construct an analyzer with the given stop words. void ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet stopWords); public: /// Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} /// and a {@link StopFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Set maximum allowed token length. If a token is seen that exceeds this length then it is discarded. 
This setting /// only takes effect the next time tokenStream or reusableTokenStream is called. void setMaxTokenLength(int32_t length); /// @see #setMaxTokenLength int32_t getMaxTokenLength(); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/StandardFilter.h000066400000000000000000000022441217574114600220170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STANDARDFILTER_H #define STANDARDFILTER_H #include "TokenFilter.h" namespace Lucene { /// Normalizes tokens extracted with {@link StandardTokenizer}. class LPPAPI StandardFilter : public TokenFilter { public: /// Construct filtering input. StandardFilter(TokenStreamPtr input); virtual ~StandardFilter(); LUCENE_CLASS(StandardFilter); protected: TypeAttributePtr typeAtt; TermAttributePtr termAtt; protected: static const String& APOSTROPHE_TYPE(); static const String& ACRONYM_TYPE(); public: /// Returns the next token in the stream, or null at EOS. /// /// Removes 's from the end of words. /// Removes dots from acronyms. virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/include/StandardTokenizer.h000066400000000000000000000103531217574114600225440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef STANDARDTOKENIZER_H #define STANDARDTOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// A grammar-based tokenizer /// /// This should be a good tokenizer for most European-language documents: /// ///
    ///
  • Splits words at punctuation characters, removing punctuation. However, a dot that's not followed by /// whitespace is considered part of a token. ///
  • Splits words at hyphens, unless there's a number in the token, in which case the whole token is interpreted /// as a product number and is not split. ///
  • Recognizes email addresses and internet hostnames as one token. ///
/// /// Many applications have specific tokenizer needs. If this tokenizer does not suit your application, please consider /// copying this source code directory to your project and maintaining your own grammar-based tokenizer. /// /// You must specify the required {@link Version} compatibility when creating StandardAnalyzer: /// ///
    ///
  • As of 2.4, Tokens incorrectly identified as acronyms are corrected ///
class LPPAPI StandardTokenizer : public Tokenizer { public: /// Creates a new instance of the {@link StandardTokenizer}. Attaches the input to the newly created scanner. /// @param input The input reader StandardTokenizer(LuceneVersion::Version matchVersion, ReaderPtr input); /// Creates a new StandardTokenizer with a given {@link AttributeSource}. StandardTokenizer(LuceneVersion::Version matchVersion, AttributeSourcePtr source, ReaderPtr input); /// Creates a new StandardTokenizer with a given {@link AttributeSource.AttributeFactory} StandardTokenizer(LuceneVersion::Version matchVersion, AttributeFactoryPtr factory, ReaderPtr input); virtual ~StandardTokenizer(); LUCENE_CLASS(StandardTokenizer); protected: /// A private instance of the scanner StandardTokenizerImplPtr scanner; bool replaceInvalidAcronym; int32_t maxTokenLength; // this tokenizer generates three attributes: offset, positionIncrement and type TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; PositionIncrementAttributePtr posIncrAtt; TypeAttributePtr typeAtt; public: static const int32_t ALPHANUM; static const int32_t APOSTROPHE; static const int32_t ACRONYM; static const int32_t COMPANY; static const int32_t EMAIL; static const int32_t HOST; static const int32_t NUM; static const int32_t CJ; /// @deprecated this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. static const int32_t ACRONYM_DEP; /// String token types that correspond to token type int constants static const Collection TOKEN_TYPES(); protected: void init(ReaderPtr input, LuceneVersion::Version matchVersion); public: /// Set the max allowed token length. Any token longer than this is skipped. 
void setMaxTokenLength(int32_t length); /// @see #setMaxTokenLength int32_t getMaxTokenLength(); /// @see TokenStream#next() virtual bool incrementToken(); virtual void end(); virtual void reset(ReaderPtr input); /// @return true if StandardTokenizer now returns these tokens as Hosts, otherwise false /// @deprecated Remove in 3.X and make true the only valid value bool isReplaceInvalidAcronym(); /// @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST. /// @deprecated Remove in 3.X and make true the only valid value void setReplaceInvalidAcronym(bool replaceInvalidAcronym); }; } #endif LucenePlusPlus-rel_3.0.4/include/StandardTokenizerImpl.h000066400000000000000000000160341217574114600233700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STANDARDTOKENIZERIMPL_H #define STANDARDTOKENIZERIMPL_H #include "LuceneObject.h" namespace Lucene { class StandardTokenizerImpl : public LuceneObject { public: /// Creates a new scanner /// @param in the Reader to read input from. StandardTokenizerImpl(ReaderPtr in); virtual ~StandardTokenizerImpl(); LUCENE_CLASS(StandardTokenizerImpl); protected: /// Initial size of the lookahead buffer static const int32_t ZZ_BUFFERSIZE; /// Translates characters to character classes static const wchar_t ZZ_CMAP_PACKED[]; static const int32_t ZZ_CMAP_LENGTH; static const int32_t ZZ_CMAP_PACKED_LENGTH; /// Translates characters to character classes static const wchar_t* ZZ_CMAP(); /// Translates DFA states to action switch labels. 
static const wchar_t ZZ_ACTION_PACKED_0[]; static const int32_t ZZ_ACTION_LENGTH; static const int32_t ZZ_ACTION_PACKED_LENGTH; /// Translates DFA states to action switch labels. static const int32_t* ZZ_ACTION(); /// Translates a state to a row index in the transition table static const wchar_t ZZ_ROWMAP_PACKED_0[]; static const int32_t ZZ_ROWMAP_LENGTH; static const int32_t ZZ_ROWMAP_PACKED_LENGTH; /// Translates a state to a row index in the transition table static const int32_t* ZZ_ROWMAP(); /// The transition table of the DFA static const wchar_t ZZ_TRANS_PACKED_0[]; static const int32_t ZZ_TRANS_LENGTH; static const int32_t ZZ_TRANS_PACKED_LENGTH; /// The transition table of the DFA static const int32_t* ZZ_TRANS(); // error codes static const int32_t ZZ_UNKNOWN_ERROR; static const int32_t ZZ_NO_MATCH; static const int32_t ZZ_PUSHBACK_2BIG; static const wchar_t* ZZ_ERROR_MSG[]; /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState static const wchar_t ZZ_ATTRIBUTE_PACKED_0[]; static const int32_t ZZ_ATTRIBUTE_LENGTH; static const int32_t ZZ_ATTRIBUTE_PACKED_LENGTH; /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState static const int32_t* ZZ_ATTRIBUTE(); /// The input device ReaderPtr zzReader; /// The current state of the DFA int32_t zzState; /// The current lexical state int32_t zzLexicalState; /// This buffer contains the current text to be matched and is the source of the yytext() string CharArray zzBuffer; /// The text position at the last accepting state int32_t zzMarkedPos; /// The text position at the last state to be included in yytext int32_t zzPushbackPos; /// The current text position in the buffer int32_t zzCurrentPos; /// StartRead marks the beginning of the yytext() string in the buffer int32_t zzStartRead; /// EndRead marks the last character in the buffer, that has been read from input int32_t zzEndRead; /// Number of newlines encountered up to the start of the matched text int32_t yyline; /// The number of characters 
up to the start of the matched text int32_t _yychar; /// The number of characters from the last newline up to the start of the matched text int32_t yycolumn; /// zzAtBOL == true if the scanner is currently at the beginning of a line bool zzAtBOL; /// zzAtEOF == true if the scanner is at the EOF bool zzAtEOF; public: /// This character denotes the end of file static const int32_t YYEOF; /// Lexical states static const int32_t YYINITIAL; public: int32_t yychar(); /// Resets the Tokenizer to a new Reader. void reset(ReaderPtr r); /// Fills Lucene token with the current token text. void getText(TokenPtr t); /// Fills TermAttribute with the current token text. void getText(TermAttributePtr t); /// Closes the input stream. void yyclose(); /// Resets the scanner to read from a new input stream. Does not close the old reader. /// /// All internal variables are reset, the old input stream cannot be reused (internal buffer is discarded and lost). /// Lexical state is set to ZZ_INITIAL. /// /// @param reader the new input stream. void yyreset(ReaderPtr reader); /// Returns the current lexical state. int32_t yystate(); /// Enters a new lexical state /// @param newState the new lexical state. void yybegin(int32_t newState); /// Returns the text matched by the current regular expression. String yytext(); /// Returns the character at position pos from the matched text. /// /// It is equivalent to yytext()[pos], but faster /// @param pos the position of the character to fetch. A value from 0 to yylength() - 1. /// @return the character at position pos. wchar_t yycharat(int32_t pos); /// Returns the length of the matched text region. int32_t yylength(); /// Pushes the specified amount of characters back into the input stream. /// /// They will be read again by then next call of the scanning method /// @param number the number of characters to be read again. 
This number must not be greater than yylength() void yypushback(int32_t number); /// Resumes scanning until the next regular expression is matched, the end of input is encountered or an I/O- /// Error occurs. int32_t getNextToken(); protected: /// Refills the input buffer. bool zzRefill(); /// Reports an error that occurred while scanning. /// /// In a well-formed scanner (no or only correct usage of yypushback(int32_t) and a match-all fallback rule) /// this method will only be called with things that "Can't Possibly Happen". If this method is called, /// something is seriously wrong. /// /// Usual syntax/scanner level error handling should be done in error fallback rules. /// /// @param errorCode The code of the errormessage to display. void zzScanError(int32_t errorCode); }; } #endif LucenePlusPlus-rel_3.0.4/include/StopAnalyzer.h000066400000000000000000000037541217574114600215530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STOPANALYZER_H #define STOPANALYZER_H #include "Analyzer.h" namespace Lucene { /// Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. /// /// You must specify the required {@link Version} compatibility when creating StopAnalyzer: As of 2.9, position /// increments are preserved class LPPAPI StopAnalyzer : public Analyzer { public: /// Builds an analyzer which removes words in {@link #ENGLISH_STOP_WORDS_SET}. StopAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the stop words from the given set. StopAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords); /// Builds an analyzer with the stop words from the given file. 
StopAnalyzer(LuceneVersion::Version matchVersion, const String& stopwordsFile); /// Builds an analyzer with the stop words from the given reader. StopAnalyzer(LuceneVersion::Version matchVersion, ReaderPtr stopwords); virtual ~StopAnalyzer(); LUCENE_CLASS(StopAnalyzer); protected: HashSet stopWords; bool enablePositionIncrements; static const wchar_t* _ENGLISH_STOP_WORDS_SET[]; public: /// An unmodifiable set containing some common English words that are usually not useful for searching. static const HashSet ENGLISH_STOP_WORDS_SET(); virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/StopFilter.h000066400000000000000000000064201217574114600212040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STOPFILTER_H #define STOPFILTER_H #include "TokenFilter.h" namespace Lucene { /// Removes stop words from a token stream. class LPPAPI StopFilter : public TokenFilter { public: /// Construct a token stream filtering the given input. If stopWords is an instance of {@link CharArraySet} /// (true if makeStopSet() was used to construct the set) it will be directly used and ignoreCase will be /// ignored since CharArraySet directly controls case sensitivity. /// /// If stopWords is not an instance of {@link CharArraySet}, a new CharArraySet will be constructed and /// ignoreCase will be used to specify the case sensitivity of that set. 
/// /// @param enablePositionIncrements true if token positions should record the removed stop words /// @param input Input TokenStream /// @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords /// @param ignoreCase if true, all words are lower cased first StopFilter(bool enablePositionIncrements, TokenStreamPtr input, HashSet stopWords, bool ignoreCase = false); StopFilter(bool enablePositionIncrements, TokenStreamPtr input, CharArraySetPtr stopWords, bool ignoreCase = false); virtual ~StopFilter(); LUCENE_CLASS(StopFilter); protected: CharArraySetPtr stopWords; bool enablePositionIncrements; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; public: /// Builds a Set from an array of stop words, appropriate for passing into the StopFilter constructor. static HashSet makeStopSet(Collection stopWords); /// Returns the next input Token whose term() is not a stop word. virtual bool incrementToken(); /// Returns version-dependent default for enablePositionIncrements. Analyzers that embed StopFilter use this /// method when creating the StopFilter. Prior to 2.9, this returns false. On 2.9 or later, it returns true. static bool getEnablePositionIncrementsVersionDefault(LuceneVersion::Version matchVersion); /// @see #setEnablePositionIncrements(bool). bool getEnablePositionIncrements(); /// If true, this StopFilter will preserve positions of the incoming tokens (ie, accumulate and set position /// increments of the removed stop tokens). Generally, true is best as it does not lose information (positions /// of the original tokens) during indexing. /// /// When set, when a token is stopped (omitted), the position increment of the following token is incremented. /// /// NOTE: be sure to also set {@link QueryParser#setEnablePositionIncrements} if you use QueryParser to create queries. 
void setEnablePositionIncrements(bool enable); }; } #endif LucenePlusPlus-rel_3.0.4/include/StoredFieldsWriter.h000066400000000000000000000042341217574114600226760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STOREDFIELDSWRITER_H #define STOREDFIELDSWRITER_H #include "DocumentsWriter.h" namespace Lucene { /// This is a DocFieldConsumer that writes stored fields. class StoredFieldsWriter : public LuceneObject { public: StoredFieldsWriter(DocumentsWriterPtr docWriter, FieldInfosPtr fieldInfos); virtual ~StoredFieldsWriter(); LUCENE_CLASS(StoredFieldsWriter); public: FieldsWriterPtr fieldsWriter; DocumentsWriterWeakPtr _docWriter; FieldInfosPtr fieldInfos; int32_t lastDocID; Collection docFreeList; int32_t freeCount; int32_t allocCount; public: StoredFieldsWriterPerThreadPtr addThread(DocStatePtr docState); void flush(SegmentWriteStatePtr state); void closeDocStore(SegmentWriteStatePtr state); StoredFieldsWriterPerDocPtr getPerDoc(); void abort(); /// Fills in any hole in the docIDs void fill(int32_t docID); void finishDocument(StoredFieldsWriterPerDocPtr perDoc); bool freeRAM(); void free(StoredFieldsWriterPerDocPtr perDoc); protected: void initFieldsWriter(); }; class StoredFieldsWriterPerDoc : public DocWriter { public: StoredFieldsWriterPerDoc(StoredFieldsWriterPtr fieldsWriter); virtual ~StoredFieldsWriterPerDoc(); LUCENE_CLASS(StoredFieldsWriterPerDoc); protected: StoredFieldsWriterWeakPtr _fieldsWriter; public: PerDocBufferPtr buffer; RAMOutputStreamPtr fdt; int32_t numStoredFields; public: void reset(); virtual void abort(); virtual int64_t sizeInBytes(); virtual void finish(); }; } #endif 
LucenePlusPlus-rel_3.0.4/include/StoredFieldsWriterPerThread.h000066400000000000000000000021701217574114600244720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STOREDFIELDSWRITERPERTHREAD_H #define STOREDFIELDSWRITERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class StoredFieldsWriterPerThread : public LuceneObject { public: StoredFieldsWriterPerThread(DocStatePtr docState, StoredFieldsWriterPtr storedFieldsWriter); virtual ~StoredFieldsWriterPerThread(); LUCENE_CLASS(StoredFieldsWriterPerThread); public: FieldsWriterPtr localFieldsWriter; StoredFieldsWriterWeakPtr _storedFieldsWriter; DocStatePtr docState; StoredFieldsWriterPerDocPtr doc; public: void startDocument(); void addField(FieldablePtr field, FieldInfoPtr fieldInfo); DocWriterPtr finishDocument(); void abort(); }; } #endif LucenePlusPlus-rel_3.0.4/include/StringReader.h000066400000000000000000000025361217574114600215060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STRINGREADER_H #define STRINGREADER_H #include "Reader.h" namespace Lucene { /// Convenience class for reading strings. class LPPAPI StringReader : public Reader { public: /// Creates a new StringReader, given the String to read from. 
StringReader(const String& str); virtual ~StringReader(); LUCENE_CLASS(StringReader); protected: String str; int32_t position; public: /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); /// Close the stream. virtual void close(); /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Reset the stream. virtual void reset(); /// The number of bytes in the stream. virtual int64_t length(); }; } #endif LucenePlusPlus-rel_3.0.4/include/StringUtils.h000066400000000000000000000070031217574114600213760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STRINGUTILS_H #define STRINGUTILS_H #include "Lucene.h" namespace Lucene { class LPPAPI StringUtils { public: /// Maximum length of UTF encoding. static const int32_t MAX_ENCODING_UTF8_SIZE; /// Default character radix. static const int32_t CHARACTER_MAX_RADIX; public: /// Convert uft8 buffer into unicode. static int32_t toUnicode(const uint8_t* utf8, int32_t length, CharArray unicode); /// Convert uft8 buffer into unicode. static int32_t toUnicode(const uint8_t* utf8, int32_t length, UnicodeResultPtr unicodeResult); /// Convert uft8 buffer into unicode. static String toUnicode(const uint8_t* utf8, int32_t length); /// Convert uft8 string into unicode. static String toUnicode(const SingleString& s); /// Convert unicode buffer into uft8. static int32_t toUTF8(const wchar_t* unicode, int32_t length, ByteArray utf8); /// Convert unicode buffer into uft8. 
static int32_t toUTF8(const wchar_t* unicode, int32_t length, UTF8ResultPtr utf8Result); /// Convert unicode buffer into uft8. static SingleString toUTF8(const wchar_t* unicode, int32_t length); /// Convert unicode string into uft8. static SingleString toUTF8(const String& s); /// Convert given string to lower case using current locale static void toLower(String& str); /// Convert given string to lower case using current locale static String toLower(const String& str); /// Convert given string to upper case using current locale static void toUpper(String& str); /// Convert given string to upper case using current locale static String toUpper(const String& str); /// Compare two strings ignoring case differences static int32_t compareCase(const String& first, const String& second); /// Splits string using given delimiters static Collection split(const String& str, const String& delim); /// Convert the given string to int32_t. static int32_t toInt(const String& value); /// Convert the given string to int64_t. static int64_t toLong(const String& value); /// Return given value as a long integer using base unit. static int64_t toLong(const String& value, int32_t base); /// Convert the given string to double. static double toDouble(const String& value); /// Compute the hash code from string. static int32_t hashCode(const String& value); /// Return given value as a string using base unit. static String toString(int64_t value, int32_t base); /// Convert any given type to a {@link String}. template static String toString(const TYPE& value) { StringStream os; os << value; return os.str(); } }; #define UTF8_TO_STRING(utf8) StringUtils::toUnicode(utf8, SIZEOF_ARRAY(utf8)) } #endif LucenePlusPlus-rel_3.0.4/include/Synchronize.h000066400000000000000000000033141217574114600214230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SYNCHRONIZE_H #define SYNCHRONIZE_H #include #include "Lucene.h" namespace Lucene { /// Utility class to support locking via a mutex. class LPPAPI Synchronize { public: Synchronize(); virtual ~Synchronize(); protected: boost::recursive_timed_mutex mutexSynchronize; int64_t lockThread; int32_t recursionCount; public: /// create a new Synchronize instance atomically. static void createSync(SynchronizePtr& sync); /// Lock mutex using an optional timeout. void lock(int32_t timeout = 0); /// Unlock mutex. void unlock(); /// Unlock all recursive mutex. int32_t unlockAll(); /// Returns true if mutex is currently locked by current thread. bool holdsLock(); }; /// Utility class to support scope locking. class LPPAPI SyncLock { public: SyncLock(SynchronizePtr sync, int32_t timeout = 0); template SyncLock(OBJECT object, int32_t timeout = 0) { this->sync = object->getSync(); lock(timeout); } virtual ~SyncLock(); protected: SynchronizePtr sync; protected: void lock(int32_t timeout); }; } #endif LucenePlusPlus-rel_3.0.4/include/TeeSinkTokenFilter.h000066400000000000000000000141571217574114600226300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TEESINKTOKENFILTER_H #define TEESINKTOKENFILTER_H #include "TokenFilter.h" #include "TokenStream.h" namespace Lucene { /// This TokenFilter provides the ability to set aside attribute states that have already been analyzed. 
This is /// useful in situations where multiple fields share many common analysis steps and then go their separate ways. /// /// It is also useful for doing things like entity extraction or proper noun analysis as part of the analysis workflow /// and saving off those tokens for use in another field. /// ///
    /// TeeSinkTokenFilterPtr source1 = newLucene(newLucene(reader1));
    /// SinkTokenStreamPtr sink1 = source1->newSinkTokenStream();
    /// SinkTokenStreamPtr sink2 = source1->newSinkTokenStream();
    ///
    /// TeeSinkTokenFilterPtr source2 = newLucene(newLucene(reader2));
    /// source2->addSinkTokenStream(sink1);
    /// source2->addSinkTokenStream(sink2);
    ///
    /// TokenStreamPtr final1 = newLucene(source1);
    /// TokenStreamPtr final2 = source2;
    /// TokenStreamPtr final3 = newLucene(sink1);
    /// TokenStreamPtr final4 = newLucene(sink2);
    ///
    /// d->add(newLucene(L"f1", final1));
    /// d->add(newLucene(L"f2", final2));
    /// d->add(newLucene(L"f3", final3));
    /// d->add(newLucene(L"f4", final4));
    /// 
/// /// In this example, sink1 and sink2 will both get tokens from both reader1 and reader2 after whitespace tokenizer /// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired. /// It is important, that tees are consumed before sinks (in the above example, the field names must be less the /// sink's field names). If you are not sure, which stream is consumed first, you can simply add another sink and /// then pass all tokens to the sinks at once using {@link #consumeAllTokens}. /// /// This TokenFilter is exhausted after this. In the above example, change the example above to: /// ///
    /// ...
    /// TokenStreamPtr final1 = newLucene(source1->newSinkTokenStream());
    /// TokenStreamPtr final2 = source2->newSinkTokenStream();
    /// sink1->consumeAllTokens();
    /// sink2->consumeAllTokens();
    /// ...
    /// 
/// /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are /// ready. /// /// Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. class LPPAPI TeeSinkTokenFilter : public TokenFilter { public: /// Instantiates a new TeeSinkTokenFilter. TeeSinkTokenFilter(TokenStreamPtr input); virtual ~TeeSinkTokenFilter(); LUCENE_CLASS(TeeSinkTokenFilter); protected: Collection sinks; public: /// Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream. SinkTokenStreamPtr newSinkTokenStream(); /// Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream that pass /// the supplied filter. /// @see SinkFilter SinkTokenStreamPtr newSinkTokenStream(SinkFilterPtr filter); /// Adds a {@link SinkTokenStream} created by another TeeSinkTokenFilter to this one. The supplied stream will /// also receive all consumed tokens. This method can be used to pass tokens from two different tees to one sink. void addSinkTokenStream(SinkTokenStreamPtr sink); /// TeeSinkTokenFilter passes all tokens to the added sinks when itself is consumed. To be sure, that all tokens /// from the input stream are passed to the sinks, you can call this methods. This instance is exhausted after this, /// but all sinks are instant available. void consumeAllTokens(); virtual bool incrementToken(); virtual void end(); }; class LPPAPI SinkFilter : public LuceneObject { public: virtual ~SinkFilter(); LUCENE_CLASS(SinkFilter); public: /// Returns true, if the current state of the passed-in {@link AttributeSource} shall be stored in the sink. virtual bool accept(AttributeSourcePtr source) = 0; /// Called by {@link SinkTokenStream#reset()}. This method does nothing by default and can optionally be overridden. 
virtual void reset(); }; class LPPAPI AcceptAllSinkFilter : public SinkFilter { public: virtual ~AcceptAllSinkFilter(); LUCENE_CLASS(AcceptAllSinkFilter); public: virtual bool accept(AttributeSourcePtr source); }; /// A filter that decides which {@link AttributeSource} states to store in the sink. class LPPAPI SinkTokenStream : public TokenStream { public: SinkTokenStream(AttributeSourcePtr source, SinkFilterPtr filter); virtual ~SinkTokenStream(); LUCENE_CLASS(SinkTokenStream); protected: Collection cachedStates; AttributeSourceStatePtr finalState; bool initIterator; Collection::iterator it; SinkFilterPtr filter; protected: bool accept(AttributeSourcePtr source); void addState(AttributeSourceStatePtr state); void setFinalState(AttributeSourceStatePtr finalState); public: virtual bool incrementToken(); virtual void end(); virtual void reset(); friend class TeeSinkTokenFilter; }; } #endif LucenePlusPlus-rel_3.0.4/include/Term.h000066400000000000000000000045401217574114600200210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERM_H #define TERM_H #include "LuceneObject.h" namespace Lucene { /// A Term represents a word from text. This is the unit of search. It is composed of two elements, /// the text of the word, as a string, and the name of the field that the text occurred in, an interned /// string. /// /// Note that terms may represent more than words from text fields, but also things like dates, email /// addresses, urls, etc. class LPPAPI Term : public LuceneObject { public: /// Constructs a Term with the given field and text. 
Term(const String& fld, const String& txt = EmptyString); virtual ~Term(); LUCENE_CLASS(Term); public: String _field; String _text; public: /// Returns the field of this term, an interned string. The field indicates the part of a document /// which this term came from. String field(); /// Returns the text of this term. In the case of words, this is simply the text of the word. In /// the case of dates and other types, this is an encoding of the object as a string. String text(); /// Optimized construction of new Terms by reusing same field as this Term /// @param text The text of the new term (field is implicitly same as this Term instance) /// @return A new Term TermPtr createTerm(const String& text); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); /// Compares two terms, returning a negative integer if this term belongs before the argument, zero /// if this term is equal to the argument, and a positive integer if this term belongs after the argument. /// /// The ordering of terms is first by field, then by text. virtual int32_t compareTo(LuceneObjectPtr other); void set(const String& fld, const String& txt); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermAttribute.h000066400000000000000000000100441217574114600217010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMATTRIBUTE_H #define TERMATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// The term text of a Token. 
class LPPAPI TermAttribute : public Attribute { public: TermAttribute(); virtual ~TermAttribute(); LUCENE_CLASS(TermAttribute); protected: static const int32_t MIN_BUFFER_SIZE; CharArray _termBuffer; int32_t _termLength; public: virtual String toString(); /// Returns the Token's term text. /// /// This method has a performance penalty because the text is stored internally in a char[]. If possible, /// use {@link #termBuffer()} and {@link #termLength()} directly instead. If you really need a String, use /// this method, which is nothing more than a convenience call to new String(token.termBuffer(), 0, /// token.termLength()) virtual String term(); /// Copies the contents of buffer, starting at offset for length characters, into the termBuffer array. /// @param buffer the buffer to copy /// @param offset the index in the buffer of the first character to copy /// @param length the number of characters to copy virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length); /// Copies the contents of buffer into the termBuffer array. /// @param buffer the buffer to copy virtual void setTermBuffer(const String& buffer); /// Returns the internal termBuffer character array which you can then directly alter. If the array is /// too small for your token, use {@link #resizeTermBuffer(int)} to increase it. After altering the buffer /// be sure to call {@link #setTermLength} to record the number of valid characters that were placed into /// the termBuffer. virtual CharArray termBuffer(); /// Optimized implementation of termBuffer. virtual wchar_t* termBufferArray(); /// Grows the termBuffer to at least size newSize, preserving the existing content. Note: If the next /// operation is to change the contents of the term buffer use {@link #setTermBuffer(char[], int, int)}, /// {@link #setTermBuffer(String)}, or {@link #setTermBuffer(String, int, int)} to optimally combine the /// resize with the setting of the termBuffer. 
/// @param newSize minimum size of the new termBuffer /// @return newly created termBuffer with length >= newSize virtual CharArray resizeTermBuffer(int32_t newSize); /// Return number of valid characters (length of the term) in the termBuffer array. virtual int32_t termLength(); /// Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the /// termBuffer or to synchronize with external manipulation of the termBuffer. Note: to grow the size of /// the array, use {@link #resizeTermBuffer(int)} first. /// @param length the truncated length virtual void setTermLength(int32_t length); virtual int32_t hashCode(); virtual void clear(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual bool equals(LuceneObjectPtr other); virtual void copyTo(AttributePtr target); protected: /// Allocates a buffer char[] of at least newSize, without preserving the existing content. Its always /// used in places that set the content. /// @param newSize minimum size of the buffer void growTermBuffer(int32_t newSize); void initTermBuffer(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermBuffer.h000066400000000000000000000027071217574114600211560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMBUFFER_H #define TERMBUFFER_H #include "LuceneObject.h" namespace Lucene { class TermBuffer : public LuceneObject { public: TermBuffer(); virtual ~TermBuffer(); LUCENE_CLASS(TermBuffer); protected: String field; TermPtr term; // cached bool preUTF8Strings; // true if strings are stored in modified UTF8 encoding UnicodeResultPtr text; UTF8ResultPtr bytes; public: virtual int32_t compareTo(LuceneObjectPtr other); /// Call this if the IndexInput passed to {@link #read} stores terms in the "modified UTF8" format. void setPreUTF8Strings(); void read(IndexInputPtr input, FieldInfosPtr fieldInfos); void set(TermPtr term); void set(TermBufferPtr other); void reset(); TermPtr toTerm(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); protected: int32_t compareChars(wchar_t* chars1, int32_t len1, wchar_t* chars2, int32_t len2); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermDocs.h000066400000000000000000000047071217574114600206370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMDOCS_H #define TERMDOCS_H #include "LuceneObject.h" namespace Lucene { /// TermDocs provides an interface for enumerating ; pairs for a term. The document /// portion names each document containing the term. Documents are indicated by number. The frequency /// portion gives the number of times the term occurred in each document. The pairs are ordered by document /// number. /// @see IndexReader#termDocs() class LPPAPI TermDocs { protected: TermDocs(); public: LUCENE_INTERFACE(TermDocs); public: /// Sets this to the data for a term. 
The enumeration is reset to the start of the data for this term. virtual void seek(TermPtr term) = 0; /// Sets this to the data for the current term in a {@link TermEnum}. /// This may be optimized in some implementations. virtual void seek(TermEnumPtr termEnum) = 0; /// Returns the current document number. This is invalid until {@link #next()} is called for the first time. virtual int32_t doc() = 0; /// Returns the frequency of the term within the current document. This is invalid until {@link #next()} is /// called for the first time. virtual int32_t freq() = 0; /// Moves to the next pair in the enumeration. Returns true if there is such a next pair in the enumeration. virtual bool next() = 0; /// Attempts to read multiple entries from the enumeration, up to length of docs. Document numbers are stored /// in docs, and term frequencies are stored in freqs. Returns the number of entries read. Zero is only /// returned when the stream has been exhausted. virtual int32_t read(Collection docs, Collection freqs) = 0; /// Skips entries to the first beyond the current whose document number is greater than or equal to target. /// Returns true if there is such an entry. virtual bool skipTo(int32_t target) = 0; /// Frees associated resources. virtual void close() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/TermEnum.h000066400000000000000000000023331217574114600206440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMENUM_H #define TERMENUM_H #include "LuceneObject.h" namespace Lucene { /// Abstract class for enumerating terms. /// /// Term enumerations are always ordered by Term.compareTo(). 
Each term in the enumeration is greater /// than all that precede it. class LPPAPI TermEnum : public LuceneObject { public: virtual ~TermEnum(); LUCENE_CLASS(TermEnum); public: /// Increments the enumeration to the next element. True if one exists. virtual bool next() = 0; /// Returns the current Term in the enumeration. virtual TermPtr term() = 0; /// Returns the docFreq of the current Term in the enumeration. virtual int32_t docFreq() = 0; /// Closes the enumeration to further activity, freeing resources. virtual void close() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/TermFreqVector.h000066400000000000000000000050131217574114600220160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMFREQVECTOR_H #define TERMFREQVECTOR_H #include "LuceneObject.h" namespace Lucene { /// Provides access to stored term vector of a document field. The vector consists of the name of the field, an /// array of the terms that occur in the field of the {@link Document} and a parallel array of frequencies. Thus, /// getTermFrequencies()[5] corresponds with the frequency of getTerms()[5], assuming there are at least 5 terms /// in the Document. class LPPAPI TermFreqVector { protected: TermFreqVector(); public: virtual ~TermFreqVector(); LUCENE_INTERFACE(TermFreqVector); public: /// The {@link Fieldable} name. /// @return The name of the field this vector is associated with. virtual String getField(); /// @return The number of terms in the term vector. virtual int32_t size(); /// @return An Array of term texts in ascending order. virtual Collection getTerms(); /// Array of term frequencies. 
Locations of the array correspond one to one to the terms in the array obtained from /// getTerms method. Each location in the array contains the number of times this term occurs in the document or the /// document field. virtual Collection getTermFrequencies(); /// Return an index in the term numbers array returned from getTerms at which the term with the specified term appears. /// If this term does not appear in the array, return -1. virtual int32_t indexOf(const String& term); /// Just like indexOf(int) but searches for a number of terms at the same time. Returns an array that has the same size /// as the number of terms searched for, each slot containing the result of searching for that term number. /// /// @param terms array containing terms to look for /// @param start index in the array where the list of terms starts /// @param length the number of terms in the list virtual Collection indexesOf(Collection terms, int32_t start, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermInfo.h000066400000000000000000000020711217574114600206320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMINFO_H #define TERMINFO_H #include "LuceneObject.h" namespace Lucene { /// A TermInfo is the record of information stored for a term. class TermInfo : public LuceneObject { public: TermInfo(TermInfoPtr ti); TermInfo(int32_t df = 0, int64_t fp = 0, int64_t pp = 0); virtual ~TermInfo(); LUCENE_CLASS(TermInfo); public: /// The number of documents which contain the term. 
int32_t docFreq; int64_t freqPointer; int64_t proxPointer; int32_t skipOffset; public: void set(int32_t docFreq, int64_t freqPointer, int64_t proxPointer, int32_t skipOffset); void set(TermInfoPtr ti); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermInfosReader.h000066400000000000000000000056551217574114600221530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMINFOSREADER_H #define TERMINFOSREADER_H #include "CloseableThreadLocal.h" #include "SimpleLRUCache.h" namespace Lucene { /// This stores a monotonically increasing set of pairs in a Directory. Pairs are /// accessed either by Term or by ordinal position the set. class TermInfosReader : public LuceneObject { public: TermInfosReader(DirectoryPtr dir, const String& seg, FieldInfosPtr fis, int32_t readBufferSize, int32_t indexDivisor); virtual ~TermInfosReader(); LUCENE_CLASS(TermInfosReader); protected: DirectoryPtr directory; String segment; FieldInfosPtr fieldInfos; CloseableThreadLocal threadResources; SegmentTermEnumPtr origEnum; int64_t _size; Collection indexTerms; Collection indexInfos; Collection indexPointers; int32_t totalIndexInterval; static const int32_t DEFAULT_CACHE_SIZE; public: int32_t getSkipInterval(); int32_t getMaxSkipLevels(); void close(); /// Returns the number of term/value pairs in the set. int64_t size(); /// Returns the TermInfo for a Term in the set, or null. TermInfoPtr get(TermPtr term); /// Returns the position of a Term in the set or -1. int64_t getPosition(TermPtr term); /// Returns an enumeration of all the Terms and TermInfos in the set. SegmentTermEnumPtr terms(); /// Returns an enumeration of terms starting at or after the named term. 
SegmentTermEnumPtr terms(TermPtr term); protected: TermInfosReaderThreadResourcesPtr getThreadResources(); /// Returns the offset of the greatest index entry which is less than or equal to term. int32_t getIndexOffset(TermPtr term); void seekEnum(SegmentTermEnumPtr enumerator, int32_t indexOffset); /// Returns the TermInfo for a Term in the set, or null. TermInfoPtr get(TermPtr term, bool useCache); void ensureIndexIsRead(); }; class TermInfosReaderThreadResources : public LuceneObject { public: virtual ~TermInfosReaderThreadResources(); LUCENE_CLASS(TermInfosReaderThreadResources); public: SegmentTermEnumPtr termEnum; // Used for caching the least recently looked-up Terms TermInfoCachePtr termInfoCache; }; } #endif LucenePlusPlus-rel_3.0.4/include/TermInfosWriter.h000066400000000000000000000076351217574114600222250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMINFOSWRITER_H #define TERMINFOSWRITER_H #include "LuceneObject.h" namespace Lucene { /// This stores a monotonically increasing set of pairs in a Directory. A TermInfos /// can be written once, in order. class TermInfosWriter : public LuceneObject { public: TermInfosWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval); TermInfosWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval, bool isIndex); virtual ~TermInfosWriter(); LUCENE_CLASS(TermInfosWriter); public: /// The file format version, a negative number. static const int32_t FORMAT; /// Changed strings to true utf8 with length-in-bytes not length-in-chars. 
static const int32_t FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; /// NOTE: always change this if you switch to a new format. static const int32_t FORMAT_CURRENT; /// The fraction of terms in the "dictionary" which should be stored in RAM. Smaller values use more memory, but /// make searching slightly faster, while larger values use less memory and make searching slightly slower. /// Searching is typically not dominated by dictionary lookup, so tweaking this is rarely useful. int32_t indexInterval; /// The fraction of {@link TermDocs} entries stored in skip tables, used to accelerate {@link TermDocs#skipTo(int)}. /// Larger values result in smaller indexes, greater acceleration, but fewer accelerable cases, while smaller values /// result in bigger indexes, less acceleration and more accelerable cases. More detailed experiments would be useful /// here. int32_t skipInterval; /// The maximum number of skip levels. Smaller values result in slightly smaller indexes, but slower skipping /// in big posting lists. int32_t maxSkipLevels; protected: FieldInfosPtr fieldInfos; IndexOutputPtr output; TermInfoPtr lastTi; int64_t size; int64_t lastIndexPointer; bool isIndex; ByteArray lastTermBytes; int32_t lastTermBytesLength; int32_t lastFieldNumber; TermInfosWriterPtr otherWriter; TermInfosWriterWeakPtr _other; UTF8ResultPtr utf8Result; // Currently used only by assert statements UnicodeResultPtr unicodeResult1; UnicodeResultPtr unicodeResult2; public: virtual void initialize(); void add(TermPtr term, TermInfoPtr ti); /// Adds a new <, TermInfo> pair to the set. Term must be lexicographically /// greater than all previous Terms added. TermInfo pointers must be positive and greater than all previous. void add(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength, TermInfoPtr ti); /// Called to complete TermInfos creation. 
void close(); protected: void initialize(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval, bool isi); /// Currently used only by assert statements bool initUnicodeResults(); /// Currently used only by assert statement int32_t compareToLastTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength); void writeTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermPositionVector.h000066400000000000000000000031751217574114600227340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMPOSITIONVECTOR_H #define TERMPOSITIONVECTOR_H #include "TermFreqVector.h" namespace Lucene { /// Extends TermFreqVector to provide additional information about positions in which each of the terms is found. A TermPositionVector not necessarily /// contains both positions and offsets, but at least one of these arrays exists. class LPPAPI TermPositionVector : public TermFreqVector { protected: TermPositionVector(); public: virtual ~TermPositionVector(); LUCENE_INTERFACE(TermPositionVector); public: /// Returns an array of positions in which the term is found. Terms are identified by the index at which its number appears in the term String /// array obtained from the indexOf method. May return null if positions have not been stored. virtual Collection getTermPositions(int32_t index); /// Returns an array of TermVectorOffsetInfo in which the term is found. May return null if offsets have not been stored. 
/// @see Token /// @param index The position in the array to get the offsets from /// @return An array of TermVectorOffsetInfo objects or the empty list virtual Collection getOffsets(int32_t index); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermPositions.h000066400000000000000000000051271217574114600217330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMPOSITIONS_H #define TERMPOSITIONS_H #include "TermDocs.h" namespace Lucene { /// TermPositions provides an interface for enumerating the *> /// tuples for a term. The document and frequency are the same as for a TermDocs. The positions portion /// lists the ordinal positions of each occurrence of a term in a document. /// @see IndexReader#termPositions() class LPPAPI TermPositions : public TermDocs { protected: TermPositions(); public: virtual ~TermPositions(); LUCENE_INTERFACE(TermPositions); public: /// Returns next position in the current document. It is an error to call this more than {@link #freq()} /// times without calling {@link #next()}. This is invalid until {@link #next()} is called for // the first time. virtual int32_t nextPosition(); /// Returns the length of the payload at the current term position. This is invalid until {@link /// #nextPosition()} is called for the first time. /// @return length of the current payload in number of bytes virtual int32_t getPayloadLength(); /// Returns the payload data at the current term position. This is invalid until {@link #nextPosition()} /// is called for the first time. /// This method must not be called more than once after each call of {@link #nextPosition()}. 
However, /// payloads are loaded lazily, so if the payload data for the current position is not needed, /// this method may not be called at all for performance reasons. /// @param data the array into which the data of this payload is to be stored /// @param offset the offset in the array into which the data of this payload is to be stored. /// @return a byte array containing the data of this payload virtual ByteArray getPayload(ByteArray data, int32_t offset); /// Checks if a payload can be loaded at this position. /// Payloads can only be loaded once per call to {@link #nextPosition()}. /// @return true if there is a payload available at this position that can be loaded virtual bool isPayloadAvailable(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermQuery.h000066400000000000000000000026131217574114600210460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMQUERY_H #define TERMQUERY_H #include "Query.h" namespace Lucene { /// A Query that matches documents containing a term. This may be combined with other terms with a /// {@link BooleanQuery}. class LPPAPI TermQuery : public Query { public: /// Constructs a query for the term. TermQuery(TermPtr term); virtual ~TermQuery(); LUCENE_CLASS(TermQuery); protected: TermPtr term; public: using Query::toString; /// Returns the term of this query. TermPtr getTerm(); virtual WeightPtr createWeight(SearcherPtr searcher); virtual void extractTerms(SetTerm terms); /// Prints a user-readable version of this query. 
virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); friend class TermWeight; }; } #endif LucenePlusPlus-rel_3.0.4/include/TermRangeFilter.h000066400000000000000000000061101217574114600221370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMRANGEFILTER_H #define TERMRANGEFILTER_H #include "MultiTermQueryWrapperFilter.h" namespace Lucene { /// A Filter that restricts search results to a range of term values in a given field. /// /// This filter matches the documents looking for terms that fall into the supplied range according to {@link /// String#compare(String)}, unless a Collator is provided. It is not intended for numerical ranges; use {@link /// NumericRangeFilter} instead. /// /// If you construct a large number of range filters with different ranges but on the same field, {@link /// FieldCacheRangeFilter} may have significantly better performance. class LPPAPI TermRangeFilter : public MultiTermQueryWrapperFilter { public: /// Warning: Using this constructor and supplying a non-null value in the collator parameter will cause /// every single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending /// on the number of index Terms in this Field, the operation could be very slow. /// @param lowerTerm The lower bound on this range /// @param upperTerm The upper bound on this range /// @param includeLower Does this range include the lower bound? /// @param includeUpper Does this range include the upper bound? 
/// @param collator The collator to use when determining range inclusion; set to null to use Unicode code /// point ordering instead of collation. TermRangeFilter(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, bool includeUpper, CollatorPtr collator = CollatorPtr()); virtual ~TermRangeFilter(); LUCENE_CLASS(TermRangeFilter); public: /// Constructs a filter for field fieldName matching less than or equal to upperTerm. static TermRangeFilterPtr Less(const String& fieldName, StringValue upperTerm); /// Constructs a filter for field fieldName matching greater than or equal to lowerTerm. static TermRangeFilterPtr More(const String& fieldName, StringValue lowerTerm); /// Returns the field name for this filter String getField(); /// Returns the lower value of this range filter String getLowerTerm(); /// Returns the upper value of this range filter String getUpperTerm(); /// Returns true if the lower endpoint is inclusive bool includesLower(); /// Returns true if the upper endpoint is inclusive bool includesUpper(); /// Returns the collator used to determine range inclusion, if any. CollatorPtr getCollator(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermRangeQuery.h000066400000000000000000000073721217574114600220320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMRANGEQUERY_H #define TERMRANGEQUERY_H #include "MultiTermQuery.h" namespace Lucene { /// A Query that matches documents within an range of terms. /// /// This query matches the documents looking for terms that fall into the supplied range according to {@link /// String#compare(String)}, unless a Collator is provided. 
It is not intended for numerical ranges; use {@link /// NumericRangeQuery} instead. /// /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. class LPPAPI TermRangeQuery : public MultiTermQuery { public: /// Constructs a query selecting all terms greater/equal than lowerTerm but less/equal than upperTerm. /// /// If an endpoint is null, it is said to be "open". Either or both endpoints may be open. Open endpoints /// may not be exclusive (you can't select all but the first or last term without explicitly specifying the /// term to exclude.) /// /// If collator is not null, it will be used to decide whether index terms are within the given range, rather /// than using the Unicode code point order in which index terms are stored. /// /// Warning: Using this constructor and supplying a non-null value in the collator parameter will cause every /// single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending on the /// number of index Terms in this Field, the operation could be very slow. /// /// @param lowerTerm The Term text at the lower end of the range /// @param upperTerm The Term text at the upper end of the range /// @param includeLower If true, the lowerTerm is included in the range. /// @param includeUpper If true, the upperTerm is included in the range. /// @param collator The collator to use to collate index Terms, to determine their membership in the range /// bounded by lowerTerm and upperTerm. 
TermRangeQuery(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, bool includeUpper, CollatorPtr collator = CollatorPtr()); virtual ~TermRangeQuery(); LUCENE_CLASS(TermRangeQuery); protected: StringValue lowerTerm; StringValue upperTerm; CollatorPtr collator; String field; bool includeLower; bool includeUpper; public: using MultiTermQuery::toString; /// Returns the field name for this query String getField(); /// Returns the lower value of this range query String getLowerTerm(); /// Returns the upper value of this range query String getUpperTerm(); /// Returns true if the lower endpoint is inclusive bool includesLower(); /// Returns true if the upper endpoint is inclusive bool includesUpper(); /// Returns the collator used to determine range inclusion, if any. CollatorPtr getCollator(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual String toString(const String& field); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); protected: virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermRangeTermEnum.h000066400000000000000000000046151217574114600224560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMRANGETERMENUM_H #define TERMRANGETERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that match the specified range parameters. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than /// all that precede it. 
class LPPAPI TermRangeTermEnum : public FilteredTermEnum { public: /// Enumerates all terms greater/equal than lowerTerm but less/equal than upperTerm. /// /// If an endpoint is null, it is said to be "open". Either or both endpoints may be open. Open endpoints /// may not be exclusive (you can't select all but the first or last term without explicitly specifying /// the term to exclude.) /// /// @param reader /// @param field An interned field that holds both lower and upper terms. /// @param lowerTermText The term text at the lower end of the range /// @param upperTermText The term text at the upper end of the range /// @param includeLower If true, the lowerTerm is included in the range. /// @param includeUpper If true, the upperTerm is included in the range. /// @param collator The collator to use to collate index Terms, to determine their membership in the range /// bounded by lowerTerm and upperTerm. TermRangeTermEnum(IndexReaderPtr reader, const String& field, StringValue lowerTermText, StringValue upperTermText, bool includeLower, bool includeUpper, CollatorPtr collator); virtual ~TermRangeTermEnum(); LUCENE_CLASS(TermRangeTermEnum); protected: CollatorPtr collator; bool _endEnum; String field; StringValue upperTermText; StringValue lowerTermText; bool includeLower; bool includeUpper; public: virtual double difference(); protected: virtual bool endEnum(); virtual bool termCompare(TermPtr term); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermScorer.h000066400000000000000000000051071217574114600211770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMSCORER_H #define TERMSCORER_H #include "Scorer.h" namespace Lucene { /// A Scorer for documents matching a Term. class TermScorer : public Scorer { public: /// Construct a TermScorer. /// @param weight The weight of the Term in the query. /// @param td An iterator over the documents matching the Term. /// @param similarity The Similarity implementation to be used for score computations. /// @param norms The field norms of the document fields for the Term. TermScorer(WeightPtr weight, TermDocsPtr td, SimilarityPtr similarity, ByteArray norms); virtual ~TermScorer(); LUCENE_CLASS(TermScorer); protected: WeightPtr weight; TermDocsPtr termDocs; ByteArray norms; double weightValue; int32_t doc; Collection docs; // buffered doc numbers Collection freqs; // buffered term freqs int32_t pointer; int32_t pointerMax; static const int32_t SCORE_CACHE_SIZE; Collection scoreCache; public: virtual void score(CollectorPtr collector); virtual int32_t docID(); /// Advances to the next document matching the query. /// The iterator over the matching documents is buffered using {@link /// TermDocs#read(Collection, Collection)}. /// @return the document matching the query or -1 if there are no more documents. virtual int32_t nextDoc(); virtual double score(); /// Advances to the first match beyond the current whose document number is greater than or equal to a /// given target. The implementation uses {@link TermDocs#skipTo(int32_t)}. /// @param target The target document number. /// @return the matching document or -1 if none exist. virtual int32_t advance(int32_t target); /// Returns a string representation of this TermScorer. 
virtual String toString(); protected: static const Collection SIM_NORM_DECODER(); virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermSpans.h000066400000000000000000000022601217574114600210230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSPANS_H #define TERMSPANS_H #include "Spans.h" namespace Lucene { /// Public for extension only class LPPAPI TermSpans : public Spans { public: TermSpans(TermPositionsPtr positions, TermPtr term); virtual ~TermSpans(); LUCENE_CLASS(TermSpans); protected: TermPositionsPtr positions; TermPtr term; int32_t _doc; int32_t freq; int32_t count; int32_t position; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); TermPositionsPtr getPositions(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorEntry.h000066400000000000000000000032311217574114600222220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORENTRY_H #define TERMVECTORENTRY_H #include "LuceneObject.h" namespace Lucene { /// Convenience class for holding TermVector information. 
class LPPAPI TermVectorEntry : public LuceneObject { public: TermVectorEntry(const String& field = EmptyString, const String& term = EmptyString, int32_t frequency = 0, Collection offsets = Collection(), Collection positions = Collection()); virtual ~TermVectorEntry(); LUCENE_CLASS(TermVectorEntry); protected: String field; String term; int32_t frequency; Collection offsets; Collection positions; public: String getField(); int32_t getFrequency(); Collection getOffsets(); Collection getPositions(); String getTerm(); void setFrequency(int32_t frequency); void setOffsets(Collection offsets); void setPositions(Collection positions); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorEntryFreqSortedComparator.h000066400000000000000000000016441217574114600262570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORENTRYFREQSORTEDCOMPARATOR_H #define TERMVECTORENTRYFREQSORTEDCOMPARATOR_H #include "LuceneObject.h" namespace Lucene { /// Compares {@link TermVectorEntry}s first by frequency and then by the term (case-sensitive) class LPPAPI TermVectorEntryFreqSortedComparator : public LuceneObject { public: virtual ~TermVectorEntryFreqSortedComparator(); LUCENE_CLASS(TermVectorEntryFreqSortedComparator); public: static bool compare(const TermVectorEntryPtr& first, const TermVectorEntryPtr& second); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorMapper.h000066400000000000000000000072061217574114600223530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORMAPPER_H #define TERMVECTORMAPPER_H #include "LuceneObject.h" namespace Lucene { /// The TermVectorMapper can be used to map Term Vectors into your own structure instead of the parallel /// array structure used by {@link IndexReader#getTermFreqVector(int,String)}. /// /// It is up to the implementation to make sure it is thread-safe. class LPPAPI TermVectorMapper : public LuceneObject { public: /// @param ignoringPositions true if this mapper should tell Lucene to ignore positions even if /// they are stored. /// @param ignoringOffsets similar to ignoringPositions TermVectorMapper(bool ignoringPositions = false, bool ignoringOffsets = false); virtual ~TermVectorMapper(); LUCENE_CLASS(TermVectorMapper); protected: bool ignoringPositions; bool ignoringOffsets; public: /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. 
/// This method will be called once before retrieving the vector for a field. /// /// This method will be called before {@link #map(String,int,TermVectorOffsetInfo[],int[])}. /// @param field The field the vector is for /// @param numTerms The number of terms that need to be mapped /// @param storeOffsets true if the mapper should expect offset information /// @param storePositions true if the mapper should expect positions info virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) = 0; /// Map the Term Vector information into your own structure /// @param term The term to add to the vector /// @param frequency The frequency of the term in the document /// @param offsets null if the offset is not specified, otherwise the offset into the field of the term /// @param positions null if the position is not specified, otherwise the position in the field of the term virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions) = 0; /// Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and /// they can be skipped over. Derived classes should set this to true if they want to ignore positions. /// The default is false, meaning positions will be loaded if they are stored. virtual bool isIgnoringPositions(); /// @see #isIgnoringPositions() Same principal as {@link #isIgnoringPositions()}, but applied to offsets. virtual bool isIgnoringOffsets(); /// Passes down the index of the document whose term vector is currently being mapped, once for each top /// level call to a term vector reader. /// /// Default implementation IGNORES the document number. Override if your implementation needs the document /// number. /// /// NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations. 
/// /// @param documentNumber index of document currently being mapped virtual void setDocumentNumber(int32_t documentNumber); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorOffsetInfo.h000066400000000000000000000036131217574114600231670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTOROFFSETINFO_H #define TERMVECTOROFFSETINFO_H #include "LuceneObject.h" namespace Lucene { /// The TermVectorOffsetInfo class holds information pertaining to a Term in a {@link TermPositionVector}'s /// offset information. This offset information is the character offset as set during the Analysis phase /// (and thus may not be the actual offset in the original content). class LPPAPI TermVectorOffsetInfo : public LuceneObject { public: TermVectorOffsetInfo(int32_t startOffset = 0, int32_t endOffset = 0); virtual ~TermVectorOffsetInfo(); LUCENE_CLASS(TermVectorOffsetInfo); protected: int32_t startOffset; int32_t endOffset; public: /// Convenience declaration when creating a {@link TermPositionVector} that stores only position information. static const Collection EMPTY_OFFSET_INFO(); /// The accessor for the ending offset for the term int32_t getEndOffset(); void setEndOffset(int32_t endOffset); /// The accessor for the starting offset of the term. int32_t getStartOffset(); void setStartOffset(int32_t startOffset); /// Two TermVectorOffsetInfos are equals if both the start and end offsets are the same. /// @return true if both {@link #getStartOffset()} and {@link #getEndOffset()} are the same for both objects. 
virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorsReader.h000066400000000000000000000144021217574114600225100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSREADER_H #define TERMVECTORSREADER_H #include "TermVectorMapper.h" namespace Lucene { class TermVectorsReader : public LuceneObject { public: TermVectorsReader(); TermVectorsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos); TermVectorsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos, int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); virtual ~TermVectorsReader(); LUCENE_CLASS(TermVectorsReader); public: /// NOTE: if you make a new format, it must be larger than the current format static const int32_t FORMAT_VERSION; /// Changes to speed up bulk merging of term vectors static const int32_t FORMAT_VERSION2; /// Changed strings to UTF8 with length-in-bytes not length-in-chars static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES; /// NOTE: always change this if you switch to a new format. static const int32_t FORMAT_CURRENT; /// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file static const int32_t FORMAT_SIZE; static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR; static const uint8_t STORE_OFFSET_WITH_TERMVECTOR; protected: FieldInfosPtr fieldInfos; IndexInputPtr tvx; IndexInputPtr tvd; IndexInputPtr tvf; int32_t _size; int32_t numTotalDocs; /// The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. 
int32_t docStoreOffset; int32_t format; public: /// Used for bulk copy when merging IndexInputPtr getTvdStream(); /// Used for bulk copy when merging IndexInputPtr getTvfStream(); bool canReadRawDocs(); /// Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs starting with /// startDocID. This is used for bulk copying when merging segments, if the field numbers are /// congruent. Once this returns, the tvf & tvd streams are seeked to the startDocID. void rawDocs(Collection tvdLengths, Collection tvfLengths, int32_t startDocID, int32_t numDocs); void close(); /// @return The number of documents in the reader int32_t size(); void get(int32_t docNum, const String& field, TermVectorMapperPtr mapper); /// Retrieve the term vector for the given document and field /// @param docNum The document number to retrieve the vector for /// @param field The field within the document to retrieve /// @return The TermFreqVector for the document and field or null if there is no termVector for /// this field. TermFreqVectorPtr get(int32_t docNum, const String& field); /// Return all term vectors stored for this document or null if the could not be read in. 
/// /// @param docNum The document number to retrieve the vector for /// @return All term frequency vectors Collection get(int32_t docNum); void get(int32_t docNumber, TermVectorMapperPtr mapper); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); protected: void ConstructReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); void seekTvx(int32_t docNum); int32_t checkValidFormat(IndexInputPtr in); /// Reads the String[] fields; you have to pre-seek tvd to the right point Collection readFields(int32_t fieldCount); /// Reads the long[] offsets into TVF; you have to pre-seek tvx/tvd to the right point Collection readTvfPointers(int32_t fieldCount); Collection readTermVectors(int32_t docNum, Collection fields, Collection tvfPointers); void readTermVectors(Collection fields, Collection tvfPointers, TermVectorMapperPtr mapper); /// @param field The field to read in /// @param tvfPointer The pointer within the tvf file where we should start reading /// @param mapper The mapper used to map the TermVector void readTermVector(const String& field, int64_t tvfPointer, TermVectorMapperPtr mapper); }; /// Models the existing parallel array structure class ParallelArrayTermVectorMapper : public TermVectorMapper { public: ParallelArrayTermVectorMapper(); virtual ~ParallelArrayTermVectorMapper(); LUCENE_CLASS(ParallelArrayTermVectorMapper); protected: Collection terms; Collection termFreqs; Collection< Collection > positions; Collection< Collection > offsets; int32_t currentPosition; bool storingOffsets; bool storingPositions; String field; public: /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. /// This method will be called once before retrieving the vector for a field. 
virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); /// Map the Term Vector information into your own structure virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); /// Construct the vector /// @return The {@link TermFreqVector} based on the mappings. TermFreqVectorPtr materializeVector(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorsTermsWriter.h000066400000000000000000000063361217574114600236040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSTERMSWRITER_H #define TERMVECTORSTERMSWRITER_H #include "TermsHashConsumer.h" #include "DocumentsWriter.h" #include "RawPostingList.h" namespace Lucene { class TermVectorsTermsWriter : public TermsHashConsumer { public: TermVectorsTermsWriter(DocumentsWriterPtr docWriter); virtual ~TermVectorsTermsWriter(); LUCENE_CLASS(TermVectorsTermsWriter); public: DocumentsWriterWeakPtr _docWriter; TermVectorsWriterPtr termVectorsWriter; Collection docFreeList; int32_t freeCount; IndexOutputPtr tvx; IndexOutputPtr tvd; IndexOutputPtr tvf; int32_t lastDocID; int32_t allocCount; public: virtual TermsHashConsumerPerThreadPtr addThread(TermsHashPerThreadPtr perThread); virtual void createPostings(Collection postings, int32_t start, int32_t count); virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state); virtual void closeDocStore(SegmentWriteStatePtr state); TermVectorsTermsWriterPerDocPtr getPerDoc(); /// Fills in no-term-vectors for all docs we haven't seen since the last doc that had term vectors. 
void fill(int32_t docID); void initTermVectorsWriter(); void finishDocument(TermVectorsTermsWriterPerDocPtr perDoc); bool freeRAM(); void free(TermVectorsTermsWriterPerDocPtr doc); virtual void abort(); virtual int32_t bytesPerPosting(); }; class TermVectorsTermsWriterPerDoc : public DocWriter { public: TermVectorsTermsWriterPerDoc(TermVectorsTermsWriterPtr termsWriter = TermVectorsTermsWriterPtr()); virtual ~TermVectorsTermsWriterPerDoc(); LUCENE_CLASS(TermVectorsTermsWriterPerDoc); protected: TermVectorsTermsWriterWeakPtr _termsWriter; public: PerDocBufferPtr buffer; RAMOutputStreamPtr perDocTvf; int32_t numVectorFields; Collection fieldNumbers; Collection fieldPointers; public: void reset(); virtual void abort(); void addField(int32_t fieldNumber); virtual int64_t sizeInBytes(); virtual void finish(); }; class TermVectorsTermsWriterPostingList : public RawPostingList { public: TermVectorsTermsWriterPostingList(); virtual ~TermVectorsTermsWriterPostingList(); LUCENE_CLASS(TermVectorsTermsWriterPostingList); public: int32_t freq; // How many times this term occurred in the current doc int32_t lastOffset; // Last offset we saw int32_t lastPosition; // Last position where this term occurred }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorsTermsWriterPerField.h000066400000000000000000000037051217574114600252140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSTERMSWRITERPERFIELD_H #define TERMVECTORSTERMSWRITERPERFIELD_H #include "TermsHashConsumerPerField.h" namespace Lucene { class TermVectorsTermsWriterPerField : public TermsHashConsumerPerField { public: TermVectorsTermsWriterPerField(TermsHashPerFieldPtr termsHashPerField, TermVectorsTermsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo); virtual ~TermVectorsTermsWriterPerField(); LUCENE_CLASS(TermVectorsTermsWriterPerField); public: TermVectorsTermsWriterPerThreadWeakPtr _perThread; TermsHashPerFieldWeakPtr _termsHashPerField; TermVectorsTermsWriterWeakPtr _termsWriter; FieldInfoPtr fieldInfo; DocStateWeakPtr _docState; FieldInvertStateWeakPtr _fieldState; bool doVectors; bool doVectorPositions; bool doVectorOffsets; int32_t maxNumPostings; OffsetAttributePtr offsetAttribute; public: virtual int32_t getStreamCount(); virtual bool start(Collection fields, int32_t count); virtual void abort(); /// Called once per field per document if term vectors are enabled, to write the vectors to RAMOutputStream, /// which is then quickly flushed to the real term vectors files in the Directory. virtual void finish(); void shrinkHash(); virtual void start(FieldablePtr field); virtual void newTerm(RawPostingListPtr p0); virtual void addTerm(RawPostingListPtr p0); virtual void skippingLongTerm(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorsTermsWriterPerThread.h000066400000000000000000000030431217574114600253730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSTERMSWRITERPERTHREAD_H #define TERMVECTORSTERMSWRITERPERTHREAD_H #include "TermsHashConsumerPerThread.h" namespace Lucene { class TermVectorsTermsWriterPerThread : public TermsHashConsumerPerThread { public: TermVectorsTermsWriterPerThread(TermsHashPerThreadPtr termsHashPerThread, TermVectorsTermsWriterPtr termsWriter); virtual ~TermVectorsTermsWriterPerThread(); LUCENE_CLASS(TermVectorsTermsWriterPerThread); public: TermVectorsTermsWriterWeakPtr _termsWriter; TermsHashPerThreadWeakPtr _termsHashPerThread; DocStateWeakPtr _docState; TermVectorsTermsWriterPerDocPtr doc; ByteSliceReaderPtr vectorSliceReader; Collection utf8Results; String lastVectorFieldName; public: virtual void startDocument(); virtual DocWriterPtr finishDocument(); virtual TermsHashConsumerPerFieldPtr addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo); virtual void abort(); /// Called only by assert bool clearLastVectorFieldName(); bool vectorFieldsInOrder(FieldInfoPtr fi); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermVectorsWriter.h000066400000000000000000000027321217574114600225650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSWRITER_H #define TERMVECTORSWRITER_H #include "LuceneObject.h" namespace Lucene { class TermVectorsWriter : public LuceneObject { public: TermVectorsWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fieldInfos); virtual ~TermVectorsWriter(); LUCENE_CLASS(TermVectorsWriter); protected: IndexOutputPtr tvx; IndexOutputPtr tvd; IndexOutputPtr tvf; FieldInfosPtr fieldInfos; Collection utf8Results; public: /// Add a complete document specified by all its term vectors. If document has no term vectors, /// add value for tvx. void addAllDocVectors(Collection vectors); /// Do a bulk copy of numDocs documents from reader to our streams. This is used to expedite merging, /// if the field numbers are congruent. void addRawDocuments(TermVectorsReaderPtr reader, Collection tvdLengths, Collection tvfLengths, int32_t numDocs); /// Close all streams. void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermsHash.h000066400000000000000000000054151217574114600210120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASH_H #define TERMSHASH_H #include "InvertedDocConsumer.h" namespace Lucene { /// This class implements {@link InvertedDocConsumer}, which is passed each token produced by the analyzer on /// each field. It stores these tokens in a hash table, and allocates separate byte streams per token. Consumers /// of this class, eg {@link FreqProxTermsWriter} and {@link TermVectorsTermsWriter}, write their own byte streams /// under each term. 
class TermsHash : public InvertedDocConsumer { public: TermsHash(DocumentsWriterPtr docWriter, bool trackAllocations, TermsHashConsumerPtr consumer, TermsHashPtr nextTermsHash); virtual ~TermsHash(); LUCENE_CLASS(TermsHash); public: TermsHashConsumerPtr consumer; TermsHashPtr nextTermsHash; int32_t bytesPerPosting; int32_t postingsFreeChunk; DocumentsWriterWeakPtr _docWriter; bool trackAllocations; protected: Collection postingsFreeList; int32_t postingsFreeCount; int32_t postingsAllocCount; public: /// Add a new thread virtual InvertedDocConsumerPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread); virtual TermsHashPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread, TermsHashPerThreadPtr primaryPerThread); virtual void setFieldInfos(FieldInfosPtr fieldInfos); /// Abort (called after hitting AbortException) /// NOTE: do not make this sync'd; it's not necessary (DW ensures all other threads are idle), and it /// leads to deadlock virtual void abort(); void shrinkFreePostings(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state); /// Close doc stores virtual void closeDocStore(SegmentWriteStatePtr state); /// Flush a new segment virtual void flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state); /// Attempt to free RAM, returning true if any RAM was freed virtual bool freeRAM(); void recyclePostings(Collection postings, int32_t numPostings); void getPostings(Collection postings); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermsHashConsumer.h000066400000000000000000000023431217574114600225230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHCONSUMER_H #define TERMSHASHCONSUMER_H #include "LuceneObject.h" namespace Lucene { class TermsHashConsumer : public LuceneObject { public: virtual ~TermsHashConsumer(); LUCENE_CLASS(TermsHashConsumer); public: FieldInfosPtr fieldInfos; public: virtual int32_t bytesPerPosting() = 0; virtual void createPostings(Collection postings, int32_t start, int32_t count) = 0; virtual TermsHashConsumerPerThreadPtr addThread(TermsHashPerThreadPtr perThread) = 0; virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state) = 0; virtual void abort() = 0; virtual void closeDocStore(SegmentWriteStatePtr state) = 0; virtual void setFieldInfos(FieldInfosPtr fieldInfos); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermsHashConsumerPerField.h000066400000000000000000000023361217574114600241400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHCONSUMERPERFIELD_H #define TERMSHASHCONSUMERPERFIELD_H #include "LuceneObject.h" namespace Lucene { /// Implement this class to plug into the TermsHash processor, which inverts & stores Tokens into a hash /// table and provides an API for writing bytes into multiple streams for each unique Token. 
class TermsHashConsumerPerField : public LuceneObject { public: virtual ~TermsHashConsumerPerField(); LUCENE_CLASS(TermsHashConsumerPerField); public: virtual bool start(Collection fields, int32_t count) = 0; virtual void finish() = 0; virtual void skippingLongTerm() = 0; virtual void start(FieldablePtr field) = 0; virtual void newTerm(RawPostingListPtr p) = 0; virtual void addTerm(RawPostingListPtr p) = 0; virtual int32_t getStreamCount() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/TermsHashConsumerPerThread.h000066400000000000000000000016371217574114600243270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHCONSUMERPERTHREAD_H #define TERMSHASHCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class TermsHashConsumerPerThread : public LuceneObject { public: virtual ~TermsHashConsumerPerThread(); LUCENE_CLASS(TermsHashConsumerPerThread); public: virtual void startDocument() = 0; virtual DocWriterPtr finishDocument() = 0; virtual TermsHashConsumerPerFieldPtr addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo) = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/TermsHashPerField.h000066400000000000000000000065261217574114600224310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHPERFIELD_H #define TERMSHASHPERFIELD_H #include "InvertedDocConsumerPerField.h" namespace Lucene { class TermsHashPerField : public InvertedDocConsumerPerField { public: TermsHashPerField(DocInverterPerFieldPtr docInverterPerField, TermsHashPerThreadPtr perThread, TermsHashPerThreadPtr nextPerThread, FieldInfoPtr fieldInfo); virtual ~TermsHashPerField(); LUCENE_CLASS(TermsHashPerField); public: TermsHashConsumerPerFieldPtr consumer; TermsHashPerFieldPtr nextPerField; DocInverterPerFieldWeakPtr _docInverterPerField; TermsHashPerThreadPtr nextPerThread; TermsHashPerThreadWeakPtr _perThread; DocStatePtr docState; FieldInvertStatePtr fieldState; TermAttributePtr termAtt; // Copied from our perThread CharBlockPoolPtr charPool; IntBlockPoolPtr intPool; ByteBlockPoolPtr bytePool; int32_t streamCount; int32_t numPostingInt; FieldInfoPtr fieldInfo; bool postingsCompacted; int32_t numPostings; IntArray intUptos; int32_t intUptoStart; protected: int32_t postingsHashSize; int32_t postingsHashHalfSize; int32_t postingsHashMask; Collection postingsHash; RawPostingListPtr p; bool doCall; bool doNextCall; public: virtual void initialize(); void shrinkHash(int32_t targetSize); void reset(); /// Called on hitting an aborting exception virtual void abort(); void initReader(ByteSliceReaderPtr reader, RawPostingListPtr p, int32_t stream); /// Collapse the hash table and sort in-place. Collection sortPostings(); /// Called before a field instance is being processed virtual void start(FieldablePtr field); /// Called once per field, and is given all Fieldable occurrences for this field in the document. 
virtual bool start(Collection fields, int32_t count); void add(int32_t textStart); /// Primary entry point (for first TermsHash) virtual void add(); void writeByte(int32_t stream, int8_t b); void writeBytes(int32_t stream, const uint8_t* b, int32_t offset, int32_t length); void writeVInt(int32_t stream, int32_t i); /// Called once per field per document, after all Fieldable occurrences are inverted virtual void finish(); /// Called when postings hash is too small (> 50% occupied) or too large (< 20% occupied). void rehashPostings(int32_t newSize); protected: void compactPostings(); /// Test whether the text for current RawPostingList p equals current tokenText. bool postingEquals(const wchar_t* tokenText, int32_t tokenTextLen); }; } #endif LucenePlusPlus-rel_3.0.4/include/TermsHashPerThread.h000066400000000000000000000037321217574114600226110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHPERTHREAD_H #define TERMSHASHPERTHREAD_H #include "InvertedDocConsumerPerThread.h" namespace Lucene { class TermsHashPerThread : public InvertedDocConsumerPerThread { public: TermsHashPerThread(DocInverterPerThreadPtr docInverterPerThread, TermsHashPtr termsHash, TermsHashPtr nextTermsHash, TermsHashPerThreadPtr primaryPerThread); virtual ~TermsHashPerThread(); LUCENE_CLASS(TermsHashPerThread); public: DocInverterPerThreadWeakPtr _docInverterPerThread; TermsHashWeakPtr _termsHash; TermsHashPtr nextTermsHash; TermsHashPerThreadWeakPtr _primaryPerThread; TermsHashConsumerPerThreadPtr consumer; TermsHashPerThreadPtr nextPerThread; CharBlockPoolPtr charPool; IntBlockPoolPtr intPool; ByteBlockPoolPtr bytePool; bool primary; DocStatePtr docState; Collection freePostings; int32_t freePostingsCount; public: virtual void initialize(); virtual InvertedDocConsumerPerFieldPtr addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo); virtual void abort(); /// perField calls this when it needs more postings void morePostings(); virtual void startDocument(); virtual DocWriterPtr finishDocument(); /// Clear all state void reset(bool recyclePostings); protected: static bool noNullPostings(Collection postings, int32_t count, const String& details); }; } #endif LucenePlusPlus-rel_3.0.4/include/TestPoint.h000066400000000000000000000022331217574114600210400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TESTPOINT_H #define TESTPOINT_H #include "Lucene.h" namespace Lucene { /// Used for unit testing as a substitute for stack trace class TestPoint { public: virtual ~TestPoint(); protected: static MapStringInt testMethods; static bool enable; public: static void enableTestPoints(); static void clear(); static void setTestPoint(const String& object, const String& method, bool point); static bool getTestPoint(const String& object, const String& method); static bool getTestPoint(const String& method); }; class TestScope { public: TestScope(const String& object, const String& method); virtual ~TestScope(); protected: String object; String method; }; } #endif LucenePlusPlus-rel_3.0.4/include/ThreadPool.h000066400000000000000000000047421217574114600211570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef THREADPOOL_H #define THREADPOOL_H #include #include #include #include "LuceneObject.h" namespace Lucene { typedef boost::shared_ptr workPtr; /// A Future represents the result of an asynchronous computation. Methods are provided to check if the computation /// is complete, to wait for its completion, and to retrieve the result of the computation. The result can only be /// retrieved using method get when the computation has completed, blocking if necessary until it is ready. class Future : public LuceneObject { public: virtual ~Future(); protected: boost::any value; public: void set(const boost::any& value) { SyncLock syncLock(this); this->value = value; } template TYPE get() { SyncLock syncLock(this); while (value.empty()) wait(10); return value.empty() ? 
TYPE() : boost::any_cast(value); } }; /// Utility class to handle a pool of threads. class ThreadPool : public LuceneObject { public: ThreadPool(); virtual ~ThreadPool(); LUCENE_CLASS(ThreadPool); protected: boost::asio::io_service io_service; workPtr work; boost::thread_group threadGroup; static const int32_t THREADPOOL_SIZE; public: /// Get singleton thread pool instance. static ThreadPoolPtr getInstance(); template FuturePtr scheduleTask(FUNC func) { FuturePtr future(newInstance()); io_service.post(boost::bind(&ThreadPool::execute, this, func, future)); return future; } protected: // this will be executed when one of the threads is available template void execute(FUNC func, FuturePtr future) { future->set(func()); future->notifyAll(); } }; } #endif LucenePlusPlus-rel_3.0.4/include/TimeLimitingCollector.h000066400000000000000000000077061217574114600233630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TIMELIMITINGCOLLECTOR_H #define TIMELIMITINGCOLLECTOR_H #include "Collector.h" namespace Lucene { /// The {@link TimeLimitingCollector} is used to timeout search requests that take longer than the maximum /// allowed search time limit. After this time is exceeded, the search thread is stopped by throwing a /// {@link TimeExceededException}. class LPPAPI TimeLimitingCollector : public Collector { public: /// Create a TimeLimitedCollector wrapper over another {@link Collector} with a specified timeout. 
/// @param collector the wrapped {@link Collector} /// @param timeAllowed max time allowed for collecting hits after which TimeExceeded exception is thrown TimeLimitingCollector(CollectorPtr collector, int64_t timeAllowed); virtual ~TimeLimitingCollector(); LUCENE_CLASS(TimeLimitingCollector); public: /// Default timer resolution. /// @see #setResolution(int64_t) static const int32_t DEFAULT_RESOLUTION; /// Default for {@link #isGreedy()}. /// @see #isGreedy() bool DEFAULT_GREEDY; protected: static int64_t resolution; bool greedy; int64_t t0; int64_t timeout; CollectorPtr collector; int32_t docBase; public: /// Return the timer resolution. /// @see #setResolution(int64_t) static int64_t getResolution(); /// Set the timer resolution. /// The default timer resolution is 20 milliseconds. /// This means that a search required to take no longer than 800 milliseconds may be stopped after /// 780 to 820 milliseconds. Note that: ///
    ///
  • Finer (smaller) resolution is more accurate but less efficient. ///
  • Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds. ///
  • Setting resolution smaller than current resolution might take effect only after current resolution. /// (Assume current resolution of 20 milliseconds is modified to 5 milliseconds, then it can take up to 20 /// milliseconds for the change to have effect. ///
static void setResolution(int64_t newResolution); /// Stop timer thread. static void stopTimer(); /// Checks if this time limited collector is greedy in collecting the last hit. A non greedy collector, /// upon a timeout, would throw a TimeExceeded without allowing the wrapped collector to collect current /// doc. A greedy one would first allow the wrapped hit collector to collect current doc and only then /// throw a TimeExceeded exception. /// @see #setGreedy(boolean) bool isGreedy(); /// Sets whether this time limited collector is greedy. /// @param greedy true to make this time limited greedy /// @see #isGreedy() void setGreedy(bool greedy); /// Calls {@link Collector#collect(int)} on the decorated {@link Collector} unless the allowed time has /// passed, in which case it throws an exception. virtual void collect(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setScorer(ScorerPtr scorer); virtual bool acceptsDocsOutOfOrder(); protected: /// Initialize a single static timer thread to be used by all TimeLimitedCollector instances. static TimerThreadPtr TIMER_THREAD(); friend class TimerThread; }; } #endif LucenePlusPlus-rel_3.0.4/include/Token.h000066400000000000000000000452551217574114600202020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOKEN_H #define TOKEN_H #include "Attribute.h" #include "AttributeSource.h" namespace Lucene { /// A Token is an occurrence of a term from the text of a field. It consists of a term's text, the start and end /// offset of the term in the text of the field and a type string. 
/// /// The start and end offsets permit applications to re-associate a token with its source text, eg., to display /// highlighted query terms in a document browser, or to show matching text fragments in a /// KWIC display, etc. /// /// The type is a string, assigned by a lexical analyzer (a.k.a. tokenizer), naming the lexical or syntactic class /// that the token belongs to. For example an end of sentence marker token might be implemented with type "eos". /// The default token type is "word". /// /// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable length byte array. Use {@link /// TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads /// from the index. /// /// Tokenizers and TokenFilters should try to re-use a Token instance when possible for best performance, by implementing /// the {@link TokenStream#incrementToken()} API. Failing that, to create a new Token you should first use one of /// the constructors that starts with null text. To load the token from a char[] use /// {@link #setTermBuffer(char[], int, int)}. To load from a String use {@link #setTermBuffer(String)} or {@link /// #setTermBuffer(String, int, int)}. Alternatively you can get the Token's termBuffer by calling either {@link /// #termBuffer()}, if you know that your text is shorter than the capacity of the termBuffer or {@link /// #resizeTermBuffer(int)}, if there is any possibility that you may need to grow the buffer. Fill in the characters /// of your term into this buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string, /// or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to /// set the length of the term text. /// /// Typical Token reuse patterns: /// /// Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
    /// return reusableToken->reinit(string, startOffset, endOffset[, type]);
    /// 
/// /// Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
    /// return reusableToken->reinit(string, 0, string.length(), startOffset, endOffset[, type]);
    /// 
/// /// Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
    /// return reusableToken->reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
    /// 
/// /// Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
    /// return reusableToken->reinit(buffer, start, end - start, startOffset, endOffset[, type]);
    /// 
/// /// Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
    /// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
    /// 
/// /// A few things to note: /// clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but /// should affect no one. /// Because TokenStreams can be chained, one cannot assume that the Token's current type is correct. The startOffset /// and endOffset represent the start and offset in the source text, so be careful in adjusting them. When caching a /// reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again. /// /// @see Payload class LPPAPI Token : public Attribute { public: /// Constructs a Token will null text. Token(); /// Constructs a Token with null text and start and end offsets. /// @param start start offset in the source text /// @param end end offset in the source text Token(int32_t start, int32_t end); /// Constructs a Token with null text and start and end offsets plus the Token type. /// @param start start offset in the source text /// @param end end offset in the source text /// @param type the lexical type of this Token Token(int32_t start, int32_t end, const String& type); /// Constructs a Token with null text and start and end offsets plus flags. /// @param start start offset in the source text /// @param end end offset in the source text /// @param flags The bits to set for this token Token(int32_t start, int32_t end, int32_t flags); /// Constructs a Token with the given term text, start and end offsets. The type defaults to "word." /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. /// @param text term text /// @param start start offset in the source text /// @param end end offset in the source text Token(const String& text, int32_t start, int32_t end); /// Constructs a Token with the given term text, start and end offsets and type. /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. 
/// @param text term text /// @param start start offset in the source text /// @param end end offset in the source text /// @param type the lexical type of this Token Token(const String& text, int32_t start, int32_t end, const String& type); /// Constructs a Token with the given term text, start and end offsets and flags. /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. /// @param text term text /// @param start start offset in the source text /// @param end end offset in the source text /// @param flags The bits to set for this token Token(const String& text, int32_t start, int32_t end, int32_t flags); /// Constructs a Token with the given term buffer (offset and length), start and end offsets Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end); virtual ~Token(); LUCENE_CLASS(Token); public: static const String& DEFAULT_TYPE(); protected: static const int32_t MIN_BUFFER_SIZE; CharArray _termBuffer; int32_t _termLength; int32_t _startOffset; int32_t _endOffset; String _type; int32_t flags; PayloadPtr payload; int32_t positionIncrement; public: /// Set the position increment. This determines the position of this token relative to the previous Token /// in a {@link TokenStream}, used in phrase searching. /// /// The default value is one. /// /// Some common uses for this are: /// /// Set it to zero to put multiple terms in the same position. This is useful if, eg., a word has multiple /// stems. Searches for phrases including either stem will match. In this case, all but the first stem's /// increment should be set to zero: the increment of the first instance should be one. Repeating a token /// with an increment of zero can also be used to boost the scores of matches on that token. /// /// Set it to values greater than one to inhibit exact phrase matches. 
If, for example, one does not want /// phrases to match across removed stop words, then one could build a stop word filter that removes stop /// words and also sets the increment to the number of stop words removed before each non-stop word. Then /// exact phrase queries will only match when the terms occur with no intervening stop words. /// /// @param positionIncrement the distance from the prior term /// @see TermPositions virtual void setPositionIncrement(int32_t positionIncrement); /// Returns the position increment of this Token. /// @see #setPositionIncrement virtual int32_t getPositionIncrement(); /// Returns the Token's term text. /// /// This method has a performance penalty because the text is stored internally in a char[]. If possible, /// use {@link #termBuffer()} and {@link #termLength()} directly instead. If you really need a String, use /// this method, which is nothing more than a convenience call to String(token->termBuffer(), token->termLength()) virtual String term(); /// Copies the contents of buffer, starting at offset for length characters, into the termBuffer array. /// @param buffer the buffer to copy /// @param offset the index in the buffer of the first character to copy /// @param length the number of characters to copy virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length); /// Copies the contents of buffer into the termBuffer array. /// @param buffer the buffer to copy virtual void setTermBuffer(const String& buffer); /// Copies the contents of buffer, starting at offset and continuing for length characters, into the termBuffer array. /// @param buffer the buffer to copy /// @param offset the index in the buffer of the first character to copy /// @param length the number of characters to copy virtual void setTermBuffer(const String& buffer, int32_t offset, int32_t length); /// Returns the internal termBuffer character array which you can then directly alter. 
If the array is too /// small for your token, use {@link #resizeTermBuffer(int)} to increase it. After altering the buffer be sure /// to call {@link #setTermLength} to record the number of valid characters that were placed into the termBuffer. virtual CharArray termBuffer(); /// Optimized implementation of termBuffer. virtual wchar_t* termBufferArray(); /// Grows the termBuffer to at least size newSize, preserving the existing content. Note: If the next operation is /// to change the contents of the term buffer use {@link #setTermBuffer(char[], int, int)}, {@link /// #setTermBuffer(String)}, or {@link #setTermBuffer(String, int, int)} to optimally combine the resize with the /// setting of the termBuffer. /// @param newSize minimum size of the new termBuffer /// @return newly created termBuffer with length >= newSize virtual CharArray resizeTermBuffer(int32_t newSize); /// Return number of valid characters (length of the term) in the termBuffer array. virtual int32_t termLength(); /// Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the termBuffer /// or to synchronize with external manipulation of the termBuffer. Note: to grow the size of the array, use {@link /// #resizeTermBuffer(int)} first. /// @param length the truncated length virtual void setTermLength(int32_t length); /// Returns this Token's starting offset, the position of the first character corresponding to this token in the /// source text. /// /// Note that the difference between endOffset() and startOffset() may not be equal to {@link #termLength}, as the /// term text may have been altered by a stemmer or some other filter. virtual int32_t startOffset(); /// Set the starting offset. /// @see #startOffset() virtual void setStartOffset(int32_t offset); /// Returns this Token's ending offset, one greater than the position of the last character corresponding to this /// token in the source text. 
The length of the token in the source text is (endOffset - startOffset). virtual int32_t endOffset(); /// Set the ending offset. /// @see #endOffset() virtual void setEndOffset(int32_t offset); /// Set the starting and ending offset. /// @see #startOffset() and #endOffset() virtual void setOffset(int32_t startOffset, int32_t endOffset); /// Returns this Token's lexical type. Defaults to "word". virtual String type(); /// Set the lexical type. /// @see #type() virtual void setType(const String& type); /// Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although /// they do share similar purposes. The flags can be used to encode information about the token for use by other /// {@link TokenFilter}s. /// /// @return The bits virtual int32_t getFlags(); /// @see #getFlags() virtual void setFlags(int32_t flags); /// Returns this Token's payload. virtual PayloadPtr getPayload(); /// Sets this Token's payload. virtual void setPayload(PayloadPtr payload); virtual String toString(); /// Resets the term text, payload, flags, and positionIncrement, startOffset, endOffset and token type to default. virtual void clear(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Makes a clone, but replaces the term buffer and start/end offset in the process. This is more efficient than /// doing a full clone (and then calling setTermBuffer) because it saves a wasted copy of the old termBuffer. 
TokenPtr clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(char[], int, int)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} /// @return this Token instance TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(char[], int, int)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE /// @return this Token instance TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} /// @return this Token instance TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} /// @return this Token instance TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE /// @return this Token instance TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String, int, int)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE /// @return this Token instance 
TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); /// Copy the prototype token's fields into this one. Note: Payloads are shared. void reinit(TokenPtr prototype); /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. void reinit(TokenPtr prototype, const String& newTerm); /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. void reinit(TokenPtr prototype, CharArray newTermBuffer, int32_t offset, int32_t length); virtual void copyTo(AttributePtr target); /// Convenience factory that returns Token as implementation for the basic attributes static AttributeFactoryPtr TOKEN_ATTRIBUTE_FACTORY(); protected: /// Construct Token and initialize values void ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags); /// Allocates a buffer char[] of at least newSize, without preserving the existing content. Its always used in /// places that set the content. /// @param newSize minimum size of the buffer void growTermBuffer(int32_t newSize); void initTermBuffer(); /// Like clear() but doesn't clear termBuffer/text void clearNoTermBuffer(); }; /// Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes and for all other /// attributes calls the given delegate factory. 
class LPPAPI TokenAttributeFactory : public AttributeFactory { public: TokenAttributeFactory(AttributeFactoryPtr delegate); virtual ~TokenAttributeFactory(); LUCENE_CLASS(TokenAttributeFactory); protected: AttributeFactoryPtr delegate; public: virtual AttributePtr createAttributeInstance(const String& className); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TokenFilter.h000066400000000000000000000026211217574114600213360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOKENFILTER_H #define TOKENFILTER_H #include "TokenStream.h" namespace Lucene { /// A TokenFilter is a TokenStream whose input is another TokenStream. /// /// This is an abstract class; subclasses must override {@link #incrementToken()}. /// @see TokenStream class LPPAPI TokenFilter : public TokenStream { protected: /// Construct a token stream filtering the given input. TokenFilter(TokenStreamPtr input); public: virtual ~TokenFilter(); LUCENE_CLASS(TokenFilter); protected: /// The source of tokens for this filter. TokenStreamPtr input; public: /// Performs end-of-stream operations, if any, and calls then end() on the input TokenStream. /// NOTE: Be sure to call TokenFilter::end() first when overriding this method. virtual void end(); /// Close the input TokenStream. virtual void close(); /// Reset the filter as well as the input TokenStream. 
virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.4/include/TokenStream.h000066400000000000000000000140301217574114600213410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOKENSTREAM_H #define TOKENSTREAM_H #include "AttributeSource.h" namespace Lucene { /// A TokenStream enumerates the sequence of tokens, either from {@link Field}s of a {@link Document} or from /// query text. /// /// This is an abstract class; concrete subclasses are: {@link Tokenizer}, a TokenStream whose input is a Reader; /// and {@link TokenFilter}, a TokenStream whose input is another TokenStream. /// /// A new TokenStream API has been introduced with Lucene 2.9. This API has moved from being {@link Token}-based /// to {@link Attribute}-based. While {@link Token} still exists in 2.9 as a convenience class, the preferred way /// to store the information of a {@link Token} is to use {@link Attribute}s. /// /// TokenStream now extends {@link AttributeSource}, which provides access to all of the token {@link Attribute}s /// for the TokenStream. Note that only one instance per {@link Attribute} is created and reused for every /// token. This approach reduces object creation and allows local caching of references to the {@link Attribute}s. /// See {@link #incrementToken()} for further details. /// /// The workflow of the new TokenStream API is as follows: /// - Instantiation of TokenStream/{@link TokenFilter}s which add/get attributes to/from the {@link AttributeSource}. /// - The consumer calls {@link TokenStream#reset()}. /// - The consumer retrieves attributes from the stream and stores local references to all attributes it wants to access. 
/// - The consumer calls {@link #incrementToken()} until it returns false consuming the attributes after each call. /// - The consumer calls {@link #end()} so that any end-of-stream operations can be performed. /// - The consumer calls {@link #close()} to release any resource when finished using the TokenStream. /// /// To make sure that filters and consumers know which attributes are available, the attributes must be added during /// instantiation. Filters and consumers are not required to check for availability of attributes in {@link /// #incrementToken()}. /// /// Sometimes it is desirable to capture a current state of a TokenStream, eg., for buffering purposes (see {@link /// CachingTokenFilter}, {@link TeeSinkTokenFilter}). For this use case {@link AttributeSource#captureState} and {@link /// AttributeSource#restoreState} can be used. class LPPAPI TokenStream : public AttributeSource { protected: /// A TokenStream using the default attribute factory. TokenStream(); /// A TokenStream that uses the same attributes as the supplied one. TokenStream(AttributeSourcePtr input); /// A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances. TokenStream(AttributeFactoryPtr factory); public: virtual ~TokenStream(); LUCENE_CLASS(TokenStream); public: /// Consumers (ie., {@link IndexWriter}) use this method to advance the stream to the next token. Implementing /// classes must implement this method and update the appropriate {@link Attribute}s with the attributes of /// the next token. /// /// The producer must make no assumptions about the attributes after the method has been returned: the caller may /// arbitrarily change it. If the producer needs to preserve the state for subsequent calls, it can use {@link /// #captureState} to create a copy of the current attribute state. /// /// This method is called for every token of a document, so an efficient implementation is crucial for good /// performance. 
To avoid calls to {@link #addAttribute(Class)} and {@link #getAttribute(Class)}, references to /// all {@link Attribute}s that this stream uses should be retrieved during instantiation. /// /// To ensure that filters and consumers know which attributes are available, the attributes must be added during /// instantiation. Filters and consumers are not required to check for availability of attributes in {@link /// #incrementToken()}. /// /// @return false for end of stream; true otherwise virtual bool incrementToken() = 0; /// This method is called by the consumer after the last token has been consumed, after {@link #incrementToken()} /// returned false (using the new TokenStream API). Streams implementing the old API should upgrade to use this /// feature. /// /// This method can be used to perform any end-of-stream operations, such as setting the final offset of a stream. /// The final offset of a stream might differ from the offset of the last token eg in case one or more whitespaces /// followed after the last token, but a {@link WhitespaceTokenizer} was used. virtual void end(); /// Resets this stream to the beginning. This is an optional operation, so subclasses may or may not implement /// this method. {@link #reset()} is not needed for the standard indexing process. However, if the tokens of a /// TokenStream are intended to be consumed more than once, it is necessary to implement {@link #reset()}. Note that /// if your TokenStream caches tokens and feeds them back again after a reset, it is imperative that you clone the /// tokens when you store them away (on the first pass) as well as when you return them (on future passes after /// {@link #reset()}). virtual void reset(); /// Releases resources associated with this stream. 
virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.4/include/Tokenizer.h000066400000000000000000000051221217574114600210610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOKENIZER_H #define TOKENIZER_H #include "TokenStream.h" namespace Lucene { /// A Tokenizer is a TokenStream whose input is a Reader. /// /// This is an abstract class; subclasses must override {@link #incrementToken()} /// /// Note: Subclasses overriding {@link #incrementToken()} must call {@link AttributeSource#clearAttributes()} /// before setting attributes. class LPPAPI Tokenizer : public TokenStream { protected: /// Construct a tokenizer with null input. Tokenizer(); /// Construct a token stream processing the given input. Tokenizer(ReaderPtr input); /// Construct a tokenizer with null input using the given AttributeFactory. Tokenizer(AttributeFactoryPtr factory); /// Construct a token stream processing the given input using the given AttributeFactory. Tokenizer(AttributeFactoryPtr factory, ReaderPtr input); /// Construct a token stream processing the given input using the given AttributeSource. Tokenizer(AttributeSourcePtr source); /// Construct a token stream processing the given input using the given AttributeSource. Tokenizer(AttributeSourcePtr source, ReaderPtr input); public: virtual ~Tokenizer(); LUCENE_CLASS(Tokenizer); protected: /// The text source for this Tokenizer. ReaderPtr input; CharStreamPtr charStream; public: /// By default, closes the input Reader. virtual void close(); /// Return the corrected offset. If {@link #input} is a {@link CharStream} subclass this method calls /// {@link CharStream#correctOffset}, else returns currentOff. 
/// @param currentOff offset as seen in the output /// @return corrected offset based on the input /// @see CharStream#correctOffset virtual int32_t correctOffset(int32_t currentOff); using TokenStream::reset; /// Reset the tokenizer to a new reader. Typically, an analyzer (in its reusableTokenStream method) will /// use this to re-use a previously created tokenizer. virtual void reset(ReaderPtr input); }; } #endif LucenePlusPlus-rel_3.0.4/include/TopDocs.h000066400000000000000000000030551217574114600204650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPDOCS_H #define TOPDOCS_H #include "LuceneObject.h" namespace Lucene { /// Represents hits returned by {@link Searcher#search(QueryPtr, FilterPtr, int32_t)} and {@link /// Searcher#search(QueryPtr, int32_t)}. class LPPAPI TopDocs : public LuceneObject { public: /// Constructs a TopDocs with a default maxScore = double.NaN. TopDocs(int32_t totalHits, Collection scoreDocs); /// Constructs a TopDocs. TopDocs(int32_t totalHits, Collection scoreDocs, double maxScore); virtual ~TopDocs(); LUCENE_CLASS(TopDocs); public: /// The total number of hits for the query. int32_t totalHits; /// The top hits for the query. Collection scoreDocs; /// Stores the maximum score value encountered, needed for normalizing. double maxScore; public: /// Returns the maximum score value encountered. Note that in case scores are not tracked, /// this returns NaN. double getMaxScore(); /// Sets the maximum score value encountered. 
void setMaxScore(double maxScore); }; } #endif LucenePlusPlus-rel_3.0.4/include/TopDocsCollector.h000066400000000000000000000104671217574114600223410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPDOCSCOLLECTOR_H #define TOPDOCSCOLLECTOR_H #include "Collector.h" #include "PriorityQueue.h" namespace Lucene { /// A base class for all collectors that return a {@link TopDocs} output. This collector allows easy extension /// by providing a single constructor which accepts a {@link PriorityQueue} as well as protected members for /// that priority queue and a counter of the number of total hits. /// /// Extending classes can override {@link #topDocs(int32_t, int32_t)} and {@link #getTotalHits()} in order to /// provide their own implementation. class LPPAPI TopDocsCollector : public Collector { public: TopDocsCollector(HitQueueBasePtr pq); virtual ~TopDocsCollector(); LUCENE_CLASS(TopDocsCollector); protected: /// The priority queue which holds the top documents. Note that different implementations of PriorityQueue /// give different meaning to 'top documents'. HitQueue for example aggregates the top scoring documents, /// while other PQ implementations may hold documents sorted by other criteria. HitQueueBasePtr pq; /// The total number of documents that the collector encountered. int32_t totalHits; public: /// The total number of documents that matched this query. virtual int32_t getTotalHits(); /// Returns the top docs that were collected by this collector. virtual TopDocsPtr topDocs(); /// Returns the documents in the range [start .. pq.size()) that were collected by this collector. 
Note that /// if start >= pq.size(), an empty TopDocs is returned. /// /// This method is convenient to call if the application always asks for the last results, starting from the /// last 'page'. /// /// NOTE: you cannot call this method more than once for each search execution. If you need to call it more /// than once, passing each time a different start, you should call {@link #topDocs()} and work with the /// returned {@link TopDocs} object, which will contain all the results this search execution collected. virtual TopDocsPtr topDocs(int32_t start); /// Returns the documents in the rage [start .. start + howMany) that were collected by this collector. Note /// that if start >= pq.size(), an empty TopDocs is returned, and if pq.size() - start < howMany, then only /// the available documents in [start .. pq.size()) are returned. /// /// This method is useful to call in case pagination of search results is allowed by the search application, /// as well as it attempts to optimize the memory used by allocating only as much as requested by howMany. /// /// NOTE: you cannot call this method more than once for each search execution. If you need to call it more /// than once, passing each time a different range, you should call {@link #topDocs()} and work with the /// returned {@link TopDocs} object, which will contain all the results this search execution collected. virtual TopDocsPtr topDocs(int32_t start, int32_t howMany); protected: /// This is used in case topDocs() is called with illegal parameters, or there simply aren't (enough) results. static TopDocsPtr EMPTY_TOPDOCS(); /// Populates the results array with the ScoreDoc instances. This can be overridden in case a different /// ScoreDoc type should be returned. virtual void populateResults(Collection results, int32_t howMany); /// Returns a {@link TopDocs} instance containing the given results. 
If results is null it means there are /// no results to return, either because there were 0 calls to collect() or because the arguments to topDocs /// were invalid. virtual TopDocsPtr newTopDocs(Collection results, int32_t start); }; } #endif LucenePlusPlus-rel_3.0.4/include/TopFieldCollector.h000066400000000000000000000070031217574114600224640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPFIELDCOLLECTOR_H #define TOPFIELDCOLLECTOR_H #include "TopDocsCollector.h" namespace Lucene { /// A {@link Collector} that sorts by {@link SortField} using {@link FieldComparator}s. /// /// See the {@link #create(SortPtr, int32_t, bool, bool, bool, bool)} method for instantiating a TopFieldCollector. class LPPAPI TopFieldCollector : public TopDocsCollector { public: TopFieldCollector(HitQueueBasePtr pq, int32_t numHits, bool fillFields); virtual ~TopFieldCollector(); LUCENE_CLASS(TopFieldCollector); protected: bool fillFields; /// Stores the maximum score value encountered, needed for normalizing. If document scores are not tracked, /// this value is initialized to NaN. double maxScore; int32_t numHits; FieldValueHitQueueEntryPtr bottom; bool queueFull; int32_t docBase; public: /// Creates a new {@link TopFieldCollector} from the given arguments. /// /// NOTE: The instances returned by this method pre-allocate a full array of length numHits. /// /// @param sort The sort criteria (SortFields). /// @param numHits The number of results to collect. /// @param fillFields Specifies whether the actual field values should be returned on the results (FieldDoc). /// @param trackDocScores Specifies whether document scores should be tracked and set on the results. 
Note /// that if set to false, then the results' scores will be set to NaN. Setting this to true affects /// performance, as it incurs the score computation on each competitive result. Therefore if document scores /// are not required by the application, it is recommended to set it to false. /// @param trackMaxScore Specifies whether the query's maxScore should be tracked and set on the resulting /// {@link TopDocs}. Note that if set to false, {@link TopDocs#getMaxScore()} returns NaN. Setting this to /// true affects performance as it incurs the score computation on each result. Also, setting this true /// automatically sets trackDocScores to true as well. /// @param docsScoredInOrder Specifies whether documents are scored in doc Id order or not by the given /// {@link Scorer} in {@link #setScorer(ScorerPtr)}. /// @return a {@link TopFieldCollector} instance which will sort the results by the sort criteria. static TopFieldCollectorPtr create(SortPtr sort, int32_t numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder); virtual void add(int32_t slot, int32_t doc, double score); virtual bool acceptsDocsOutOfOrder(); protected: static const Collection EMPTY_SCOREDOCS(); /// Only the following callback methods need to be overridden since topDocs(int32_t, int32_t) calls them to /// return the results. virtual void populateResults(Collection results, int32_t howMany); virtual TopDocsPtr newTopDocs(Collection results, int32_t start); }; } #endif LucenePlusPlus-rel_3.0.4/include/TopFieldDocs.h000066400000000000000000000022431217574114600214270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TOPFIELDDOCS_H #define TOPFIELDDOCS_H #include "TopDocs.h" namespace Lucene { /// Represents hits returned by {@link Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr)}. class LPPAPI TopFieldDocs : public TopDocs { public: /// @param totalHits Total number of hits for the query. /// @param scoreDocs The top hits for the query. /// @param fields The sort criteria used to find the top hits. /// @param maxScore The maximum score encountered. TopFieldDocs(int32_t totalHits, Collection scoreDocs, Collection fields, double maxScore); virtual ~TopFieldDocs(); LUCENE_CLASS(TopFieldDocs); public: /// The fields which were used to sort results by. Collection fields; }; } #endif LucenePlusPlus-rel_3.0.4/include/TopScoreDocCollector.h000066400000000000000000000040471217574114600231470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPSCOREDOCCOLLECTOR_H #define TOPSCOREDOCCOLLECTOR_H #include "TopDocsCollector.h" namespace Lucene { /// A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link TopDocs}. /// This is used by {@link IndexSearcher} to implement {@link TopDocs}-based search. Hits are sorted by score /// descending and then (when the scores are tied) docID ascending. When you create an instance of this /// collector you should know in advance whether documents are going to be collected in doc Id order or not. /// /// NOTE: The values Nan, NEGATIVE_INFINITY and POSITIVE_INFINITY are not valid scores. This collector will /// not properly collect hits with such scores. 
class LPPAPI TopScoreDocCollector : public TopDocsCollector { public: TopScoreDocCollector(int32_t numHits); virtual ~TopScoreDocCollector(); LUCENE_CLASS(TopScoreDocCollector); INTERNAL: ScoreDocPtr pqTop; int32_t docBase; ScorerWeakPtr _scorer; public: /// Creates a new {@link TopScoreDocCollector} given the number of hits to collect and whether documents /// are scored in order by the input {@link Scorer} to {@link #setScorer(ScorerPtr)}. /// /// NOTE: The instances returned by this method pre-allocate a full array of length numHits. static TopScoreDocCollectorPtr create(int32_t numHits, bool docsScoredInOrder); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setScorer(ScorerPtr scorer); protected: virtual TopDocsPtr newTopDocs(Collection results, int32_t start); }; } #endif LucenePlusPlus-rel_3.0.4/include/TypeAttribute.h000066400000000000000000000024711217574114600217200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TYPEATTRIBUTE_H #define TYPEATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// A Token's lexical type. The Default value is "word". class LPPAPI TypeAttribute : public Attribute { public: TypeAttribute(); TypeAttribute(const String& type); virtual ~TypeAttribute(); LUCENE_CLASS(TypeAttribute); protected: String _type; static const String& DEFAULT_TYPE(); public: virtual String toString(); /// Returns this Token's lexical type. Defaults to "word". String type(); /// Set the lexical type. 
/// @see #type() void setType(const String& type); virtual void clear(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual void copyTo(AttributePtr target); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/UTF8Stream.h000066400000000000000000000076411217574114600210210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef UTF8STREAM_H #define UTF8STREAM_H #include "LuceneObject.h" namespace Lucene { class UTF8Base : public LuceneObject { public: virtual ~UTF8Base(); LUCENE_CLASS(UTF8Base); public: static const uint16_t LEAD_SURROGATE_MIN; static const uint16_t LEAD_SURROGATE_MAX; static const uint16_t TRAIL_SURROGATE_MIN; static const uint16_t TRAIL_SURROGATE_MAX; static const uint16_t LEAD_OFFSET; static const uint32_t SURROGATE_OFFSET; static const uint32_t CODE_POINT_MAX; static const wchar_t UNICODE_REPLACEMENT_CHAR; static const wchar_t UNICODE_TERMINATOR; protected: virtual uint32_t readNext() = 0; uint8_t mask8(uint32_t b); uint16_t mask16(uint32_t c); bool isTrail(uint32_t b); bool isSurrogate(uint32_t cp); bool isLeadSurrogate(uint32_t cp); bool isTrailSurrogate(uint32_t cp); bool isValidCodePoint(uint32_t cp); bool isOverlongSequence(uint32_t cp, int32_t length); }; class UTF8Encoder : public UTF8Base { public: UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd); virtual ~UTF8Encoder(); LUCENE_CLASS(UTF8Encoder); protected: const wchar_t* unicodeBegin; const wchar_t* unicodeEnd; public: int32_t encode(uint8_t* utf8, int32_t length); int32_t utf16to8(uint8_t* utf8, int32_t length); int32_t utf32to8(uint8_t* utf8, int32_t 
length); protected: virtual uint32_t readNext(); uint8_t* appendChar(uint8_t* utf8, uint32_t cp); }; class UTF8EncoderStream : public UTF8Encoder { public: UTF8EncoderStream(ReaderPtr reader); virtual ~UTF8EncoderStream(); LUCENE_CLASS(UTF8EncoderStream); protected: ReaderPtr reader; protected: virtual uint32_t readNext(); }; class UTF8Decoder : public UTF8Base { public: UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End); virtual ~UTF8Decoder(); LUCENE_CLASS(UTF8Decoder); protected: const uint8_t* utf8Begin; const uint8_t* utf8End; public: int32_t decode(wchar_t* unicode, int32_t length); int32_t utf8to16(wchar_t* unicode, int32_t length); int32_t utf8to32(wchar_t* unicode, int32_t length); protected: virtual uint32_t readNext(); int32_t sequenceLength(uint32_t cp); bool getSequence(uint32_t& cp, int32_t length); bool isValidNext(uint32_t& cp); }; class UTF8DecoderStream : public UTF8Decoder { public: UTF8DecoderStream(ReaderPtr reader); virtual ~UTF8DecoderStream(); LUCENE_CLASS(UTF8DecoderStream); protected: ReaderPtr reader; protected: virtual uint32_t readNext(); }; class UTF16Decoder : public UTF8Base { public: UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End); virtual ~UTF16Decoder(); LUCENE_CLASS(UTF16Decoder); protected: const uint16_t* utf16Begin; const uint16_t* utf16End; public: int32_t decode(wchar_t* unicode, int32_t length); int32_t utf16to16(wchar_t* unicode, int32_t length); int32_t utf16to32(wchar_t* unicode, int32_t length); protected: virtual uint32_t readNext(); }; } #endif LucenePlusPlus-rel_3.0.4/include/UnicodeUtils.h000066400000000000000000000056151217574114600215250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef UNICODEUTILS_H #define UNICODEUTILS_H #include "LuceneObject.h" namespace Lucene { class LPPAPI UnicodeUtil { public: virtual ~UnicodeUtil(); public: /// Return true if supplied character is alpha-numeric. static bool isAlnum(wchar_t c); /// Return true if supplied character is alphabetic. static bool isAlpha(wchar_t c); /// Return true if supplied character is numeric. static bool isDigit(wchar_t c); /// Return true if supplied character is a space. static bool isSpace(wchar_t c); /// Return true if supplied character is uppercase. static bool isUpper(wchar_t c); /// Return true if supplied character is lowercase. static bool isLower(wchar_t c); /// Return true if supplied character is other type of letter. static bool isOther(wchar_t c); /// Return true if supplied character is non-spacing. static bool isNonSpacing(wchar_t c); /// Return uppercase representation of a given character. static wchar_t toUpper(wchar_t c); /// Return lowercase representation of a given character. static wchar_t toLower(wchar_t c); }; /// Utility class that contains utf8 and unicode translations. 
template class TranslationResult : public LuceneObject { public: TranslationResult() { result = Array::newInstance(10); length = 0; } public: Array result; int32_t length; public: void setLength(int32_t length) { if (!result) result = Array::newInstance((int32_t)(1.5 * (double)length)); if (result.size() < length) result.resize((int32_t)(1.5 * (double)length)); this->length = length; } void copyText(const TranslationResult& other) { setLength(other.length); MiscUtils::arrayCopy(other.result.get(), 0, result.get(), 0, other.length); } void copyText(boost::shared_ptr< TranslationResult > other) { copyText(*other); } }; class LPPAPI UTF8Result : public TranslationResult { public: virtual ~UTF8Result(); }; class LPPAPI UnicodeResult : public TranslationResult { public: virtual ~UnicodeResult(); }; } #endif LucenePlusPlus-rel_3.0.4/include/ValueSource.h000066400000000000000000000034171217574114600213510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef VALUESOURCE_H #define VALUESOURCE_H #include "LuceneObject.h" namespace Lucene { /// Source of values for basic function queries. /// /// At its default/simplest form, values - one per doc - are used as the score of that doc. /// /// Values are instantiated as {@link DocValues} for a particular reader. /// ValueSource implementations differ in RAM requirements: it would always be a factor of the number of /// documents, but for each document the number of bytes can be 1, 2, 4, or 8. class LPPAPI ValueSource : public LuceneObject { public: virtual ~ValueSource(); LUCENE_CLASS(ValueSource); public: /// Return the DocValues used by the function query. 
/// @param reader The IndexReader used to read these values. If any caching is involved, that caching /// would also be IndexReader based. virtual DocValuesPtr getValues(IndexReaderPtr reader) = 0; /// Description of field, used in explain() virtual String description() = 0; virtual String toString(); /// Needed for possible caching of query results - used by {@link ValueSourceQuery#equals(LuceneObjectPtr)}. virtual bool equals(LuceneObjectPtr other) = 0; /// Needed for possible caching of query results - used by {@link ValueSourceQuery#hashCode()}. virtual int32_t hashCode() = 0; }; } #endif LucenePlusPlus-rel_3.0.4/include/ValueSourceQuery.h000066400000000000000000000033631217574114600223770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef VALUESOURCEQUERY_H #define VALUESOURCEQUERY_H #include "Query.h" namespace Lucene { /// A Query that sets the scores of document to the values obtained from a {@link ValueSource}. /// /// This query provides a score for each and every undeleted document in the index. /// /// The value source can be based on a (cached) value of an indexed field, but it can also be based on an /// external source, eg. values read from an external database. /// /// Score is set as: Score(doc,query) = (query.getBoost() * query.getBoost()) * valueSource(doc). 
class LPPAPI ValueSourceQuery : public Query { public: /// Create a value source query /// @param valSrc provides the values defines the function to be used for scoring ValueSourceQuery(ValueSourcePtr valSrc); virtual ~ValueSourceQuery(); LUCENE_CLASS(ValueSourceQuery); public: ValueSourcePtr valSrc; public: using Query::toString; virtual QueryPtr rewrite(IndexReaderPtr reader); virtual void extractTerms(SetTerm terms); virtual WeightPtr createWeight(SearcherPtr searcher); virtual String toString(const String& field); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/include/VariantUtils.h000066400000000000000000000063461217574114600215450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef VARIANTUTILS_H #define VARIANTUTILS_H #include #include "Lucene.h" #include "MiscUtils.h" namespace Lucene { class LPPAPI VariantUtils { public: template static TYPE get(boost::any var) { return var.type() == typeid(TYPE) ? boost::any_cast(var) : TYPE(); } template static TYPE get(VAR var) { return var.type() == typeid(TYPE) ? 
boost::get(var) : TYPE(); } template static bool typeOf(VAR var) { return (var.type() == typeid(TYPE)); } static VariantNull null() { return VariantNull(); } static bool isNull(boost::any var) { return var.empty(); } template static bool isNull(VAR var) { return typeOf(var); } template static int32_t hashCode(VAR var) { if (typeOf(var)) return StringUtils::hashCode(get(var)); if (typeOf(var)) return get(var); if (typeOf(var)) return (int32_t)get(var); if (typeOf(var)) { int64_t longBits = MiscUtils::doubleToLongBits(get(var)); return (int32_t)(longBits ^ (longBits >> 32)); } if (typeOf< Collection >(var)) return get< Collection >(var).hashCode(); if (typeOf< Collection >(var)) return get< Collection >(var).hashCode(); if (typeOf< Collection >(var)) return get< Collection >(var).hashCode(); if (typeOf< Collection >(var)) return get< Collection >(var).hashCode(); if (typeOf< Collection >(var)) return get< Collection >(var).hashCode(); if (typeOf(var)) return get(var)->hashCode(); return 0; } template static bool equalsType(FIRST first, SECOND second) { return (first.type() == second.type()); } template static bool equals(FIRST first, SECOND second) { return first.type() == second.type() ? (first == second) : false; } template static int32_t compareTo(VAR first, VAR second) { return first < second ? -1 : (first == second ? 0 : 1); } }; } #endif LucenePlusPlus-rel_3.0.4/include/Weight.h000066400000000000000000000104271217574114600203420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WEIGHT_H #define WEIGHT_H #include "LuceneObject.h" namespace Lucene { /// Calculate query weights and build query scorers. 
/// /// The purpose of {@link Weight} is to ensure searching does not modify a {@link Query}, so that a /// {@link Query} instance can be reused. /// {@link Searcher} dependent state of the query should reside in the {@link Weight}. /// {@link IndexReader} dependent state should reside in the {@link Scorer}. /// /// Weight is used in the following way: ///
    ///
  1. A Weight is constructed by a top-level query, given a Searcher ({@link Query#createWeight(Searcher)}). ///
  2. The {@link #sumOfSquaredWeights()} method is called on the Weight to compute the query normalization /// factor {@link Similarity#queryNorm(float)} of the query clauses contained in the query. ///
  3. The query normalization factor is passed to {@link #normalize(float)}. At this point the weighting is /// complete. ///
  4. A Scorer is constructed by {@link #scorer(IndexReaderPtr, bool, bool)}. ///
class LPPAPI Weight : public LuceneObject { public: virtual ~Weight(); LUCENE_CLASS(Weight); public: /// An explanation of the score computation for the named document. /// @param reader sub-reader containing the give doc /// @param doc /// @return an Explanation for the score virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc) = 0; /// The query that this concerns. virtual QueryPtr getQuery() = 0; /// The weight for this query. virtual double getValue() = 0; /// Assigns the query normalization factor to this. virtual void normalize(double norm) = 0; /// Returns a {@link Scorer} which scores documents in/out-of order according to scoreDocsInOrder. /// /// NOTE: even if scoreDocsInOrder is false, it is recommended to check whether the returned Scorer /// indeed scores documents out of order (ie., call {@link #scoresDocsOutOfOrder()}), as some Scorer /// implementations will always return documents in-order. /// /// NOTE: null can be returned if no documents will be scored by this query. /// /// @param reader The {@link IndexReader} for which to return the {@link Scorer}. /// @param scoreDocsInOrder Specifies whether in-order scoring of documents is required. Note that if /// set to false (i.e., out-of-order scoring is required), this method can return whatever scoring mode /// it supports, as every in-order scorer is also an out-of-order one. However, an out-of-order scorer /// may not support {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)}, therefore it is /// recommended to request an in-order scorer if use of these methods is required. /// @param topScorer If true, {@link Scorer#score(CollectorPtr)} will be called; if false, {@link /// Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will be called. /// @return a {@link Scorer} which scores documents in/out-of order. virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) = 0; /// The sum of squared weights of contained query clauses. 
virtual double sumOfSquaredWeights() = 0; /// Returns true if this implementation scores docs only out of order. This method is used in conjunction /// with {@link Collector}'s {@link Collector#acceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and /// {@link #scorer(IndexReaderPtr, bool, bool)} to create a matching {@link Scorer} instance for a given /// {@link Collector}, or vice versa. /// /// NOTE: the default implementation returns false, ie. the Scorer scores documents in-order. virtual bool scoresDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.4/include/WhitespaceAnalyzer.h000066400000000000000000000015551217574114600227170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WHITESPACEANALYZER_H #define WHITESPACEANALYZER_H #include "Analyzer.h" namespace Lucene { /// An Analyzer that uses {@link WhitespaceTokenizer}. class LPPAPI WhitespaceAnalyzer : public Analyzer { public: virtual ~WhitespaceAnalyzer(); LUCENE_CLASS(WhitespaceAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/WhitespaceTokenizer.h000066400000000000000000000025721217574114600231040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef WHITESPACETOKENIZER_H #define WHITESPACETOKENIZER_H #include "CharTokenizer.h" namespace Lucene { /// A WhitespaceTokenizer is a tokenizer that divides text at whitespace. Adjacent sequences of non-Whitespace /// characters form tokens. class LPPAPI WhitespaceTokenizer : public CharTokenizer { public: /// Construct a new WhitespaceTokenizer. WhitespaceTokenizer(ReaderPtr input); /// Construct a new WhitespaceTokenizer using a given {@link AttributeSource}. WhitespaceTokenizer(AttributeSourcePtr source, ReaderPtr input); /// Construct a new WhitespaceTokenizer using a given {@link AttributeSource.AttributeFactory}. WhitespaceTokenizer(AttributeFactoryPtr factory, ReaderPtr input); virtual ~WhitespaceTokenizer(); LUCENE_CLASS(WhitespaceTokenizer); public: /// Collects only characters which do not satisfy {@link Character#isWhitespace(char)}. virtual bool isTokenChar(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.4/include/WildcardQuery.h000066400000000000000000000035161217574114600216730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WILDCARDQUERY_H #define WILDCARDQUERY_H #include "MultiTermQuery.h" namespace Lucene { /// Implements the wildcard search query. Supported wildcards are *, which matches any character sequence /// (including the empty one), and ?, which matches any single character. Note this query can be slow, as /// it needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, a Wildcard /// term should not start with one of the wildcards * or ?. 
/// /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. /// @see WildcardTermEnum class LPPAPI WildcardQuery : public MultiTermQuery { public: WildcardQuery(TermPtr term); virtual ~WildcardQuery(); LUCENE_CLASS(WildcardQuery); protected: bool termContainsWildcard; bool termIsPrefix; TermPtr term; public: using MultiTermQuery::toString; /// Returns the pattern term. TermPtr getTerm(); virtual QueryPtr rewrite(IndexReaderPtr reader); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); protected: virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); }; } #endif LucenePlusPlus-rel_3.0.4/include/WildcardTermEnum.h000066400000000000000000000033251217574114600223200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WILDCARDTERMENUM_H #define WILDCARDTERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that match the specified wildcard filter term. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than /// all that precede it. class LPPAPI WildcardTermEnum : public FilteredTermEnum { public: /// Creates a new WildcardTermEnum. /// /// After calling the constructor the enumeration is already pointing to the first valid term if such /// a term exists. 
WildcardTermEnum(IndexReaderPtr reader, TermPtr term); virtual ~WildcardTermEnum(); LUCENE_CLASS(WildcardTermEnum); public: static const wchar_t WILDCARD_STRING; static const wchar_t WILDCARD_CHAR; TermPtr searchTerm; String field; String text; String pre; int32_t preLen; bool _endEnum; public: virtual double difference(); /// Determines if a word matches a wildcard pattern. static bool wildcardEquals(const String& pattern, int32_t patternIdx, const String& string, int32_t stringIdx); protected: virtual bool termCompare(TermPtr term); virtual bool endEnum(); }; } #endif LucenePlusPlus-rel_3.0.4/include/WordlistLoader.h000066400000000000000000000041701217574114600220470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WORDLISTLOADER_H #define WORDLISTLOADER_H #include "LuceneObject.h" namespace Lucene { /// Loader for text files that represent a list of stopwords. class LPPAPI WordlistLoader : public LuceneObject { public: virtual ~WordlistLoader(); LUCENE_CLASS(WordlistLoader); public: /// Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). /// Every line of the file should contain only one word. The words need to be in lowercase if you make use of an /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). /// /// @param wordfile File name containing the wordlist /// @param comment The comment string to ignore /// @return A set with the file's words static HashSet getWordSet(const String& wordfile, const String& comment = EmptyString); /// Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). 
/// Every line of the file should contain only one word. The words need to be in lowercase if you make use of an /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). /// /// @param reader Reader containing the wordlist /// @param comment The comment string to ignore /// @return A set with the file's words static HashSet getWordSet(ReaderPtr reader, const String& comment = EmptyString); /// Reads a stem dictionary. Each line contains: ///
word\tstem
/// (ie. two tab separated words) /// @return stem dictionary that overrules the stemming algorithm static MapStringString getStemDict(const String& wordstemfile); }; } #endif LucenePlusPlus-rel_3.0.4/include/targetver.h000066400000000000000000000015011217574114600211070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TARGETVER_H #define TARGETVER_H #ifdef _WIN32 #ifndef WINVER // Specifies that the minimum required platform is Windows XP. #define WINVER 0x0501 // Change this to the appropriate value to target other versions of Windows. #endif #ifndef _WIN32_WINNT // Specifies that the minimum required platform is Windows XP. #define _WIN32_WINNT 0x0501 // Change this to the appropriate value to target other versions of Windows. 
#endif #endif #endif LucenePlusPlus-rel_3.0.4/lib/000077500000000000000000000000001217574114600160615ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/lib/.gitignore000066400000000000000000000000011217574114600200400ustar00rootroot00000000000000*LucenePlusPlus-rel_3.0.4/liblucene++-contrib.pc.cmake000066400000000000000000000006201217574114600224420ustar00rootroot00000000000000prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix}/bin libdir=@LIB_DESTINATION@ includedir=${prefix}/include/lucene++ lib=lucene++-contrib Name: liblucene++-contrib Description: Contributions for Lucene++ - a C++ search engine, ported from the popular Apache Lucene Version: @LUCENE++_VERSION@ Libs: -L@LIB_DESTINATION@/ -l${lib} Cflags: -I${includedir} Requires: liblucene++=@LUCENE++_VERSION@ ~ LucenePlusPlus-rel_3.0.4/liblucene++.pc.cmake000066400000000000000000000005041217574114600210050ustar00rootroot00000000000000prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix}/bin libdir=@LIB_DESTINATION@ includedir=${prefix}/include/lucene++ lib=lucene++ Name: liblucene++ Description: Lucene++ - a C++ search engine, ported from the popular Apache Lucene Version: @LUCENE++_VERSION@ Libs: -L@LIB_DESTINATION@ -l${lib} Cflags: -I${includedir} ~ LucenePlusPlus-rel_3.0.4/scripts/000077500000000000000000000000001217574114600170025ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/scripts/llvm/000077500000000000000000000000001217574114600177545ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/scripts/llvm/README000066400000000000000000000015611217574114600206370ustar00rootroot00000000000000can almost get this to work: boost needs to be compiled in, so that we don't need native boost libs (DONE) problems with linking with libstdc++: ntv.bc:(.text+0x1742): undefined reference to `std::ctype::_M_widen_init() const' - seems to be a problem with libstdc++ (gcc 4.3 -> 4.4 regression) - a solution is apparently to compile with lower optimisation levels, but that doesn't seem to help waf script doesn't 
work all the way to the end yet... was still playing around with: llvm-ld -native *.so target.bc -o ntv -lsupc++ -lstdc++ -L/usr/lib/llvm-2.8/gcc-4.2/lib64 -lpthread - it worked better when linking to boost native libs confused about how to compile c++ based bytecode to a runnable lib, seems kind of strange adding all the pthread, etc, which is native? trying to convert the given code into C code doesn't work yet, due to a bug with large int sizes LucenePlusPlus-rel_3.0.4/scripts/llvm/build/000077500000000000000000000000001217574114600210535ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/scripts/llvm/build/clang.py000066400000000000000000000050031217574114600225070ustar00rootroot00000000000000############################################################################# ## Copyright (c) 2009-2011 Alan Wright. All rights reserved. ## Distributable under the terms of either the Apache License (Version 2.0) ## or the GNU Lesser General Public License. ############################################################################# from TaskGen import feature import Options import sys @feature('c') def apply_clang(self): if self.env['HAVE_LLVM'] == False: return ''' Replaced the default compiler with clang if required. ''' if not getattr(self, 'clang', True) or Options.options.disable_clang: return self.env['CC'] = self.env['CLANG'] or self.env['CC'] if sys.platform == "darwin": # workaround problems with non-static inline functions # http://clang.llvm.org/compatibility.html self.env['CCFLAGS'] += ['-std=gnu89'] @feature('c') def apply_clang_cpp(self): if self.env['HAVE_LLVM'] == False: return ''' Replaced the default compiler with clang if required. 
''' if not getattr(self, 'clang', True) or Options.options.disable_clang: return self.env['CPP'] = self.env['CLANGPP'] or self.env['CXX'] self.env['CXX'] = self.env['CLANGPP'] or self.env['CXX'] if sys.platform == "darwin": self.env['shlib_CXXFLAGS'] = ['-fPIC'] @feature('c') def apply_clang_llvm(self): if self.env['HAVE_LLVM'] == False: return #self.env['AR'] = self.env['LLVM-AR'] or self.env['AR'] self.env['LINK_CC'] = self.env['LLVM-LD'] or self.env['LINK_CC'] self.env['LINK_CXX'] = self.env['LLVM-LD'] or self.env['LINK_CXX'] self.env['STLIB_MARKER'] = '' self.env['SHLIB_MARKER'] = '' def options(opt): """ Add options specific the codehash tool """ opt.add_option('--noclang', dest = 'disable_clang', action = 'store_true', default = False, help = 'disable the clang compiler if it is available') def configure(conf): search_paths = ['/Xcode4/usr/bin/'] if sys.platform == "darwin" else [] conf.find_program('clang', var='CLANG') conf.find_program('clang++', var='CLANGPP', path_list = search_paths) conf.find_program('llvm-ld', var='LLVM-LD', path_list = search_paths) conf.find_program('llvm-ar', var='LLVM-AR', path_list = search_paths) if conf.env['LLVM-LD'] == None or conf.env['LLVM-AR'] == None or conf.env['CLANG'] == None or conf.env['CLANGPP'] == None: conf.env['HAVE_LLVM'] = False else: conf.env['HAVE_LLVM'] = True LucenePlusPlus-rel_3.0.4/scripts/llvm/waf000066400000000000000000002260141217574114600204610ustar00rootroot00000000000000#!/usr/bin/env python # encoding: ISO8859-1 # Thomas Nagy, 2005-2010 """ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
""" import os, sys VERSION="1.6.0" REVISION="626f7e8a01dd8a1c64c2eba0815b779a" INSTALL='' C1='#(' C2='#&' cwd = os.getcwd() join = os.path.join WAF='waf' def b(x): return x if sys.hexversion>0x300000f: WAF='waf3' def b(x): return x.encode() def err(m): print(('\033[91mError: %s\033[0m' % m)) sys.exit(1) def unpack_wafdir(dir): f = open(sys.argv[0],'rb') c = 'corrupt archive (%d)' while 1: line = f.readline() if not line: err('run waf-light from a folder containing waflib') if line == b('#==>\n'): txt = f.readline() if not txt: err(c % 1) if f.readline() != b('#<==\n'): err(c % 2) break if not txt: err(c % 3) txt = txt[1:-1].replace(b(C1), b('\n')).replace(b(C2), b('\r')) import shutil, tarfile try: shutil.rmtree(dir) except OSError: pass try: for x in ['Tools', 'extras']: os.makedirs(join(dir, 'waflib', x)) except OSError: err("Cannot unpack waf lib into %s\nMove waf into a writeable directory" % dir) os.chdir(dir) tmp = 't.bz2' t = open(tmp,'wb') t.write(txt) t.close() try: t = tarfile.open(tmp) except: try: os.system('bunzip2 t.bz2') t = tarfile.open('t') tmp = 't' except: os.chdir(cwd) try: shutil.rmtree(dir) except OSError: pass err("Waf cannot be unpacked, check that bzip2 support is present") for x in t: t.extract(x) t.close() for x in ['Tools', 'extras']: os.chmod(join('waflib',x), 493) if sys.hexversion<0x300000f: sys.path = [join(dir, 'waflib')] + sys.path import fixpy2 fixpy2.fixdir(dir) os.unlink(tmp) os.chdir(cwd) try: dir = unicode(dir, 'mbcs') except: pass try: from ctypes import windll windll.kernel32.SetFileAttributesW(dir, 2) except: pass def test(dir): try: os.stat(join(dir, 'waflib')) return os.path.abspath(dir) except OSError: pass def find_lib(): name = sys.argv[0] base = os.path.dirname(os.path.abspath(name)) #devs use $WAFDIR w=test(os.environ.get('WAFDIR', '')) if w: return w #waf-light if name.endswith('waf-light'): w = test(base) if w: return w err('waf-light requires waflib -> export WAFDIR=/folder') dirname = '%s-%s-%s' % (WAF, VERSION, 
REVISION) for i in [INSTALL,'/usr','/usr/local','/opt']: w = test(i + '/lib/' + dirname) if w: return w #waf-local dir = join(base, (sys.platform != 'win32' and '.' or '') + dirname) w = test(dir) if w: return w #unpack unpack_wafdir(dir) return dir wafdir = find_lib() sys.path.insert(0, wafdir) if __name__ == '__main__': import waflib.extras.compat15 from waflib import Scripting Scripting.waf_entry_point(cwd, VERSION, wafdir) #==> #BZh91AY&SYgP `9#&#B qa=u<<%Gr u]5R͈[]fpk[I*CGq.T:.E=Ǽ6ޜ}}Mo>w> I3g1mp#&)'Z;ۡl3懯Ww0vm{yv)RU钗@P!BR#(}z^jnu*퐥u<ϮU4v6ݾԧ{=yםӻWP)PD#(>޼ VuOMRޭ B Gt[mtww\@#(K[jrqgxq n|oK:ƨV1=ͺ=xΧ{T_fŞݶk}x]ܱ{y݇d#&Is7\6/cϊW,]9ݓnMf%!fY}`y׼/vEYP!AVDJ=tyeWkv"׷ç)lx{q\;Ot\`K31[`mZˮȍݬ>卺SۘǏE*={Fo.﷭3y87}گ^#&v>Z͕ǹw MQ\ 2C}G^7Ǡ=׽w9|lhvo$z|wj㻭5޺45ZهwOMltTխAMrw/Q۳]lXvЊOg8Rݹ7v\ͩY5N;}GwmzĽTkEЂd{ϧ{}ʻW]ֆ_Mhh@h&`F@dSM'2m@#&#&4F4 h#@))a#&#&&L膁H i45'=5?PzAh=C5#&z$O@Ҟ=#M=LF@"@ d&M#M53F*~S4Lz6@ hSѦ <jz@d(LaC ժ^/8b,UU 0բ% QT Qnvc]VzVD7߯rgT^5O#(&#('揘3W&BHp}**f#(iE9s|C#(˹w+x#&@*"p@Dd('""A@ #(EA#(P#&BCmUUZ@hA0Qh24FI JR&ٍ,d@*b1h4YFfF6VUIe D6Ѵ,(Mђ&Q,-kTVeL٘JƢ$j[IL&QIԭ26i#FfZ116šl!a$Ț!ԄI(dFL-CAX @AXj&(JL dDS 2I6",آAA, jHZF3-"% acF3&lX fEE$e3͓M"LBS#(Mfɑ@J#&4d͓Q2XJ&4l&$IhL)&YE,4&Lf$Me#(@"2QJɬldd4$bHL[$%4-3cF&l0R2L2R2EHƆV$҉b5&* I#(i@5&)B@d"#dDAR#iɢ$%%!+I3Q%L0%Ji2ěS)Ri0%I*m[3FX6l16K(fVI1aRR5B}[[1 %T&c%E$jMAiDee3M%"Җ210IJ"iRl`ڋ+2ef$Z$FMfc2Bѱ-MV,[bRQT`4[Z*ԘA%-MIRjQYmml,&5@YiP%2T٦"mbXɊV2J̳J4TIR"TFji-`h؆F(!"Qe(RU,j6BLJi)MBLa(j$iͦP Ih$X*Ml"͙FKh4j$4FE))`RQYjI63fl"hdKdk!)Meh+I6ibRfH,̴QE,̔RmhdJEe2ffR DdeP&TcE`QU)&C(H hA)QJ$L͊lhm"c$ԛIfbEL%hm%DlL6#(m#cd42dI!h-3dŘjLL2-4mbY-aD3#%,U0jQbhDclZ+"0b(#TZi,B3-K2ڃdlP5Jd`b̡a#`4[cS*66fLbѨVLL5lHkaʅ(E3dbbB6bՋ-$LkmfXh)*(P"d%&ڊkM61`FP`Y͙Q$5i4cZcC] ں4C$V,#&-䐇{O2* T#&=6.r#(]FD?oX|9raW ['^iO&WV܃g;r/.I{wUXRx57JWJM0T-%H34F CTtP!wFc%DѨU9W}z@g-쿸Rc}Ʈ4ԯ$9E-I9 ÝloG4#&i"IL>mDɻ25>W׮} np XX3H4x$(Wo7JXbMb>]]|l!uhr1HXiHF#&{xi&u*d@n1=zn$jɁiQֱilZ_iz0 d-N7 UF9z*V1F#(ٟoH{#&ǥ֚RkMG4.̧^Ĕ0jN6qx%Man@*d))YQՐX'{j4N{;(+e6Ydy?qT"Q$I`)Dt&^Pz{y㠄DL_+p}.߹^oO]#EQ*XotAkΌ}.;0-"$޽MvH#J. 
L)'&J#"1#&:Klmt֩L -5TvU3< }lъRbHy4$`渊(Q%>!#&8F}j*@PI*%MTȅ?#(*ֵkqsc#&Ys@NCPhZߌ^ܮw>*X۝"Gf!JUS>޷;%o^ޞLXg8irI\"+8fe5n׷\EWxR~r,YQ!1hB=l.(M.i޲fYԙx^94(OCoʼnR#+ im9m~-㮵~naq׫^tMA>Z_NQs/vyG0]#()H:#&a:ٚaB2A)ckzNӣ1jDctmFhY1wʩmW$}0ֻ3SQ;t))Y m>'7q5R,K^2!eӛ.S:!{_Kט,Fr;{v2KhYB٤cjƯ#уOOXXɡ7LHXFN#&ZPR1 !3W+7]hҪjƐ)} .EdEBSjZu}OL>W՟GŒ/"sޅ_XENÛ}=.=;|#('5T;{~L24jAm Hz蹈y>YT{~̃>ek3*Chhم>7sXfN  Of\Q\DS"rάԼj&hx_ud;p[qRjvK>n _#&}#09N@I .zd=]ꊣynT|f^hI$4U{@!$glhfm8sM VT_ +MyN蕊[ҹ_:T@h_|#g_b)GXu>*e:=hOkm)֯?.fzӂ(tőK\Uٱա*vЌϞ0n"3(XMO]a#(DQZ#(ڻwT)Gf:OiGlfҋ!J՜,~胂#(W#(T_p6F-2T8ng0z(Ñf"alvwS3h3!crW1f@|\?#W!d;:g42iU@C,m`Ps6{c.FҧbbWupK?g֤B`~=Hvs{9Dxmt ќ]-f%g\O\?UQvjiH;O)Laڿf?)i#,Y5NV=Gɔ])UG^3|^ޭ1zԄ`E`(XEIŚZa ݟ_"3qb*2)K3M1Y3v%oR seHKA_;Wѕa+ _W`cwt< Q?.u2Onf?'vYgR<&mR5|Z׍BdJ6t7H6Pw>r]8B&wu.=&[gSR.2#~yz$qsIB*a:lj^.\AyLYZWA68g>CKߤK2tBR% FslEh awR܃-&`3̴ډv|5 [qH\AOKٚ~ l!d@hep9R;` >V*o>'m~ɭr8PƄL f**m+*bKha̾f'#&/YuH7*WZ^n&xSd#`vnQ+3kEKL܍M{G5r6OGdLtbbp`(/GqՖH0|ѹvh!< %|p _S>q`CA\= 8|El #0˗:ٍ#&vWIwF  %o!/՛-(DOw9#(At~~ :f@aD=0kӆ2bq0AOcc9$,m'FGȁ4d&{(u>%V,F4QH#(j,ϯ;wnej8 (E":-QV#(+v|Sn׶$|攔[6mۚ!q*7Ƀ#(B0/Sv!|`0@V#(-nZ|wiKbd3ωǶGI.l|?"A>}1KD`}^Vcps}Hf; ߣw.|zZM]1u>oua$ V '[,ʐ#(Sq@boQ09{8\8 Ddb9ۮwGw};lNֵ=7TɮtK`GKⶽBhfI$Zӓ. m-Apx }`gvaU>)b2x+=UDH)X801#&m/{(Ue#t$86H""UeDWFDI30Nr%,aΌc#D"( h*K\~kAbn(k(q7ǭ#(s nY~3o |W] !E%5 P^t\(DK-"A;\ڐQ7gv~ P"圄|÷7jqʐg;X us*NFtScÝ/~O2蔺%l>!#(+mMT"`1!=ҝQ0[2u+ שO :$(BRi<8 BUj ljOtk*LSW'k haCN;oR\˖K τ2'Q*mCS22JRҥb:# "#124yў/,:dgf#AMrs熬i v D$\b3ua4JfZ[d"b':V h\ WN9CFaP014>#(ȳwJ(^<;ibWRX܃܎;`ºFz9P맫[s2"|hhqyNEVE0Sa@¹e;/3}RRKCymG=lB v4m#8mԤieT{4կK5OA]C=<$Xq⍄?Mt WOLg.IH辝ӿ#2c0 !X@qЩ`4pݲ4C洷G8 I3]y$hpU*yyq}i70kT4F1 u͏ۏ$yvRnNJ^8KaiȥXJ`A1(~u#& =Sn6ڔ(KKs+*6+d0׉Mbh{yS6-FQBŐ/܊!né,ﭸvS1y"e#qxC+qSOuQ^G0> 6ƛ߫D'UmVjBw;G/# pMڋ!2i(vU~kHޢHEq7č<uAs;e>BFI[gf_90 8I<5)ɐ+\񻽢>C[t{?. 
'Lk#(ו<黠tX9#&ٖ 6ɚu^yQkO.65sJ!(L R6#&ƈ kOѕ*ņ" gMo_f!$==*?f{G<>[単Q#`bP?Z(s_C8:\'^Fĭ4vr`Ltt@$lC3 (DMMXy0vrHT{JC[28 o9c丗LA.#&J;tak1i>q#~X 3W~[L]T^QfIQ2fȬM+X%Bw?P2NAk0@7 OhE}~eAMH#&E8MpUܹmۖHH(%VWj6LExя=k^M3BT9@MIB|EgD7 Ѿ{z0^Zk#(@ ڪ:V+׫fin0?嶍_(#Wl{ok~%4IثIwo-޺=w%w]eD.qC#&kc2|;Ό0'r k?##&wѼ!l}fL$,Q'#&m{Kcxc_g =ܩO $cw@o=MB{;#1ǥ8GoڲQwq`D$Gdt[SW\dBX-)!Z+Y9۫GXhٞN6`8+R{hOO[~\Ȍ<^8xǣh琖yHfGZQܺ6OqBqVapG ]C2zM@TswxuL(GqAQHZgPW]~||gbjέ5LtGwYL22xϱz~ iMAZI5gk@H#( ѷ6ϓY`9En'T>Ϫ…Dg"corB)RafdS[ԴVb?wىDqTPYp6\ph1GւhPR/`BrEx}oKB֙|}>kalL:u;(gYB~SQ³w b+nWʶWy5oEd F@ZD2b^edn47R#(bTmoz5gNOߘպXu!*H6KC'۽i<[.sI-U !ء\`,~tPCXe$P3(1Fx#(x_QI$]x,tl_<w'%(/ּ$%$}yr2(b%GO~߾tg<q@5.v}~ &~ ZA z}\U|DHa"Xcm|H[A[ArߍGʻK0"7t8{9M:ⰱ*%PJH]JCǗ?_@"/GX"zzsF md8d4.BAr `AtvK~j/ݻ-)O^^~:C?Mx_v2]O㍙pL. Q:F ve>fm}+wdžLdcW\|׷чޣ 4#l>M]xH꺏2Qo4]xz;-YgѨ^llOž=X#f X]Ϳ-R_|n$)Y|.iVGw7|k4[D|bۼAp,g"p5[#(ן 3׳<Y?1Qkrs # ʬ9jEE}y>wm3bo, W7%wW]f?0ag-CF56Pi#&=0\'V-_1xYV#&;Fk aJ|#(I+ŇOkIa洿D\.M Cž, _\:cװ=?{p w /@ ivϏu~~h#(‘G/#yu|wWVWVZl,m)N /r?0ԭ~y*m̊07#(@0NYw{;ջ߹~1=Έw={:2x~ ڣrQ*1Q]; #(q³vv3vpy%Zơ,c%C/Qq4IzzlZ6޻uRFz^9s]0ɻyyĀ2KI*Qq#@F#I[RZ:VZҝ-lh K Mxj8lѰ%W# "1V~U3()|v>xhHͩ>[\4xO\&cE.V9ϫT8̱w,P@ ;1/mF#_6k:+S@%IBI$ofz9p;]#Gl"wznYWu5YMM9[v@I-ɖ\u_p4D*i@xH,&K*e0#( 'Vz 89C?nې2ȇr*T7~>g'%ʅ 5Y!2! 
G^1(z7efX>.pZ#&CUKT|_ ݃ٻKw.$pN]ʹ;@#*..#s[ ǏaSzl ]ˊUec0E#&8@l<}|N~ߊ/u|](BeȘ0#(1`"0Pԭ +G0SLGխ8ukGMB h|N58B#ΎFR$ޱxPh;-6f(dQs?tkHFQ(G<9pV4=s#&iB&r;fA8f3\ǻNZʘD)p90!5Sih0@qOM( С}79qm0ʪ*͌?-ǙdU#Aˑx; #&teN0n U"%뎾~؜__-W֡I@HX#&~>9!`I3z%b: LIvΜsI6#(1S3J#&T:Qr#&iHa7Tꊻl|Af'8ZVǻ(qEJZ,@rq.pU83N8vEskX*pg>on{{כֻ:EӉ0 (r4"P:Of"QvE>Qs纻/rvHvk#&hH§&cdm8RUފVA"s.Hv29QH%鱧 @XTc:ӭ1K 1 ƋExՎy\:l]x4Kc!XĢE<q zd4Bhayː$33 xƣ0Iݧ^rU3Tu䇂^#W)V$!.ȍzfR1'LY#(#(DQJ{E4877v[  (Gjim,F8|Mq/ucRgvv8?BI >/^Ô\H6N:#&\Pۙ(#(FɐygbK*5>A/s3Ca7u͜QMcz@:#&iMgwrc;bĒMizf֏I(XmPݭ+0%Hv6s/v3\/ ?fIu(DkjvߍN=j$;1ɶ ŕgJm6H{cEiۤ^( uT`8:Mf\>в V>*AF#&;cliiAԇX{YK9kSv= L NTɴ(fBxC7m7%㽻SRS~2O?=tSTkØ#iꉂTvfWmOJN]R6]#&S"c|ֱOg-f#&EБRלR͚#&j#&燗I,Yw, YT!c6PVHfN27FP*k5fei>>rcIl TJ[&5ˍF xv-l[V7–i{g~=Jm|z"h@4-w;g\13Ƥ^M19rيC$).>" ~xq|*9@櫴^q*SoyA)@{AyKZJ 4w09NYy[U#&Ga}fVB#(طP걏hOO ~Zw.GIޣ/FUwL^iAв`|+hPR';vYJT\'^x6M f4P~Sܬ90~M,~ %^Kp Y X5ƴ(qT9S!:n]$>DKL_fm%هiJclzßGV=M\|\|r&gvi,Zb:&(/, aq|#(E-Mvhcrk/zi0LK"XU]errQg7cdwBR#(ݸwZ=Ϊ@ lU2Fev8?iRxTuϔt![ p=Ӊ}8KX %A>1"@it"@sp5uh਷voIgXVʆ,MmvxCp5 QᓵrtA s5H1$[:&ϋ漡03m_o[_q8n%_#(<'O1%¹lf3dvXΓ=xQcH.IxqC6*LT:U](|4Wb1&F&]89ᥞlH" 'JCv*-tL4]+6UE Rx%Y i.v2T)Mz˻AwHD[E#&Q]pq('C@rNp v:wiƺXV MHggQUUTՃ—4R i"T9;^9޳؉<,e&K.5[gCkyK#)Q:?i^DSXBRP>ar&87WN6҇`tcQ} k:uc @%K\6M▸Q1DC=A^#&\_6ۣS[c6{vq6,: L!#&bD@ދI,#&~tsַQ7mIzC|%r;a/]HB#_US:MYlt+3%ݬ;3B t-(Mەp"V3;%~]~6(~ɰaWkb\niӸZDYpA2՛gf6O%DLtdH`;1M6悚M;܅Eϵk-$SRT:Vesl kPg m<$D Mx$/%:7"h/2*%:v8+plan.2mI\L"̑6_7f% -FS%Asη)Hi]Z[V,gR7jo_ݫ=bs.jqH~>ڑZT=1I\pQH|6vmkjQwp߶a}+:{-=gXѣ>"V#Ui85m`N~rS"x+O{N\Zخfv8gjok٨w߾z0/Uc0/e#&.Wsk)4YJ6kD ˭ɮScE:A6#&$ۛ &fɪisAɪ|#K(-h1Ҥ=I*eǢGtGL'a$f:9,Ɋ6h5uctoARh}_HlS;k2Z".#&\tej3i~u`\LْZT9YJRu!Q;z=}qU$,YE jpa#(n_2aMeӐOx,B-g&VxU{r?\6;,X֏t㡮6x*}>[gtBp{7[n[,=ZuC'[0'^#Yp{o\k޼aE#+\+^~gm;%1.AYwڥ!Ka]o2gG1dL~e9/[8:ongQs!F1¾$|/Œލ>lf NOų[Z66^Ͻ+uaR>>0F,0[܏,Lwl 'm6R-%MvѲ+spwP/cŊ}jVyF^k͓DQ.ddfkG<.׌s[9N[ s NoG(N'˾C#O+phtCmj\4Aѹqq~U9u@D#( 3#ݞTch .خw?ӝm@NyZm Xy[rjw5Ѷ.$Sԡ\6qhk~Ee#(ig6kZė)9*aA:pllt,4NF18*qlu8⥄m~gouŭN8p2:Ž"#({'lSIan0HNB01-`S2Q) Q{jbL9Yq~/6#M4_G6b#.|8#tO*x3ێǞݲ))h|zzXt 
#f[udVa3eѦq窱RALƥ\NܼmfQbE#|jՀ.jpH** [;y@!9o6ˉ}lA5T$ ;l͠(nc*e,.j+q*s;PNZq&":feØ+"evmydBت(6/'_#(3[迫w۩=#&'s(!>igպeҝYh0UP(svZ ;nljdcJ 2|m"L>;\U#(DHɓh|܊Pww>~~Ck(N"gی=W*do>bApu|#&;Z??_]0JEYcdgtnsMv85EpG>7IH : /NtNW#&_0uAD%w!*fCu(l7E`y(q'u]9Nxy6=-nq ? zT|e~T@aYX'^J( /Ny$˚?P ~khk60iMws6.0f>OHoyY$SŠvXi|ؿ}$6ǫ^wijĈ.RY77I ;rmg={#( PYgm3Ľ琚V<(oPaz$bFDT{uEF9@}l.Rl{RHxC 4|d:`v Oğ̇RX(WZzꁺy!Q0~طxQ]5\!hcMUz#&kpȠ0ohZXpC}} n1͍-IEKw:e{2ѓs0{._;_V]7(I$VFيE;Gl=iyA xuP(5oab̮ƚJZnx>yz!OdI&{,gMxS'qDO/ܢ9$VV|``l+nٶ.Y~{kb8ߑ|y<a5Ϯ)iwxfض#([I,AEJHsuɒzWyCpPL9Q U}{vռ8uaA1\HijįuD7?ivmHiywL%hv19s7z>'[uG8)-2Xs٢+HB#&u~. ofF9ƪкØ0}XFnTNZy傥@R%"~sZf i/wuՍֿ=\y#(uhH`ji &+pS;3rރﲌypijd'I'ʒ1 LgJ!>KkBohv:1S5"wsj]0Z/RZXVP#&3Nx!&钊a4D;/In{䂾Efs[=>4#()L%{@5XȐ$ TY&njtJSť&lr4a:ֲ6gd^'#&a)|Ai5۬(aNLH^RRBp9RDK_bC~NV*UbR,g#5(O0 Ñ@2Z#(Ҝr} H #&oP࢚k L³2]熱ѶSHCױդ0rmt#tkRDIP>E&Jzdd҅,H=p}h(%憷G9s۵5%r/#mem2cw/qHā(8-BE.!"/ ;7 װyfb{{05y3ЕT|4=^^ O+:H(Al'L)DZ}#Gg=%[aPW2UqI57qn1I5`D(ۈ|a _nb~ʺF2e-uRE?#&?Ӊ|ڳw*/ꯕc9 {Izaxxއ,EC`8ءO#(X~"'hmCxad|`u|,yWy]B3~#&i*Ua0 )R-ޝyBR lHrMP.fS;QABgO̿:tb?/V' Xp+C,RPaN,@Þb;:LWu"94(Ntd\MslZf̈. 2:{oAdUa7x5`,gtl`ڕ96ITe5Q.zt"F蚣SKN]K#& 8 FE遌ctez,~VCtoU~ܤ;`)\)D>K}r_ =97=; xFF0L`yFOdj(*9n6Vayzzv=w]$ /Ѷ.!wq.g>t]D%de4*'硖 (y.sL#(2_5ss7l6Wߟ=%mHImށ6Z3!ml8`ӪʩAhIWAQ3/.trNB|.2Fsm qͲ8Q pc;G$}ï|$sWwg]QfX[yb$LkP=O'[K<&gU3vRƔj:A}W A("$D@Kݑk!m}a*|sQBb#(ϴ7=NުX1-lGCA&,D*o< R̅9ugшhx+P߽_#&J&Cng~n}қLJ1M@`Ւ+͋pL$C#(1v :-"Uy1Tyr vldBR,RhNP2W]K .,Cρ +6$ y (t.pmV!T5#&1'Zj%,4B?Y^ݫ&zD>u~kIuIzGAk#j<;w"K7m1rv#(d]fښ4r{˞mmm3;d%3foI8yzM}s"/xrʠy{r#*V+9o[ooff6L.A`u10ةUK`lŝLP0˦/#&W~*_ gZI|u:H:wMIc"'SSWt\s۞Ub3 T`p@R6ۓF.6p>5lg>ۄ%1$U"*7~Sb\%%t"bb|gCF[_e\/<@@ @=O<+Ulhv3,MY'>TKXU>?gd*aW<)ZF8C|esZЃU#EHSCE-ZO~x|ށP(l|&]tR[@D><<}'J8f1<`-RS'!ZmͼnWᅝ%:?#&Ԧ -Be!|Tn-G%Pn?b-D"t}|Y{ϥ#(P^)&ڹB^a^06HPݐe!/ٲD1_`#(:W#(#&~X"r绘{΂梗0$[mw0Ty5y~ w BM;"߮s;7Ƿ<`{2XqRLX0fa'Fаu32qv< 螱miGm;j~~'Σ!%{߳nxÔ)|4C<:nm7#&WiWAhfH'̜P?&sn++N~O'[xOy|050e0{xpvy="0>98<4WӏWݧsTcݳ˱X9[SWq)>UE@= d18O_t⪃Xָ|i;a0.}Ri". 
B H#(Qh)uӫG$=?1 L7i0.3*Nh\ufYp#(}Ui|BeVt \*6]-/_Л#((ķʴՏvi3}'QXS*G!''z-NdwBI:xN#&46N}・@1b6:3+fd嘂ϘF튺HGpa!7?tƯ8#(MuȐwnmFsVbSd &e֞t΅8mOqƮR^?vj+M$ɾryyh)X3syU bAHY#(܊TP⥅^zzVF 0uHcQҫ{%j%9ʥ pv)]#98+nR!eLoC2%n:5l>sx-:K¾>?"6yF4o Wͨ(2sL!ЌFa;s,pFH Jsd ,*AΘ-/7볆>'iFMdoV.>,a-L6mUQOjC j&Ë6_[vAzZƼOw~Tq}Vio~\;}.X~ p=C#wSp]:s(߁#ֱ_lf@R9 #&A6Q8Rⷧǻ$H$/`<>!/FAꩤ7(KOE#v^Ay?H#NyA#nJ5BhQDLO.F(Y\6?pSb*@xQS~_0 #&?O/~\D:0Bp\7(9K.#&0Aw/ZLe%$"2nAg#Gl~0`o+h\$ -7r=s5C~KaTn{t?NK,0kl~=o$hd {|>EeZ~{_Q xTB.:\%OT9ÿBG~~luJ< n־ՎH}Wd.~;읩>2܌1%k*_b׶Q0IЌ _?vt) 1pV}q4T(r#&bMzώWPh-)Fo#&[@B1.eb_=X}l!ޅ#..(UD4:ΎlzYϿYl?oޡwW=]f,ܯ>*H"jY ӣّ8H#(_vŗ˗5{/fTcyo_Ry0l @igv<7ܝR:)ei}ӜDQ %|{(evA|tpxu|@~:ܸB(l"*K6ґ65^&Oߠ`}AKٵ_T㹆l]rqvEB)}+V C(;yf_ڦŮft>ޓO*TO{|3/LNb$eb7ƽ7Vh\ t;nk]QCTQۮގVb3W 񈘬WBF6Q.J=]-3Aٷ+])xMA|~M`I q4TwGu_)"dā`^ ̡%ۮNۄ Q3#!ӳt`:nPEuaj#M_H_cP99xvigjT1)fh$nmy] '?>)"V-$`j_^-Hʸoܝ en]F1mНcy RM2~پ:6d϶%+#&fs:#&ӮoVq#&M~صa)\KՅE7fVmLijQnoTpX 0v("R=0t4nL]CھOQ4nBh#&c ʍb|ԠnO!hJٍ] _8NFW8RD E#(^('?yQmfdVb'B%s윁~/ͦ2gG3a5DA#(?k9Z*z)E^ }#B#&\'pԎӿDj䭴eopqIp;5;C2|jL2[s?[cD筵q,k>Vt: (%i[7f;|9nʩݙe] _.uHc׼U!JJ2*k`UU8Q-f95ٔ}ePSxuuuP,3"Rv7Q=p6 &,1]83uf1u\'u ?fլZ^N#&v0f~_ZojGFp\@섁mĈ)O) ȟMk Y n?Sm,tg+0 0jߨ5@u'&D p:BL\k:YģIQ[0܏S@xv=(k4;(}eZpWE?߰99KgCۺ:ibhG~!5XqP̨_&z@Ci_-fٌXُ ̆)˦WAXs@6p$GeS2H悝*Ϩ(?#(kb`~D]ΐo+㆗K;"03!j#(`91vPT|b\L#P=b#&fo0 uKw͠ != wF@Y(R#(7@')] n͛*i#(.,Nsoү#({,w#&FѵI`| ^ QC#*f% UKg@OgptM۟gaH{y]w6|&u߾.|\[2q?ӝ3#&e c;gűMS2{r±r4ÿ/~^X~o*կ*_L?Q4<OSjq?z} d8@O#(;/TK1s#(5q qO֘x٩UM*.Ҵ*D#(TV1l8V8h4^߬"~K~Mnu 4 'wJTPE,/`{i>8|W#(=فo3,LϫӠY#w\FNR(`n3MGU&@=`Sd3RcTW6g7̇ 7|@" )#(K1]l5qHi_`];" g8?$?iWOέbO߈I+jP!VR0Eߒ0z}gxR 06, $c!p~;OP' }b@}s:tu]J9Q_G5{۴ӧh|W>'CPE HC&E#&;r:10Iunk׸#(Y0#('( /U'<:M\/}4\7k/M@! 
$ d ĊPD{>$@;a!:uqHVZD3x(%(|xm*݊ )%ˉ1aET5y_ Չx3;!X ?@,EaIIgث򼢍}H3XsONuк I-`}}#&Ϧ:#(MVD'pPYDXTIB7 %)OwȮ}8oC|4rϯ'gטqȏV푠f+M 6'SE-#(¤\ H1DRaȳ2htl`#(#(K!ѩq!#&%HPP 0sT O!?f &a?& $5D2NS&' bQ#&=N<}az^%H<R9$I#ԘĂ=|^-뻺̒diUX(+'g}c}g!N) y;raf+ }}<^ M[bNġ#&#&hUb % fp,!q/j\#(n8%) #&'Xp*yQA\ypy35cC#&X'@F῟cb< ǂ^&B=rzNa%b(GkKI7vu"#(fy?#LY"iOϐ{{F QHQ, C(%B@*`~:h}S: 䑘y넁iT!وB'Hã\%ukjS;22#(g#s\'YMw8vC@؛ 2qF5ԗթS|F߾X`ީvIJ_OVN=Q'xPSY S~~s[RX*l3M֫V '"z[9#(@)`2#LμrC8j fP,JlI\ @15@)z.SIbf߉y\U_Qs?<.3Pjā_ 6c%}[y%}#&H2w|5ˡ#&#(YX :j^fK,{uwk^Pcg|wi[VK7-)&5^6AMlnD uj zn$8M]wJ 2e;D=Ǻ>J:@qh9&ǁK`Y8b|zWcId8Ѹʉ ZV0&O9:+2owl؀Kgn{j2YIC ;7;P1䟍T;DAvUBXN͟gO8utcHx!ttVyӗ\z|XEmy x܍Ď#'{5+~jH^9!Sq BZ1^졛+ώDlxĖq#&#(%@ph} HM6LhALQlzC#(n]u4A`xu8t$|iam;!q7ORkP8 ( rI>g\0*1Y$r EFY`=45F3CX45 W#S{t1&" SšH~FCqS:3-Ay{!K GW?9߀;Y0f0m[c/v a8xf+}Ϸ|a0SգGhD)`U.~L$Ε5.EMaII[u:#4ˁ>(qI'HAprDxUmaj<Ͼl$S_/Z7Nvʞ/ M>Zx#ğt$aztKjk$FL*Ryϴ.G-~DI#&0XH5AQ䫗T3C"!q|seO}dn 8 @χ >ܟCˠ |#&iC_??GqgQ=?("P2(Ր00αJK@(h"6%b*4UV6 V$6Wo%"/f[ץ Y#(H47o _ΐm#&61#&mЍd3n58#&6clklѥ6;6H\o`*Vq 󾳉҇13l`DgQ4%U#(y?dC6zݙC*Գ}6\<8U 8 K€xATwjQ>%@ Zd)㿷nhknoD$tpݺVfǷXWĴ7T4й99AadʩQ;fOd˳E0A#&Q-9AGׯFF$ RD#(HAɅCۧ<'5Vlޯ&^#(5dE[d rC}%< ?u ۖE+xv/ ;2A[F#&Ȼ@HitECGR.Gُc>o<_y?7==}g ?b?tj UDE@ }O~JN>tAAћv׸Pm۷^.VeP=́ӂ{hU`^^0:U/Wy'.!*|9ר[i,>hA/`"^qzl((TXҁJf<90(/ŕ#(–ӊ¸:tY-md$A͆/A%?ْIjRvưDZ[.޾%ƼD{9뎱KYslmxˀaf +_R?&dPJ_#&𺧥ژKT]D9lٛ%HHpwK/Ɗ xt)5]\ZT#()̪sNw!>Ꜵ?mp^=R3ZOVV:kM@:n/z;LN"tuC79#&ֺxu$?zo(ҟuOV+jҭ`C9u9ol-2.{;%$AG 0\*ԯմp7ѩoN^m=SLNvBh\R #& kFǗﭹ#(B|PY{^_9Gi:A@ #&5?P9}G4rۂ2ur[8TiOȤSazr%pi'0q/((#&jHVk}p!3Lif!Lд(4X$VLzճ.tL. 
4t.GdـP/3 b&#(\)8i#& ж$ha%XVڀe146H#& _$Sߟ=p"AߧBBl5 ,Cej׻OaeCiRM~eUP}K8s#{>+Ob#/8J3j CY9x9?`bz0Y k2}m8J@fxbwzޑ,~@#(, n 'YBͷ4vf#&,:am 8(NYFؔ0gY<7+͂8NKst[LI&ݶ8#&^'6,dlfGg./a q9NEɵNfFJ$!XtϏX2عfo_yo94Pƫ!w:A ;Yg'f.86ӖP]ı`@YlfTl'GeةH,*]⫁Cf{rص}I0r)EPs}j:SwWcu׾]la%.BdEwKrP5`xLٴfQA1<;AN\p|'Mʝw2I]gBٙyqٍ3sXʤyR٫lŜis-#&^Sݸ9pQD`ZM꓉Y|Be㴃"o1=tCmmYYRk1peP]ul!Ԥ*NhA$E͌y?+D8YL۬[9MΥX*3tUr/ Knu;O$@ os gT!:]qF@;Ҽպ(A;GeȲKI|6(#&Ѷ#&'A>ſ)p^XpOobU&I Ђ 110ʓ_%l`A@D%=2g\a:tM#gDacY BXX8-M#(ij~1QbC7!gHN("Wjކ#lf?7IQKn#&5 Ä U Ķ<0L9!MO4eGB nxAp=1#(A>0 ( o#&P`1amM<(YNE5?չ0pY.U:gO-KaǴy}0r>?IOUBBBњ:ҵ?^&Jha7#(ZJ=\/*daA]KeuA^qB`Ϛit@x(g'H%N>1Q@ ^( W1"$(Cs!;CMSr>àBaa#XH0 >iEj*4ݶۈl<¡#( QQc7^cGDǒȢ:h ~$`1hj( jQuNIӝ[8+0Gw*I ܧĤI8Md"Ċ "$>+<G) @0tu<|ek!¨pű;XАo#̅U:1M6jA #/CqiLF4필;XVMG^:u$L8")T"[@@cRka$+,74"Wui9Lnvf{kYdM[}qݡ١ BQEQ~^r>k$*w_gwtDwtDF/gć#&Afö1yVJOAXLD-A(&^4@'aܾ"D5Ϳ=߇CʡŲ1\\l  6n6МPzрmlpNq{]n=aw]}U{:'dBJ{zەlNePs"GvQTaC,H" O2 &HH#w 7,A;NuzEݺHp"e Y#()M1#s=R*: йzT-c[Em)TZZZmk&ՖZd`!H]lΞEFNv$$H*}Hrtn \5jj,K)jB+(!1@~:G}هn1z&ԍlQt1JfeDSI ]LM "ԅ*xkjR <:иaBE]tBSnlhξ{jtQ_L2Ta,4vfsW sY~Y)ZC9!lpt;#N55v:>tD4RA1Hn6[[`b_]5ǩ&:)2w;a8[ïm{&{"_I\m%ճ~;/&*AMA#&a3D1HKFŷ.#&20XaZqKU,sT6)\ 3CiZjBdDX@H@{ñ3Є֐7Y*6ӧ^!;EH>MY(> p)qb7wUIg.Lm }:1=ooQ-ao`Bc\hQb?6#(}$(EOn $W! bz9r=gCstB*-T+xmʂ#&&/n缝bVQ6my> dRwɃnjslnם^LI|#&W(S6Z<;m :-0RBă9֢Ct#|e*&!\,,;Skb9Sji2ɅRѫ e%~}ETN;!zI*n !W[)NNq c#1a$"HsNfZ*(pp^E2?"(bj!#&}`Mξh;bI+TwpzBJ8X4 >sĹ9PU:SmG@Z~ZGsyϊ9pƣ"UܺcksnݷmwQ༸NyVm]m,1JLIm;: `,bby#&$ꆘͲ;X]kkf>seY80[0WZ%k0(EҪ3{##&!ɿ%;<)=цN@]rʷe}}i3 '}1yv=Ooxܗ.3*DKoV"Q(J^}ܞocڮY "SfCX84-#(A*or<;MJ7]%9@7۲YSe|wea傦ͻ?/pK/#(̉z06{xFȘ4*#("C&%dq1nQ-̳jqr!8M#č-jk1cFc8W ^7Eؠ@q %',#&҆_C#(`ɅyUިYAm&!|EDB#(F P1Lf.'?ä۞/;ʈ}s-v<=O(GFsN46#edB6P_K['ȟHUu ~?'IU4U5#(E}\mY!)I8r;bC!D25#&cDI{&6ӰO@pCuÅu$Hm-#&L۔w)$oN#(@p~Nl8e'VaL#,d:h…PwIOTCJI\^G벍:EƌH$P!¹J0Pկ! 
ߡu8TˈLm yB ce=#(/,$2f[ *œP5ai13ܝOPa{NY@F#8g˃#&MlunB[toMv!ؗZt!qf!dg׍*t񒉏^xx=jcB+;1~$jڰ,A"Ahm|vԚڎ#(Ta; iT !zړ~_^N!H{~#&r`w-UɾluUjG8FZVMA3p ڂZVl ހՊ#(i%@cQWJ:}SEĖ<@&4/ V" 7/v"F_\T+auzo2V8HonWRF&؆UI{,Dft]W.)M@}}K]S %&á 5^m2ی=M2q>]BA|fjp٥dˬ@*tzY 6?ϋO[1 xCV\ ۙAY Av]UM}M{t6R$hW2d;d].Ѩf>L@ -$#(IEo۫jsM&Iƪ-QkdQkkR ?DA#( 0(6cYa^Ɂlx hB'a}Ԅ]@$T32fũ&JEf`SI %_#bHI)1$c܌#(YH 2mE#(#&F4%16[)L!,ٱ$b#(AOk&t .zwwǬ4c?dxػn63E!z,`Was_DBҷb}RL$gZΝĢwSٱ/scinbt VWضי#(lYJX4Gr3Qzvߌ&,),d2i$'_,`ed(Rc1N X@J+3ܒ/"/+*6,6D}lL5kw8 xxqnv<#(JuUO!Aկ#&%b1ٸѯ9783!C|J[a7ђ$q׮d0A(SG׊+bCS[l|>"0&N-8M[ޥIge#&ǔhم$HE+PyCaX/oXAs`/+pqۇC̀QݍxgvuSwPl/:J^ZMٕlM֚K0~D&6_e]'#b[1g w]$&0/!)8#&g&TvW#U\"Nn(Q! ~гF>:$\2rt_#.(Ԙͅ0Tkwu#&iјix:ƞ޽k4]瘺sKy\zdiаxk|Jc.Hmr?[Kʤq)NT"tpQW‡bkW ȝV+hv܏y퓍;?ja^_gܽb/_ZPoܘw`0KKdIi֍?SW6{'20̓mrFj"9#&SoV7Գ .8skB7%#t;n1W)aƘ`+ni4Ѱ;QΧsƁӢD\Q#&2<4D نM Ί Y’z>qӎqoH<Y[TMfe9֣#((4FsuM#QU5qmm!,bvz].:pOWﱹnҘX;DeAJ%vEi.T1t%hMm5/Xv#&^\2M[#( e6!y{J34gF=#&9|375iL]G,rW9Vؘ$01ygE]Bf9؉SaU#(q0Ki=IСVkq)VFا2&k[#(&Lޜi_`,Nʮ!hj7rst)U-n,C[7i6 'A0ɉe%/4ڛ$*J4\`1{Mr#EIMn`Lm0n柋̷Xl.0Hm{E>H;. gٖl2~Q<>qF7/RM8fZRX8a#xLzD#&[M ynaǸAjAf9tB[+#&xCḧoQ5Dp(߮Z&5iec#dV1q,rF!Fe޵PfٌcCO$#3dfd3#({Ȇ*:46d{z2ѱS9Bf1fploW;-h5Yxị lSmBlsI+LF 3.Vձ #&4#jaiYXiVYu3fV\fzmxI4Z9UUJdkV@)"7ua;'Sݽ]#&B4ښ8/x5S Tk |ʖ#(Ȼf;U!:#(#(µβkѓKZ|d[`wc`ݐ4֭T8uΓ\d[k+~xJ|R`lx&+DdQe3fkCvi4@م<k]7q'Ԅ|ە8 ͔a>!@\#((hkaEm6&'ent߫8XN`vh#`|^vCiDDC"O&4xDA8¦\դyh"FLf ꆅ.B 8v#&bDb6F*1wa:2HqiSɔ`s W{Zo+쨔i1`?JmTw Fm(CQTCq!,VgY #( n#&QA,&F#(u]MqŁHVlnvͥ&QR #&*6 4ϙ@Y#ay*"d6,[c&#&r*8T&h.xn-uEt@hj*dq؍uVEq$󸜊64q"kEӝ (#(އǍȻ1*Pnko!T4l#v#(H8dbqNaZ[apۚԏfFAĎR/H6YA6s0Vp%LPt:ePA#p jDC#(&#( @"ELA,OϯwZFְ'J}+- ZCYd>FLkz587½)bM8-tM#JjHJdBe2CY2rNmfKH@xɆ*X^W/`Tc#&-54Z6r9$_(5`P){ XVE0`m>Y[#&W ;)^EXVkI"#(#&&\-l6`AATB q%kPa 6ߴ*`b U!kb^1+;(4oDsȇto#j9,uhMlN6Ͽ4,{jz м?!"HypA#&]eZ PWrD0>g}(;$I&IdI$&;TH@]X"2dI&J a#(߳`-iٯbHN?Pl=YP:~, wn;e25<3W#&K(̭AhBk\|*ݻiBLRaŚ`{$q-bi)PRPjXQniM JĔ^L$l?syCTD#(iڮ QaLZ2hVLmQL)\%ٌPnQުޡrZpD1" 8+[ukqZbgM.z@p#(( H#(/Ta}_ȁJT,$45? 
|QCKɈJV磮UCaPҙ*6*ɪ7i@c{wKWtW;žȷVUqpFD:z@ pG6$XD=Ƈ6 u43 khM^ml]*H?K0¿YD)&QXhV,QtdJ.B ҆*IFAj Ԡ˔!ِ !-&0#((;m+f6^$bMEïzzjX^MFH#&jQ"#&1ŖwPIG4mi+p\\ Q*2٫}$F٦:Db4L 0ՃJ%AD5&QA0oMv FSUs82~îDAI ʨP0ID1ܙ o3SAAmT.(El)7r`R$9RL齺1m. X4tR̨$I)&{/ Ua MEp=7h,(x&K\.","@!1pP0h+GAX*o0O~M"79bI K:.hb\eQxL`x)PGtP#(%oȳM'hK@99 qo~z?Ômscn#&hcbӴSQ'hԻQR0hfMTYEdXHE1mAly #(W9oՒ=Jd~b6f2ƥ\ċPpR+UEo/K;x4H٠]Q$#(B2zyU(7!Qs!=i<'h8X! ȨyA3ag=#&iWm1BDz\#(HnEKG"lt:#(CM1M$ZӿAQ`Tew@'v=P[2α+2BNQ}AYB>6G%c5fMHdRmLԈRTTVf2ȪFIZ[iRU4+ebjk%Kf {Pul4 $L0{5[mZ) `t0#&l1"$M`i"DO3: }>VЩ%1,ʢ\h!|j{ e9y"^ .1=meNGĚCS5#(iha=i#(E@#(ZK GW^qjL8ơ@|VP4C=e^e}'P;l^U/ t$YmGۖoڛx&ӹ]UCɣ=WPJE#&#-`图tlRg!֔'sYmr<RU#(8^!+@-&ީ@Qbb$zXPlI񱧲c#(7(ďb#&6@ĀbUHz`i[sr,7EuxX,H|ST@MDbEҠBx`n,h% '?^8P8|d^wiX9@it;06qXɔ[xm@RY$BCXȦ$|wd(h}$6Ӻ}TR)#&HdPVqO2'T4H8nDߦY* /nM$2{;4˝|/WWdm_3Jλ,duxSx\viTYTj0!XPuZ҃Mo5+j̺7N#[#&%,|L F`%#O \fMǃ]\S#5e p?Q˩ZHTh7v٘HEqfWy?ȃ:}[ I>A_n$bwO-8EIR:AgM 1k1 #&Nh.D48at͝ 7GvcTGq zJX9Ӱ#&[]lD9Sэ݆5QK⢙>[3s\#&vL]*18aZPBB„(%)Y[ uiI ȅ#t};m;ͦ5yk*Yqy#(m9KBHD@p(d1*P6=Bri#&^Y~tMժOn`]j:]+CA bB')FbjPP#(|N1""A,)B*JR+w Z#(R9 &YDb@:lLaQr%FR7P-* iڒ#ycdZҴ"P!'x4f55BaUrFp&7R%F<#(ZrHe0FU]c0A< 9 L@B1ǔ|ZV'&YRaꝷ3!aGI5#&65ᑴ8((Mȡ "آS-#;uv[˲5t5Q#&4OBmSH(zWœ';crk(RDPKJIp޷W?ӡhi5YV&q#&Lh1$l$a3о߹,#:bFX-05]c.`Q!z(NLP?GB^rmDeWӀre\\#yk"kkGmZd遲0-ĉ&o;IQ>ChF;#&>l~s3:h]#u# QjS{w ઇ4J'\=}ˆJ\F҂L-:RFp-`LO0Qty1y~Lbs[<<2oyoc3*Oo (C {v&nE16IؼmmjR:ǖ҈E[Dav3_nW'Q5WÒO"ƞؗB2+9 @jFiZug95m}JRD!@ạؗ<9ȆbHѱ#G*e}D;v:^( Y%_zxZ4*zsVȐ -!uR:ehՁ_/ad"+t@&(vOFDR^fIQ#H/{^z$wkŻmWnfjڔ+Z:2*g",`H 7'k%iZO]J&i?Y}ɲP1@`IN|uU{O"}=HzFw1dYW*#(@Td3ub$-LU]MC<8-ƛ5BO`vEkefƌM7{mCWJr걧Uܧ=o7OMIm%(׋PTWWtAn3K^QVd#([N`u}zÅ~ZX?aA:(U n9bR])@pz C&>3gu5Ӎc*2V$[iSCKJ};K;XmpG-1{pKVFtudO}Xt{ =8ilaZkԦ0tlAƋSVn`d|#"HIoOeLn Z#fd}FϐȌhEwu.Tgg.ys>f#&1~cIIߡK^} a#&Xf@gw˪kN |u㙜cʹk[!/tRr#&S ~dT#&nÁ.HeGd:" Ԁ;tjMV5ThQlXD* H!QT/1^?5".ޮx*^#&2EIԬ[ۇق"GdkR Y/$RGPБeќBٲ#& l;Ÿ1QgA'$iZv"wgNܞtq"rXL8w*@x`b7}(K1o[B~Ļ/#&0SԑSHYڶ!M5\xywR0ԗ#()4L@Vh@v/n(!DHDY0ٍfbDڅm2Ĝq՟w]{?8 (]nwn㬭lc4N֟X>fPPV\6{={z#Pb(\{v~yޖܼz%l| }VI鶫H8oT! 
P$fn#&jŸ&$/79Db`V!mJ4mi`$z042=2F Ґ|`$I+Fݱ|/jy4;nRU= hҤ`f,ѓ#(c"5iPT0b""T51!fu]+!0kr̺X0H"=.۟PdCEgɓ9{{ $83XjXE# ia{M- xRz/)C|x|ϩק5L B!*S>bca^OΛ+Gaӷ?fYGt#&;Dɢ LUhi`׸9TJ:YۼyBj\c7[$BF"#&P}@<=|UϕZ=vK`grE;j8t T)) ^JeQ* XQ; Np 1#(Ĕ6@~0̛RGj@QEuUbmnm*-st+S㴮QIx-kjM'mrYe;]Qѵw]5uwv 25dj QFDWh#e@O(J`#(>z6lvQ{#( M=!tEE >;@G^"#&I @^F.r]нZDžt/]j=о :$zyW}ټ#&fiXޱQ:b(mE.X?kLSQKVkj|ꋖ*ΙeEr%~ryF_+֋U"#& 䐓d$%b(G+]yDccADŽiH]uL몊(c5fpa?{ԃ(AȪJP#&BIAPnaTB$K1S9 @mq#&6&6 b&UFT"n15A",N.W#x*HB1Јe@1UboA%b A.XaTǗݙ|i}aX1JQLlrV[#&Oʉf6$hƱ+:ӯi81p-_-1(R#(V9C@{Nw*ݧ7x#({;"P#(lXHJmh]mt'X\XV ]?G![WnǪdH7#(BҨ5D*"@Nr]L椓LWD0"omXDd㪋"2O!iOo#KoD㮏T#&> }kaUh9EKАـHqpPP˹}\tڤ=}gYB+ p y7:9v>G8NA#&DJ4;jL#SzKhbh0}qP#(0O!SK/4nQMEPLw|B])J!#&iˉNl'RQ*EJЀ'\ ĀȤ&[\n9cE!e"B驮Yɫ)sv];u1Qw74-T!F#(ل#(HodF4.EM_6j*7Uvu$%3yxOvB 0dsxkHQҐ잢\ܢpyc:4>K=E;ɪʔi٫nZf';rጝ"MHQR4 ajPZMdጌ&]865fX&1RK&W)iJ#(1Ҥ$72)Y#(bDrD)F7hqE-PTf"K}~l;s#(m Ν*#(W&:ǎaҙv Mfg-,,d*PS)1HA6Yf*"t)H1QoM`U鑚A7#(ƯW4z$a/DDك74lѻeӋ1:ReZ&R;R0l,$#(ugBD]l:<\k)ZR}bX7)3 w#(15R#T)D(4be#&5]V:_:!+kRX*شQB$LT#(C(Kd0CD--$ d)+s\HU;hصw;^yyw_B #(5"Ƃ4{aiA8<_3m4^(By1!qvuk6ꫢD /ʩ! 
$RJBTPs0φdt٭*ZT%$TP2 !P66h]sY[-!0"$"t%@kWt_p f8o!{0CxSIj<"JC.(w}@t!e/=Fuݮy+,Z#Hu&B,_AfʗKMYI"!r|*¥z^d(#(5h-`*c?.f)n"DEhKNJ,d^UE<#|PHN#(Ү-RTR0=;^8Ta*عNbq#&?JO;u٧{08W12ɼv6}c#(Jf*уOsS]7WoH+wDžǒ;^V̞'ۭn#K4zNWK#(pD7AS-66Uǖ&#ޝG?CWiHX^6CRZmĜ#(p,ٳz7}|qXK/r[ZI0V'BK s XN Ep4ɯ99ܙ؞H;N[w4m 3?5\e͞O.$eCQ SEbtu#(+mpk91lZ}]o9W̘[1Γf#&E"M&3KĶ\Vz#(lҟز7T,+HɳF[KdԹ3DW='eVh.9C& .f=W|畾5/#&oC_1Σ^~N˺u;k#Pl.}!l૓v:Wb[42ۤ3GEbMYcLeҨ(k4vXhYVC+\qr媑I:@B /n#{> W}1K1ĽDz<9 Dc:za<DC:@#(3A\@* i#&RuƗ,tv)ݘPyaAsΆ$WBz9;q.R|]FE@#(I6o#&1K{{k\q`nYlƅ@?$E&\yh7H*‚ k9hy(t<#(#&LZt#&^.w};L umr)YQW;aIA[.ME q ~3{'JM&쫿H]X<3JkB2d (.ue6Ab&)g97M7/]eZ\*񧆇Ysc=0̺<ˑҢT{|@q!Z4d!OnD/d4yaSnhYknċsw^+/Z:k]Ss:I>ԲDE]88Gw~9v<'|V節TʜVg1"&^:)2m(y6;\\ݪ5XN/խjiq}|p>15M38*t% PWugLz?#l].4@( $Y,%-0YhP`yfwt[8"!\~:r0hPUִY60M#& 8MQiR"縅 uԃAȄA0uy|8)="p`1Gp0~^.Tz.޽:5$`BBER|eU c&h];yȮ;u/]^WcN#(v:ޕ_y HHޝp1,ݱK`;N, [7,AHHw䰃^(S(A|`2#(2*P .^iTs~;8R/GCH@(Dq1hTE 6@>c#(דAZ)$ dDq6Hԁ$"O~­u#& }DKU7vpLL{B(a:&Q98q[j*:դmz9~]A/d5D 5>3_D{GçԼ\OTՋ1OWIe]uy%xvRUU* șyk9}I"{Ǽۯ]TvvZ((Ɲrr*U$}V\6wOd*!wAL a*HRƷ6[^Miܞ&|oE%mA1S#VJTIdM"jF4SL+(L̪JdԚj1 6"HNR/`O`[@T)qMZ<-6UO#&`5ˆBɰ9CW;O(!]/{]#&~(ұEcI@` ZBB6|#&#(!YJIt4:D4`EُA5 :#(kCNT 4j5\E4El9hEhJ3I4pmVK`Olh)"64Dj2"C8X뒮Jz#(#( MN#pkOݟ!HMG{&7e $St&eL ˓Ucڻwv{ab"`cVHd^QAxYWH|K2q)(x;.v>}@cA=P(s(.dO@0$#(:&,=BC:;(ȤXi|>;ѩju`#nT,nϡx=z8Th!1cx? z5%eӶ-po%bu=))RB8Fq{zgY,W0lb1ʚ?\.ka)G( `% qD*"Abfg&d1p\R@#6tD6#8`,#&Eh .g4U!#(b&0@ۧ3<5oN'GT<ڀ00aG3~mt!2c4ƒ!t4!HdQ{n#pǍ/R0"C#Z6Hމzݙꥌ\Ryz{8N [`rp[X jBH:kU#&P& :*Lts\6X񷩜O=9)iB/,;k$T@(lm 2UKlKC-:^x&𽔻J\f6NoM=c5ҭȐYB#(p^#&#,̻nS. 
H6`ZnO$!tE261Hj4pv!)4]rsO[mfpNZʔlwv-pS-sM(4 ̪|BȆ!@·Xn)L[˙t #d uH9#IȻ謰-1!~C8gLP{FV[wV+`BM8*:ӝ*s:adW2BzqhvWVz!&pOJ␭:٠$lrYcp!S˷\\Zh Ȅ@wԬW&LQI4),F m2&zQd= %Q *#&60*8ewi0Jp$Ց-kfjL#(ddX*n,VgdbޯӓD"VS6ES~Xg14ߟߕ|=-xwax*cy)5FZRT%2RCnƂmVL\7V-h|Nٕ#&,E#&24&Q@&G"u5YFMo7]bkŚ8oJ+$ɋk& EJ16AkaͽW/0Xᢗ=zdd8-[GWk ̅gfDõ.l}1+2$o0!/ 4%b]8 #& 2% &bEXTpt-#&rf<#dkFŌ#&c<5Pəbl3T: Ćbt"iaAF6BP 9PlF$mB鉫֑̆C^eW322mRSM(U4ȚB`h<`DaTDJXmTSg^Ôμd8h;NCgd7"}G5KEDV OM\MwPnKm閫M!Ǩ9Zo>=r(sG@kK`Ϯ[T=.wZ rSKwY}/UIm1[ pcf|lvPNoq!^3lYF]r]4ȼ Ƙlw!w7*,1wAd ٜgmo`M ~dL)tǠv>o0a dcoxK?y'b|8`e#(C \m:xNLuX$/D%6@6gxU,J#(_B+PpS#$;$q-&Q@9X[ QeH]ZʨL`a c!F*BQ jQaf-#FT >(W ߔԎjI!G#&-FLJ"0n<:ξk؅P9]ub&(N)G*-"E5oLh$j4akڇAT#(B7f>KOZ#(bM-2in[Kd-TEC#(B hPmA6|RpI #(B("$WUT]ٷÖF$UJp6_3lVX mʯwETsVP1P5|Nۄw;x>9| 7 r*^\F(ԔIjU܄#O<ҝtXnRכj[ĩe)j`^{;m5 +E̡tf)TS{ l^t&j 0\S}[adEAFɑІenmgQfHfPV#(HT0D`٘ä4?3S/ dBhI e#_,a*MY7 KvJwl1>ِؓ6'*YQ83vqO8 Q%഑K5DZ"L6 z x#&OL /%N=QM]r6gU;^x+0A",ٸvMo|F7nF%`A@aU5CAGA&j1#(S#&fzi3nf2E<9#( x8v3JH HlmN x=y$ zh['){КX$uaK:aZς4m8zWPŲ d{h)9Ǘp;:79'e`dSp=T`@a@B"2='d=ԢW=xl*X; 6ʅu@ȚEE )ƛ6 rے^HH6$nQb5o\ c@c(mr+jJVZ!#(Z(?"&G "+Yl߯@;Pub8Pq @ BBA1sTQY5-6Ij>֋F%ų4"D",[8!޸Lj8qQd +A#(Hѭ26~7tX;)n]fg,"~;<#&vC[U>XB7|Lrxz_DVl=p?5q#&,cqW$>W|ko7"jB0l8QRbJ FCfjjɷ3mYZ, Aϰ?CuѨY]FԒl}=2ok׿,TH16a]M f^jz #(:h$a)Fo9ХWAEU{E#&QQ`;cDgóKBk6lQ(ihn#Id-!66[0y5߶ZrcOZ| rSp҃l\aFRO]R )TDUYc#(shXnV^#a$^1`aRɳ0vN^5UiKKؾ-Q4*d!!U~.]JI-^^wpI5˔ҁPuxZ3*yWK$])bmyݷ*&4REkż<+֯Jk-"FͲ3iM]umbPNaWt0AUcf?6-ț:a%I߅0+C &Q{Ϩ%P"B_)~妄;L$#oUG.FUUW1_ OT`YȧiaPVKN?|It:|R]_w=woSnnKd!uJ.5{n'< )4su-\N#&s3h0a :gejxZ}84xGhBV܋GaWcg&ďCaka!`"B&eb0!.!iR ľY=>FpTcbKƝ&Xp1(uc*H懧|PYłptࡇ&/"@Ӫ:Զ$<f\HmQɑ5ӶJ4Mi﫽J+T;ϣ2`:#&LS~n ѿR yU%ښ׹qva`}{vq2#&~ Y>y̓Dκ,#&v)u73%1/GWs|MV'sHsI#&YG}_??_{߯/_/wO?3;$ $v`&5'?"ƿt*0 n6j~t )$q)#& &#%v4Ky_v#(`o6O]l 3$#N vi>mښtzuC?ܨ)Cmv~Z01pΏZbO1t_SZ.MΎEJĺvfR]7%f%lxHOV#&2Y#&Hr#&mAm#cZج.fP.hjk}ciǝ`QԦ4Yg^xAɘj-xsDv1{C;N4qkՀ\tzqp@e#(TPܡ&GVCb#&`.G^hs#(_H1NH:q]K^.xMngx\(X,V#&hKDvn6傁d z5UV}.2F0ICt#^I$ DbBQBbLH Hc;訆*CnLX=LlZ*@×=~'}R{ITv7Y}P<mBJ&a"#&3~ieZ#(} \6#{&^ęL3J|QJbymDfMϻp!#& sqqS 
gۭ#(^q/&Zs[G[rmnle,auZ"2+xT@ȃ*QiƳN;pj,HFp*^i!`MdqiO-!b2KaE1XvMcץ! hw&dn;`6w ؎EXBN.ɕt~Q`HT=?{J>+1#&ž~H#&]aPOZahm@A4J?;aO{ey 2j((*D:5a̿w)f}//(1`O&}tmA?xn<(JhO(b2E1x?-YFL5YAAԎpouX氶$6tfeӲ9'Z'U?"96߲q3? @ܑN$+:3 #<== LucenePlusPlus-rel_3.0.4/scripts/llvm/wscript000066400000000000000000000220331217574114600213720ustar00rootroot00000000000000############################################################################# ## Copyright (c) 2009-2011 Ben van Klinken. All rights reserved. ## Distributable under the terms of either the Apache License (Version 2.0) ## or the GNU Lesser General Public License. ############################################################################# import sys import os from copy import copy import Options import TaskGen from Configure import conf from TaskGen import feature, after #import Task, ccroot APPNAME='Lucene++' VERSION='3.0.2' top = '../../' out = 'bin' source_patterns = '**/*.(c|cpp)' lucene_source_dirs = [ top + 'src/core/analysis', top + 'src/core/document', top + 'src/core/index', top + 'src/core/queryparser', top + 'src/core/search', top + 'src/core/store', top + 'src/core/util' ] boost_defines = [ 'BOOST_BUILD_THREAD_DLL', 'BOOST_BUILD_FILESYSTEM_DLL', 'BOOST_BUILD_REGEX_DLL', 'BOOST_BUILD_DATE_TIME_DLL', 'BOOST_BUILD_IOSTREAMS_DLL', ] boost_sources_dirs = [ 'libs/thread/src', 'libs/filesystem/src', 'libs/regex/src', 'libs/date_time/src', 'libs/iostreams/src', 'libs/system/src' ] lucene_contrib_source_dirs = [ top + 'src/contrib' ] lucene_include_dirs = [ top + 'include', top + 'src/core/include', top + 'src/contrib/include' ] tester_source_dirs = [ top + 'src/test' ] tester_include_dirs = [ top + 'include', top + 'src/core/include', top + 'src/contrib/include', top + 'src/test/include' ] def options(opt): opt.tool_options("boost") opt.tool_options('compiler_cxx') opt.tool_options('clang', tooldir = 'build') opt.add_option( '--debug', default = False, action = "store_true", help ='debug build no optimization, etc...', dest = 
'debug') opt.add_option( '--static', default = False, action = "store_true", help ='fully static build', dest = 'static') opt.add_option( '--boost', default = 'boost_1_42_0', action = "store", help ='boost path', dest = 'BOOST_HOME') def configure(conf): conf.env['INCLUDES_BOOST'] = Options.options.BOOST_HOME conf.check_tool('g++') conf.check_tool('gcc') #now try with overridden clang... conf.check_tool('clang', 'build') conf.check_cc(lib = 'pthread', mandatory = True) conf.check(header_name='bzlib.h', mandatory = True) conf.env['LINKFLAGS_cshlib'] = '' conf.env['LINKFLAGS_cxxshlib'] = '' conf.check_tool('boost') conf.check_tool('clang', 'build') conf.check_boost( #static = 'onlystatic', lib = ['filesystem', 'thread', 'regex', 'system', 'date_time', 'iostreams', 'unit_test_framework'] ) if conf.env['HAVE_LLVM'] == False: raise Exception("No clang found") #if conf.path.find_dir(conf.env['INCLUDES_BOOST'] + "/libs") == None: # raise Exception(conf.env['INCLUDES_BOOST'] + " does not have the libs directory or is not within the source path (" + top + ") - check that the path is correctly and points to a source distribution") #if conf.path.find_dir(conf.env['INCLUDES_BOOST'] + "/boost") != None: # raise Exception("Please remove the boost includes path, it causes problems for some unknown reason") def build(bld): target_type = 'cxxstlib' debug_define = '_DEBUG' if Options.options.debug else 'NDEBUG' compile_flags = ['-emit-llvm'] if Options.options.debug: compile_flags = compile_flags + ['-O0', '-g', ] else: compile_flags = compile_flags + ['-O3'] dll_link_flags = ['-link-as-library'] app_link_flags = ['-native', 'scripts/llvm/liblucene++.a', '-L/usr/lib/gcc/x86_64-linux-gnu/4.5/', '-lsupc++', '-lstdc++', '-lpthread', '-lm', '-lc' ] # 'scripts/llvm/liblucene_boost.a', # # ############### #libraries... 
############### lucene_sources = [] for source_dir in lucene_source_dirs: source_dir = bld.path.find_dir(source_dir) lucene_sources.extend(source_dir.ant_glob(source_patterns)) bld( name = 'lucene++', features = ['cxx', 'c'] + [target_type], source = [source.relpath_gen(bld.path) for source in lucene_sources], target = 'lucene++', includes = lucene_include_dirs + [bld.env["INCLUDES_BOOST"]], cflags = compile_flags, cxxflags = compile_flags, linkflags = dll_link_flags, defines = ['LPP_BUILDING_LIB', 'LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD' ) lucene_contrib_sources = [] for source_dir in lucene_contrib_source_dirs: source_dir = bld.path.find_dir(source_dir) lucene_contrib_sources.extend(source_dir.ant_glob(source_patterns)) bld( name = 'lucene_contrib', features = ['cxx', 'c'] + [target_type], source = [source.relpath_gen(bld.path) for source in lucene_contrib_sources], target = 'lucene_contrib', includes = lucene_include_dirs + [bld.env["INCLUDES_BOOST"]], cflags = compile_flags, cxxflags = compile_flags, linkflags = dll_link_flags, defines = ['LPP_BUILDING_LIB', 'LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], ) #lucene_boost_sources = [] #for source_dir in boost_sources_dirs: # if not bld.path.find_dir(bld.env["INCLUDES_BOOST"] + "/" + source_dir): # raise Exception(source_dir + " was not found or is not inside the lucene path") # source_dir = bld.path.find_dir(bld.env["INCLUDES_BOOST"] + "/" + source_dir) # lucene_boost_sources.extend(source_dir.ant_glob(source_patterns, excl='win32')) #bld( # name = 'lucene_boost', # features = ['cxx', 'c'] + [target_type], # source = [source.relpath_gen(bld.path) for source in lucene_boost_sources], # target = 'lucene_boost', # includes = bld.env["INCLUDES_BOOST"], # cflags = compile_flags, # cxxflags = compile_flags, # linkflags = dll_link_flags, # defines = [debug_define] + boost_defines, #) ########## # 
applications ########## tester_sources = [] for source_dir in tester_source_dirs: source_dir = bld.path.find_dir(source_dir) tester_sources.extend(source_dir.ant_glob(source_patterns)) #bld( # name = 'lucene_tester', # features = ['cxx', 'c', 'cprogram'], # #source = [source.relpath_gen(bld.path) for source in tester_sources], # target = 'lucene_tester', # includes = tester_include_dirs + [bld.env["INCLUDES_BOOST"]], # cflags = compile_flags, # cxxflags = compile_flags, # linkflags = app_link_flags, # defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + ['LPP_EXPOSE_INTERNAL'] + [debug_define], # uselib = 'PTHREAD', # use = 'lucene++ lucene_contrib' # ) bld( name = 'deletefiles', features = ['cxx', 'c', 'cprogram'], source = bld.path.find_resource(top + 'src/demo/deletefiles/main.cpp').relpath_gen(bld.path), target = 'deletefiles', includes = [top + 'include'] + [bld.env["INCLUDES_BOOST"]], cflags = compile_flags, cxxflags = compile_flags, linkflags = app_link_flags, defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD', uselib_local = 'lucene++' ) bld( name = 'indexfiles', features = ['cxx', 'c', 'cprogram'], source = bld.path.find_resource(top + 'src/demo/indexfiles/main.cpp').relpath_gen(bld.path), target = 'indexfiles', includes = [top + 'include'] + [bld.env["INCLUDES_BOOST"]], cflags = compile_flags, cxxflags = compile_flags, linkflags = app_link_flags, defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD', uselib_local = 'lucene++' ) bld( name = 'searchfiles', features = ['cxx', 'c', 'cprogram'], source = bld.path.find_resource(top + 'src/demo/searchfiles/main.cpp').relpath_gen(bld.path), target = 'searchfiles', includes = [top + 'include'] + [bld.env["INCLUDES_BOOST"]], cflags = compile_flags, cxxflags = compile_flags, linkflags = app_link_flags, 
defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD', uselib_local = 'lucene++' ) #Todo: #llvm-ld -native *.so target.bc -o ntv -lsupc++ -lstdc++ -L/usr/lib/llvm-2.8/gcc-4.2/lib64 -lpthread LucenePlusPlus-rel_3.0.4/src/000077500000000000000000000000001217574114600161025ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/000077500000000000000000000000001217574114600175425ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/CMakeLists.txt000066400000000000000000000034551217574114600223110ustar00rootroot00000000000000project(lucene++-contrib) #################################### # THE lucene++-contrib library #################################### file(GLOB_RECURSE lucene_sources ${lucene++-contrib_SOURCE_DIR}/*.cpp ${lucene++-contrib_SOURCE_DIR}/snowball/libstemmer_c/libstemmer/libstemmer_utf8.c ${lucene++-contrib_SOURCE_DIR}/snowball/libstemmer_c/src_c/*.c ${lucene++-contrib_SOURCE_DIR}/snowball/libstemmer_c/runtime/*.c) file(GLOB_RECURSE HEADERS ${lucene++-contrib_SOURCE_DIR}/include/*.h) ADD_DEFINITIONS(-DLPP_BUILDING_LIB) INCLUDE_DIRECTORIES(${lucene++-base_SOURCE_DIR}/include) INCLUDE_DIRECTORIES(${lucene++-lib_SOURCE_DIR}/include) INCLUDE_DIRECTORIES(${lucene++-contrib_SOURCE_DIR}/include) INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) install(FILES ${HEADERS} DESTINATION include/lucene++ COMPONENT development-contrib) ################################# # lucene++ static library ################################# ADD_LIBRARY(lucene++-contrib-static STATIC EXCLUDE_FROM_ALL ${lucene_sources} ${HEADERS} ) #set properties on the libraries SET_TARGET_PROPERTIES(lucene++-contrib-static PROPERTIES VERSION ${LUCENE++_VERSION} SOVERSION ${LUCENE++_SOVERSION} ) ################################# # lucene++ shared library ################################# 
SET(PCH_ADDITIONAL_COMPILER_FLAGS_lucene++-contrib -DLPP_HAVE_DLL) ADD_LIBRARY(lucene++-contrib SHARED ${lucene_sources} ${HEADERS} ) #set properties on the libraries SET_TARGET_PROPERTIES(lucene++-contrib PROPERTIES VERSION ${LUCENE++_VERSION} SOVERSION ${LUCENE++_SOVERSION} COMPILE_FLAGS -DLPP_HAVE_DLL ) TARGET_LINK_LIBRARIES(lucene++-contrib ${CMAKE_THREAD_LIBS_INIT} lucene++) install(TARGETS lucene++-contrib DESTINATION ${LIB_DESTINATION} COMPONENT runtime ) LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/000077500000000000000000000000001217574114600215525ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/000077500000000000000000000000001217574114600230425ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/000077500000000000000000000000001217574114600246655ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ar/000077500000000000000000000000001217574114600252675ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ar/ArabicAnalyzer.cpp000066400000000000000000000211021217574114600306560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicAnalyzer.h" #include "ArabicLetterTokenizer.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "ArabicNormalizationFilter.h" #include "ArabicStemFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Arabic stopwords in UTF-8 format. /// /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html /// The stopword list is BSD-Licensed. 
const uint8_t ArabicAnalyzer::DEFAULT_STOPWORD_FILE[] = { 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, 0xd9, 0x81, 0xd9, 0x8a, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0x0a, 0xd9, 0x81, 0x0a, 0xd8, 0xab, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0x0a, 0xd8, 0xa3, 0xd9, 0x88, 0x0a, 0xd8, 0xa8, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0x0a, 0xd8, 0xa7, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa3, 0xd9, 0x8a, 0x0a, 0xd8, 0xa3, 0xd9, 0x89, 0x0a, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd9, 0x83, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x88, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x81, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd8, 0xb9, 0x0a, 0xd8, 0xa7, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x81, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x81, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa5, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 
0x84, 0xd8, 0xaa, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xa5, 0xd9, 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xa5, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xb6, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x8a, 0xd8, 0xb6, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x88, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x84, 0xd9, 0x85, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x85, 0x0a, 0xd9, 0x84, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd9, 0x84, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb0, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xaa, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd8, 0xb0, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x83, 0x0a, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd9, 0x83, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 
0xd9, 0x88, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x88, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xba, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb6, 0x0a, 0xd9, 0x82, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd8, 0xad, 0xd9, 0x88, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0xd8, 0xb0, 0x0a, 0xd8, 0xb6, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd8, 0xad, 0xd9, 0x8a, 0xd8, 0xab, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa2, 0xd9, 0x86, 0x0a, 0xd8, 0xae, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xaf, 0x0a, 0xd9, 0x82, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd8, 0xad, 0xd8, 0xaa, 0xd9, 0x89, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x89, 0x0a, 0xd8, 0xac, 0xd9, 0x85, 0xd9, 0x8a, 0xd8, 0xb9, 0x0a }; ArabicAnalyzer::ArabicAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } ArabicAnalyzer::ArabicAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } ArabicAnalyzer::~ArabicAnalyzer() { } const HashSet ArabicAnalyzer::getDefaultStopSet() { static HashSet stopSet; if (!stopSet) { String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); } return stopSet; } TokenStreamPtr ArabicAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result); // the order here is important: the stopword list is not normalized result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); result = newLucene(result); result = 
newLucene(result); return result; } TokenStreamPtr ArabicAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { ArabicAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(streams->source); // the order here is important: the stopword list is not normalized streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); streams->result = newLucene(streams->result); streams->result = newLucene(streams->result); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } ArabicAnalyzerSavedStreams::~ArabicAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ar/ArabicLetterTokenizer.cpp000066400000000000000000000020721217574114600322300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicLetterTokenizer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { ArabicLetterTokenizer::ArabicLetterTokenizer(ReaderPtr input) : LetterTokenizer(input) { } ArabicLetterTokenizer::ArabicLetterTokenizer(AttributeSourcePtr source, ReaderPtr input) : LetterTokenizer(source, input) { } ArabicLetterTokenizer::ArabicLetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : LetterTokenizer(factory, input) { } ArabicLetterTokenizer::~ArabicLetterTokenizer() { } bool ArabicLetterTokenizer::isTokenChar(wchar_t c) { return LetterTokenizer::isTokenChar(c) || UnicodeUtil::isNonSpacing(c); } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilter.cpp000066400000000000000000000021261217574114600330720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicNormalizationFilter.h" #include "ArabicNormalizer.h" #include "TermAttribute.h" namespace Lucene { ArabicNormalizationFilter::ArabicNormalizationFilter(TokenStreamPtr input) : TokenFilter(input) { normalizer = newLucene(); termAtt = addAttribute(); } ArabicNormalizationFilter::~ArabicNormalizationFilter() { } bool ArabicNormalizationFilter::incrementToken() { if (input->incrementToken()) { int32_t newlen = normalizer->normalize(termAtt->termBuffer().get(), termAtt->termLength()); termAtt->setTermLength(newlen); return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ar/ArabicNormalizer.cpp000066400000000000000000000053351217574114600312250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicNormalizer.h" #include "MiscUtils.h" namespace Lucene { const wchar_t ArabicNormalizer::ALEF = (wchar_t)0x0627; const wchar_t ArabicNormalizer::ALEF_MADDA = (wchar_t)0x0622; const wchar_t ArabicNormalizer::ALEF_HAMZA_ABOVE = (wchar_t)0x0623; const wchar_t ArabicNormalizer::ALEF_HAMZA_BELOW = (wchar_t)0x0625; const wchar_t ArabicNormalizer::YEH = (wchar_t)0x064a; const wchar_t ArabicNormalizer::DOTLESS_YEH = (wchar_t)0x0649; const wchar_t ArabicNormalizer::TEH_MARBUTA = (wchar_t)0x0629; const wchar_t ArabicNormalizer::HEH = (wchar_t)0x0647; const wchar_t ArabicNormalizer::TATWEEL = (wchar_t)0x0640; const wchar_t ArabicNormalizer::FATHATAN = (wchar_t)0x064b; const wchar_t ArabicNormalizer::DAMMATAN = (wchar_t)0x064c; const wchar_t ArabicNormalizer::KASRATAN = (wchar_t)0x064d; const wchar_t ArabicNormalizer::FATHA = (wchar_t)0x064e; const wchar_t ArabicNormalizer::DAMMA = (wchar_t)0x064f; const wchar_t ArabicNormalizer::KASRA = (wchar_t)0x0650; const wchar_t ArabicNormalizer::SHADDA = (wchar_t)0x0651; const wchar_t ArabicNormalizer::SUKUN = (wchar_t)0x0652; ArabicNormalizer::~ArabicNormalizer() { } int32_t ArabicNormalizer::normalize(wchar_t* s, int32_t len) { for (int32_t i = 0; i < len; ++i) { switch (s[i]) { case ALEF_MADDA: case ALEF_HAMZA_ABOVE: case ALEF_HAMZA_BELOW: s[i] = ALEF; break; case DOTLESS_YEH: s[i] = YEH; break; case TEH_MARBUTA: s[i] = HEH; break; case TATWEEL: case KASRATAN: case DAMMATAN: case FATHATAN: case FATHA: case DAMMA: case KASRA: case SHADDA: case SUKUN: len = deleteChar(s, i--, len); break; default: break; } } return len; } int32_t ArabicNormalizer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { if (pos < len) MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); return len - 1; } } 
LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ar/ArabicStemFilter.cpp000066400000000000000000000020171217574114600311530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicStemFilter.h" #include "ArabicStemmer.h" #include "TermAttribute.h" namespace Lucene { ArabicStemFilter::ArabicStemFilter(TokenStreamPtr input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } ArabicStemFilter::~ArabicStemFilter() { } bool ArabicStemFilter::incrementToken() { if (input->incrementToken()) { int32_t newlen = stemmer->stem(termAtt->termBuffer().get(), termAtt->termLength()); termAtt->setTermLength(newlen); return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ar/ArabicStemmer.cpp000066400000000000000000000114651217574114600305200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicStemmer.h" #include "MiscUtils.h" namespace Lucene { const wchar_t ArabicStemmer::ALEF = (wchar_t)0x0627; const wchar_t ArabicStemmer::BEH = (wchar_t)0x0628; const wchar_t ArabicStemmer::TEH_MARBUTA = (wchar_t)0x0629; const wchar_t ArabicStemmer::TEH = (wchar_t)0x062a; const wchar_t ArabicStemmer::FEH = (wchar_t)0x0641; const wchar_t ArabicStemmer::KAF = (wchar_t)0x0643; const wchar_t ArabicStemmer::LAM = (wchar_t)0x0644; const wchar_t ArabicStemmer::NOON = (wchar_t)0x0646; const wchar_t ArabicStemmer::HEH = (wchar_t)0x0647; const wchar_t ArabicStemmer::WAW = (wchar_t)0x0648; const wchar_t ArabicStemmer::YEH = (wchar_t)0x064a; ArabicStemmer::~ArabicStemmer() { } const Collection ArabicStemmer::prefixes() { static Collection _prefixes; if (!_prefixes) { _prefixes = Collection::newInstance(); _prefixes.add(String(L"") + ALEF + LAM); _prefixes.add(String(L"") + WAW + ALEF + LAM); _prefixes.add(String(L"") + BEH + ALEF + LAM); _prefixes.add(String(L"") + KAF + ALEF + LAM); _prefixes.add(String(L"") + FEH + ALEF + LAM); _prefixes.add(String(L"") + LAM + LAM); _prefixes.add(String(L"") + WAW); } return _prefixes; } const Collection ArabicStemmer::suffixes() { static Collection _suffixes; if (!_suffixes) { _suffixes = Collection::newInstance(); _suffixes.add(String(L"") + HEH + ALEF); _suffixes.add(String(L"") + ALEF + NOON); _suffixes.add(String(L"") + ALEF + TEH); _suffixes.add(String(L"") + WAW + NOON); _suffixes.add(String(L"") + YEH + NOON); _suffixes.add(String(L"") + YEH + HEH); _suffixes.add(String(L"") + YEH + TEH_MARBUTA); _suffixes.add(String(L"") + HEH); _suffixes.add(String(L"") + TEH_MARBUTA); _suffixes.add(String(L"") + YEH); } return _suffixes; } int32_t ArabicStemmer::stem(wchar_t* s, int32_t len) { len = stemPrefix(s, len); len = stemSuffix(s, len); return len; } int32_t ArabicStemmer::stemPrefix(wchar_t* s, int32_t len) { Collection 
stemPrefixes(prefixes()); for (int32_t i = 0; i < stemPrefixes.size(); ++i) { if (startsWith(s, len, stemPrefixes[i])) return deleteChars(s, 0, len, (int32_t)stemPrefixes[i].length()); } return len; } int32_t ArabicStemmer::stemSuffix(wchar_t* s, int32_t len) { Collection stemSuffixes(suffixes()); for (int32_t i = 0; i < stemSuffixes.size(); ++i) { if (endsWith(s, len, stemSuffixes[i])) len = (int32_t)deleteChars(s, (int32_t)(len - stemSuffixes[i].length()), len, (int32_t)stemSuffixes[i].length()); } return len; } bool ArabicStemmer::startsWith(wchar_t* s, int32_t len, const String& prefix) { if (prefix.length() == 1 && len < 4) // wa- prefix requires at least 3 characters return false; else if (len < (int32_t)prefix.length() + 2) // other prefixes require only 2 return false; else { for (int32_t i = 0; i < (int32_t)prefix.length(); ++i) { if (s[i] != prefix[i]) return false; } return true; } } bool ArabicStemmer::endsWith(wchar_t* s, int32_t len, const String& suffix) { if (len < (int32_t)suffix.length() + 2) // all suffixes require at least 2 characters after stemming return false; else { for (int32_t i = 0; i < (int32_t)suffix.length(); ++i) { if (s[len - suffix.length() + i] != suffix[i]) return false; } return true; } } int32_t ArabicStemmer::deleteChars(wchar_t* s, int32_t pos, int32_t len, int32_t chars) { for (int32_t i = 0; i < chars; ++i) len = deleteChar(s, pos, len); return len; } int32_t ArabicStemmer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { if (pos < len) MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); return len - 1; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/br/000077500000000000000000000000001217574114600252705ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/br/BrazilianAnalyzer.cpp000066400000000000000000000113221217574114600314140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 
Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "BrazilianAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "BrazilianStemFilter.h" namespace Lucene { const wchar_t* BrazilianAnalyzer::_BRAZILIAN_STOP_WORDS[] = { L"a", L"ainda", L"alem", L"ambas", L"ambos", L"antes", L"ao", L"aonde", L"aos", L"apos", L"aquele", L"aqueles", L"as", L"assim", L"com", L"como", L"contra", L"contudo", L"cuja", L"cujas", L"cujo", L"cujos", L"da", L"das", L"de", L"dela", L"dele", L"deles", L"demais", L"depois", L"desde", L"desta", L"deste", L"dispoe", L"dispoem", L"diversa", L"diversas", L"diversos", L"do", L"dos", L"durante", L"e", L"ela", L"elas", L"ele", L"eles", L"em", L"entao", L"entre", L"essa", L"essas", L"esse", L"esses", L"esta", L"estas", L"este", L"estes", L"ha", L"isso", L"isto", L"logo", L"mais", L"mas", L"mediante", L"menos", L"mesma", L"mesmas", L"mesmo", L"mesmos", L"na", L"nas", L"nao", L"nas", L"nem", L"nesse", L"neste", L"nos", L"o", L"os", L"ou", L"outra", L"outras", L"outro", L"outros", L"pelas", L"pelas", L"pelo", L"pelos", L"perante", L"pois", L"por", L"porque", L"portanto", L"proprio", L"propios", L"quais", L"qual", L"qualquer", L"quando", L"quanto", L"que", L"quem", L"quer", L"se", L"seja", L"sem", L"sendo", L"seu", L"seus", L"sob", L"sobre", L"sua", L"suas", L"tal", L"tambem", L"teu", L"teus", L"toda", L"todas", L"todo", L"todos", L"tua", L"tuas", L"tudo", L"um", L"uma", L"umas", L"uns" }; BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; 
this->matchVersion = matchVersion; } BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { this->stoptable = stopwords; this->excltable = exclusions; this->matchVersion = matchVersion; } BrazilianAnalyzer::~BrazilianAnalyzer() { } const HashSet BrazilianAnalyzer::getDefaultStopSet() { static HashSet stopSet; if (!stopSet) stopSet = HashSet::newInstance(_BRAZILIAN_STOP_WORDS, _BRAZILIAN_STOP_WORDS + SIZEOF_ARRAY(_BRAZILIAN_STOP_WORDS)); return stopSet; } void BrazilianAnalyzer::setStemExclusionTable(HashSet exclusions) { excltable = exclusions; setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created } TokenStreamPtr BrazilianAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); result = newLucene(result, excltable); return result; } TokenStreamPtr BrazilianAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { BrazilianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); streams->result = newLucene(streams->result, excltable); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } BrazilianAnalyzerSavedStreams::~BrazilianAnalyzerSavedStreams() { } } 
LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/br/BrazilianStemFilter.cpp000066400000000000000000000030561217574114600317120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "BrazilianStemFilter.h" #include "BrazilianStemmer.h" #include "TermAttribute.h" namespace Lucene { BrazilianStemFilter::BrazilianStemFilter(TokenStreamPtr input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } BrazilianStemFilter::BrazilianStemFilter(TokenStreamPtr input, HashSet exclusiontable) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); exclusions = exclusiontable; } BrazilianStemFilter::~BrazilianStemFilter() { } bool BrazilianStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); // Check the exclusion table. if (!exclusions || !exclusions.contains(term)) { String s(stemmer->stem(term)); // If not stemmed, don't waste the time adjusting the token. if (!s.empty() && s != term) termAtt->setTermBuffer(s); } return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/br/BrazilianStemmer.cpp000066400000000000000000001021501217574114600312430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "BrazilianStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { BrazilianStemmer::~BrazilianStemmer() { } String BrazilianStemmer::stem(const String& term) { // creates CT createCT(term); if (!isIndexable(CT)) return L""; if (!isStemmable(CT)) return CT; R1 = getR1(CT); R2 = getR1(R1); RV = getRV(CT); TERM = term + L";" + CT; bool altered = step1(); if (!altered) altered = step2(); if (altered) step3(); else step4(); step5(); return CT; } bool BrazilianStemmer::isStemmable(const String& term) { for (int32_t c = 0; c < (int32_t)term.length(); ++c) { // Discard terms that contain non-letter characters. if (!UnicodeUtil::isAlpha(term[c])) return false; } return true; } bool BrazilianStemmer::isIndexable(const String& term) { return (term.length() < 30) && (term.length() > 2); } bool BrazilianStemmer::isVowel(wchar_t value) { return (value == L'a' || value == L'e' || value == L'i' || value == L'o' || value == L'u'); } String BrazilianStemmer::getR1(const String& value) { if (value.empty()) return L""; // find 1st vowel int32_t i = (int32_t)(value.length() - 1); int32_t j = 0; for (; j < i; ++j) { if (isVowel(value[j])) break; } if (j >= i) return L""; // find 1st non-vowel for (; j < i; ++j) { if (!isVowel(value[j])) break; } if (j >= i) return L""; return value.substr(j + 1); } String BrazilianStemmer::getRV(const String& value) { if (value.empty()) return L""; int32_t i = (int32_t)(value.length() - 1); // RV - IF the second letter is a consonant, RV is the region after the next following vowel if (i > 0 && !isVowel(value[1])) { int32_t j = 2; // find 1st vowel for (; j < i; ++j) { if (isVowel(value[j])) break; } if (j < i) return value.substr(j + 1); } // RV - OR if the first two letters are vowels, RV is the region after the next consonant, if (i > 1 && isVowel(value[0]) && isVowel(value[1])) { int32_t j = 
2; // find 1st consonant for (; j < i; ++j) { if (!isVowel(value[j])) break; } if (j < i) return value.substr(j + 1); } // RV - AND otherwise (consonant-vowel case) RV is the region after the third letter. if (i > 2) return value.substr(3); return L""; } String BrazilianStemmer::changeTerm(const String& value) { if (value.empty()) return L""; String lowerValue(StringUtils::toLower(value)); String r; for (int32_t j = 0; j < (int32_t)value.length(); ++j) { if (value[j] == 0x00e1 || value[j] == 0x00e2 || value[j] == 0x00e3) { r += L"a"; continue; } if (value[j] == 0x00e9 || value[j] == 0x00ea) { r += L"e"; continue; } if (value[j] == 0x00ed) { r += L"i"; continue; } if (value[j] == 0x00f3 || value[j] == 0x00f4 || value[j] == 0x00f5) { r += L"o"; continue; } if (value[j] == 0x00fa || value[j] == 0x00fc) { r += L"u"; continue; } if (value[j] == 0x00e7) { r += L"c"; continue; } if (value[j] == 0x00f1) { r += L"n"; continue; } r += value[j]; } return r ; } bool BrazilianStemmer::checkSuffix(const String& value, const String& suffix) { if (value.empty() || suffix.empty()) return false; if (suffix.length() > value.length()) return false; return (value.substr(value.length() - suffix.length()) == suffix); } String BrazilianStemmer::replaceSuffix(const String& value, const String& toReplace, const String& changeTo) { if (value.empty() || toReplace.empty() || changeTo.empty()) return value; String vvalue = removeSuffix(value, toReplace); if (value == vvalue) return value; else return vvalue + changeTo; } String BrazilianStemmer::removeSuffix(const String& value, const String& toRemove) { if (value.empty() || toRemove.empty() || !checkSuffix(value, toRemove)) return value; return value.substr(0, value.length() - toRemove.length()); } bool BrazilianStemmer::suffixPreceded(const String& value, const String& suffix, const String& preceded) { if (value.empty() || suffix.empty() || preceded.empty() || !checkSuffix(value, suffix)) return false; return checkSuffix(removeSuffix(value, 
suffix), preceded); } void BrazilianStemmer::createCT(const String& term) { CT = changeTerm(term); if (CT.length() < 2) return; // if the first character is ... , remove it if (CT[0] == L'"' || CT[0] == L'\'' || CT[0] == L'-' || CT[0] == L',' || CT[0] == L';' || CT[0] == L'.' || CT[0] == L'?' || CT[0] == L'!') CT = CT.substr(1); if (CT.length() < 2) return; // if the last character is ... , remove it if (CT[CT.length() - 1] == L'-' || CT[CT.length() - 1] == L',' || CT[CT.length() - 1] == L';' || CT[CT.length() - 1] == L'.' || CT[CT.length() - 1] == L'?' || CT[CT.length() - 1] == L'!' || CT[CT.length() - 1] == L'\'' || CT[CT.length() - 1] == L'"') CT = CT.substr(0, CT.length() - 1); } bool BrazilianStemmer::step1() { if (CT.empty()) return false; // suffix length = 7 if (checkSuffix(CT, L"uciones") && checkSuffix(R2, L"uciones")) { CT = replaceSuffix(CT, L"uciones", L"u"); return true; } // suffix length = 6 if (CT.length() >= 6) { if (checkSuffix(CT, L"imentos") && checkSuffix(R2, L"imentos")) { CT = removeSuffix(CT, L"imentos"); return true; } if (checkSuffix(CT, L"amentos") && checkSuffix(R2, L"amentos")) { CT = removeSuffix(CT, L"amentos"); return true; } if (checkSuffix(CT, L"adores") && checkSuffix(R2, L"adores")) { CT = removeSuffix(CT, L"adores"); return true; } if (checkSuffix(CT, L"adoras") && checkSuffix(R2, L"adoras")) { CT = removeSuffix(CT, L"adoras"); return true; } if (checkSuffix(CT, L"logias") && checkSuffix(R2, L"logias")) { replaceSuffix(CT, L"logias", L"log"); return true; } if (checkSuffix(CT, L"encias") && checkSuffix(R2, L"encias")) { CT = replaceSuffix(CT, L"encias", L"ente"); return true; } if (checkSuffix(CT, L"amente") && checkSuffix(R1, L"amente")) { CT = removeSuffix(CT, L"amente"); return true; } if (checkSuffix(CT, L"idades") && checkSuffix(R2, L"idades")) { CT = removeSuffix(CT, L"idades"); return true; } } // suffix length = 5 if (CT.length() >= 5) { if (checkSuffix(CT, L"acoes") && checkSuffix(R2, L"acoes")) { CT = removeSuffix(CT, 
L"acoes"); return true; } if (checkSuffix(CT, L"imento") && checkSuffix(R2, L"imento")) { CT = removeSuffix(CT, L"imento"); return true; } if (checkSuffix(CT, L"amento") && checkSuffix(R2, L"amento")) { CT = removeSuffix(CT, L"amento"); return true; } if (checkSuffix(CT, L"adora") && checkSuffix(R2, L"adora")) { CT = removeSuffix(CT, L"adora"); return true; } if (checkSuffix(CT, L"ismos") && checkSuffix(R2, L"ismos")) { CT = removeSuffix(CT, L"ismos"); return true; } if (checkSuffix(CT, L"istas") && checkSuffix(R2, L"istas")) { CT = removeSuffix(CT, L"istas"); return true; } if (checkSuffix(CT, L"logia") && checkSuffix(R2, L"logia")) { CT = replaceSuffix(CT, L"logia", L"log"); return true; } if (checkSuffix(CT, L"ucion") && checkSuffix(R2, L"ucion")) { CT = replaceSuffix(CT, L"ucion", L"u"); return true; } if (checkSuffix(CT, L"encia") && checkSuffix(R2, L"encia")) { CT = replaceSuffix(CT, L"encia", L"ente"); return true; } if (checkSuffix(CT, L"mente") && checkSuffix(R2, L"mente")) { CT = removeSuffix(CT, L"mente"); return true; } if (checkSuffix(CT, L"idade") && checkSuffix(R2, L"idade")) { CT = removeSuffix(CT, L"idade"); return true; } } // suffix length = 4 if (CT.length() >= 4) { if (checkSuffix(CT, L"acao") && checkSuffix(R2, L"acao")) { CT = removeSuffix(CT, L"acao"); return true; } if (checkSuffix(CT, L"ezas") && checkSuffix(R2, L"ezas")) { CT = removeSuffix(CT, L"ezas"); return true; } if (checkSuffix(CT, L"icos") && checkSuffix(R2, L"icos")) { CT = removeSuffix(CT, L"icos"); return true; } if (checkSuffix(CT, L"icas") && checkSuffix(R2, L"icas")) { CT = removeSuffix(CT, L"icas"); return true; } if (checkSuffix(CT, L"ismo") && checkSuffix(R2, L"ismo")) { CT = removeSuffix(CT, L"ismo"); return true; } if (checkSuffix(CT, L"avel") && checkSuffix(R2, L"avel")) { CT = removeSuffix(CT, L"avel"); return true; } if (checkSuffix(CT, L"ivel") && checkSuffix(R2, L"ivel")) { CT = removeSuffix(CT, L"ivel"); return true; } if (checkSuffix(CT, L"ista") && 
checkSuffix(R2, L"ista")) { CT = removeSuffix(CT, L"ista"); return true; } if (checkSuffix(CT, L"osos") && checkSuffix(R2, L"osos")) { CT = removeSuffix(CT, L"osos"); return true; } if (checkSuffix(CT, L"osas") && checkSuffix(R2, L"osas")) { CT = removeSuffix(CT, L"osas"); return true; } if (checkSuffix(CT, L"ador") && checkSuffix(R2, L"ador")) { CT = removeSuffix(CT, L"ador"); return true; } if (checkSuffix(CT, L"ivas") && checkSuffix(R2, L"ivas")) { CT = removeSuffix(CT, L"ivas"); return true; } if (checkSuffix(CT, L"ivos") && checkSuffix(R2, L"ivos")) { CT = removeSuffix(CT, L"ivos"); return true; } if (checkSuffix(CT, L"iras") && checkSuffix(RV, L"iras") && suffixPreceded(CT, L"iras", L"e")) { CT = replaceSuffix(CT, L"iras", L"ir"); return true; } } // suffix length = 3 if (CT.length() >= 3) { if (checkSuffix(CT, L"eza") && checkSuffix(R2, L"eza")) { CT = removeSuffix(CT, L"eza"); return true; } if (checkSuffix(CT, L"ico") && checkSuffix(R2, L"ico")) { CT = removeSuffix(CT, L"ico"); return true; } if (checkSuffix(CT, L"ica") && checkSuffix(R2, L"ica")) { CT = removeSuffix(CT, L"ica"); return true; } if (checkSuffix(CT, L"oso") && checkSuffix(R2, L"oso")) { CT = removeSuffix(CT, L"oso"); return true; } if (checkSuffix(CT, L"osa") && checkSuffix(R2, L"osa")) { CT = removeSuffix(CT, L"osa"); return true; } if (checkSuffix(CT, L"iva") && checkSuffix(R2, L"iva")) { CT = removeSuffix(CT, L"iva"); return true; } if (checkSuffix(CT, L"ivo") && checkSuffix(R2, L"ivo")) { CT = removeSuffix(CT, L"ivo"); return true; } if (checkSuffix(CT, L"ira") && checkSuffix(RV, L"ira") && suffixPreceded(CT, L"ira", L"e")) { CT = replaceSuffix(CT, L"ira", L"ir"); return true; } } // no ending was removed by step1 return false; } bool BrazilianStemmer::step2() { if (RV.empty()) return false; // suffix lenght = 7 if (RV.length() >= 7) { if (checkSuffix(RV, L"issemos")) { CT = removeSuffix(CT, L"issemos"); return true; } if (checkSuffix(RV, L"essemos")) { CT = removeSuffix(CT, L"essemos"); 
return true; } if (checkSuffix(RV, L"assemos")) { CT = removeSuffix(CT, L"assemos"); return true; } if (checkSuffix(RV, L"ariamos")) { CT = removeSuffix(CT, L"ariamos"); return true; } if (checkSuffix(RV, L"eriamos")) { CT = removeSuffix(CT, L"eriamos"); return true; } if (checkSuffix(RV, L"iriamos")) { CT = removeSuffix(CT, L"iriamos"); return true; } } // suffix length = 6 if (RV.length() >= 6) { if (checkSuffix(RV, L"iremos")) { CT = removeSuffix(CT, L"iremos"); return true; } if (checkSuffix(RV, L"eremos")) { CT = removeSuffix(CT, L"eremos"); return true; } if (checkSuffix(RV, L"aremos")) { CT = removeSuffix(CT, L"aremos"); return true; } if (checkSuffix(RV, L"avamos")) { CT = removeSuffix(CT, L"avamos"); return true; } if (checkSuffix(RV, L"iramos")) { CT = removeSuffix(CT, L"iramos"); return true; } if (checkSuffix(RV, L"eramos")) { CT = removeSuffix(CT, L"eramos"); return true; } if (checkSuffix(RV, L"aramos")) { CT = removeSuffix(CT, L"aramos"); return true; } if (checkSuffix(RV, L"asseis")) { CT = removeSuffix(CT, L"asseis"); return true; } if (checkSuffix(RV, L"esseis")) { CT = removeSuffix(CT, L"esseis"); return true; } if (checkSuffix(RV, L"isseis")) { CT = removeSuffix(CT, L"isseis"); return true; } if (checkSuffix(RV, L"arieis")) { CT = removeSuffix(CT, L"arieis"); return true; } if (checkSuffix(RV, L"erieis")) { CT = removeSuffix(CT, L"erieis"); return true; } if (checkSuffix(RV, L"irieis")) { CT = removeSuffix(CT, L"irieis"); return true; } } // suffix length = 5 if (RV.length() >= 5) { if (checkSuffix(RV, L"irmos")) { CT = removeSuffix(CT, L"irmos"); return true; } if (checkSuffix(RV, L"iamos")) { CT = removeSuffix(CT, L"iamos"); return true; } if (checkSuffix(RV, L"armos")) { CT = removeSuffix(CT, L"armos"); return true; } if (checkSuffix(RV, L"ermos")) { CT = removeSuffix(CT, L"ermos"); return true; } if (checkSuffix(RV, L"areis")) { CT = removeSuffix(CT, L"areis"); return true; } if (checkSuffix(RV, L"ereis")) { CT = removeSuffix(CT, L"ereis"); 
return true; } if (checkSuffix(RV, L"ireis")) { CT = removeSuffix(CT, L"ireis"); return true; } if (checkSuffix(RV, L"asses")) { CT = removeSuffix(CT, L"asses"); return true; } if (checkSuffix(RV, L"esses")) { CT = removeSuffix(CT, L"esses"); return true; } if (checkSuffix(RV, L"isses")) { CT = removeSuffix(CT, L"isses"); return true; } if (checkSuffix(RV, L"astes")) { CT = removeSuffix(CT, L"astes"); return true; } if (checkSuffix(RV, L"assem")) { CT = removeSuffix(CT, L"assem"); return true; } if (checkSuffix(RV, L"essem")) { CT = removeSuffix(CT, L"essem"); return true; } if (checkSuffix(RV, L"issem")) { CT = removeSuffix(CT, L"issem"); return true; } if (checkSuffix(RV, L"ardes")) { CT = removeSuffix(CT, L"ardes"); return true; } if (checkSuffix(RV, L"erdes")) { CT = removeSuffix(CT, L"erdes"); return true; } if (checkSuffix(RV, L"irdes")) { CT = removeSuffix(CT, L"irdes"); return true; } if (checkSuffix(RV, L"ariam")) { CT = removeSuffix(CT, L"ariam"); return true; } if (checkSuffix(RV, L"eriam")) { CT = removeSuffix(CT, L"eriam"); return true; } if (checkSuffix(RV, L"iriam")) { CT = removeSuffix(CT, L"iriam"); return true; } if (checkSuffix(RV, L"arias")) { CT = removeSuffix(CT, L"arias"); return true; } if (checkSuffix(RV, L"erias")) { CT = removeSuffix(CT, L"erias"); return true; } if (checkSuffix(RV, L"irias")) { CT = removeSuffix(CT, L"irias"); return true; } if (checkSuffix(RV, L"estes")) { CT = removeSuffix(CT, L"estes"); return true; } if (checkSuffix(RV, L"istes")) { CT = removeSuffix(CT, L"istes"); return true; } if (checkSuffix(RV, L"areis")) { CT = removeSuffix(CT, L"areis"); return true; } if (checkSuffix(RV, L"aveis")) { CT = removeSuffix(CT, L"aveis"); return true; } } // suffix length = 4 if (RV.length() >= 4) { if (checkSuffix(RV, L"aria")) { CT = removeSuffix(CT, L"aria"); return true; } if (checkSuffix(RV, L"eria")) { CT = removeSuffix(CT, L"eria"); return true; } if (checkSuffix(RV, L"iria")) { CT = removeSuffix(CT, L"iria"); return true; } 
if (checkSuffix(RV, L"asse")) { CT = removeSuffix(CT, L"asse"); return true; } if (checkSuffix(RV, L"esse")) { CT = removeSuffix(CT, L"esse"); return true; } if (checkSuffix(RV, L"isse")) { CT = removeSuffix(CT, L"isse"); return true; } if (checkSuffix(RV, L"aste")) { CT = removeSuffix(CT, L"aste"); return true; } if (checkSuffix(RV, L"este")) { CT = removeSuffix(CT, L"este"); return true; } if (checkSuffix(RV, L"iste")) { CT = removeSuffix(CT, L"iste"); return true; } if (checkSuffix(RV, L"arei")) { CT = removeSuffix(CT, L"arei"); return true; } if (checkSuffix(RV, L"erei")) { CT = removeSuffix(CT, L"erei"); return true; } if (checkSuffix(RV, L"irei")) { CT = removeSuffix(CT, L"irei"); return true; } if (checkSuffix(RV, L"aram")) { CT = removeSuffix(CT, L"aram"); return true; } if (checkSuffix(RV, L"eram")) { CT = removeSuffix(CT, L"eram"); return true; } if (checkSuffix(RV, L"iram")) { CT = removeSuffix(CT, L"iram"); return true; } if (checkSuffix(RV, L"avam")) { CT = removeSuffix(CT, L"avam"); return true; } if (checkSuffix(RV, L"arem")) { CT = removeSuffix(CT, L"arem"); return true; } if (checkSuffix(RV, L"erem")) { CT = removeSuffix(CT, L"erem"); return true; } if (checkSuffix(RV, L"irem")) { CT = removeSuffix(CT, L"irem"); return true; } if (checkSuffix(RV, L"ando")) { CT = removeSuffix(CT, L"ando"); return true; } if (checkSuffix(RV, L"endo")) { CT = removeSuffix(CT, L"endo"); return true; } if (checkSuffix(RV, L"indo")) { CT = removeSuffix(CT, L"indo"); return true; } if (checkSuffix(RV, L"arao")) { CT = removeSuffix(CT, L"arao"); return true; } if (checkSuffix(RV, L"erao")) { CT = removeSuffix(CT, L"erao"); return true; } if (checkSuffix(RV, L"irao")) { CT = removeSuffix(CT, L"irao"); return true; } if (checkSuffix(RV, L"adas")) { CT = removeSuffix(CT, L"adas"); return true; } if (checkSuffix(RV, L"idas")) { CT = removeSuffix(CT, L"idas"); return true; } if (checkSuffix(RV, L"aras")) { CT = removeSuffix(CT, L"aras"); return true; } if (checkSuffix(RV, 
L"eras")) { CT = removeSuffix(CT, L"eras"); return true; } if (checkSuffix(RV, L"iras")) { CT = removeSuffix(CT, L"iras"); return true; } if (checkSuffix(RV, L"avas")) { CT = removeSuffix(CT, L"avas"); return true; } if (checkSuffix(RV, L"ares")) { CT = removeSuffix(CT, L"ares"); return true; } if (checkSuffix(RV, L"eres")) { CT = removeSuffix(CT, L"eres"); return true; } if (checkSuffix(RV, L"ires")) { CT = removeSuffix(CT, L"ires"); return true; } if (checkSuffix(RV, L"ados")) { CT = removeSuffix(CT, L"ados"); return true; } if (checkSuffix(RV, L"idos")) { CT = removeSuffix(CT, L"idos"); return true; } if (checkSuffix(RV, L"amos")) { CT = removeSuffix(CT, L"amos"); return true; } if (checkSuffix(RV, L"emos")) { CT = removeSuffix(CT, L"emos"); return true; } if (checkSuffix(RV, L"imos")) { CT = removeSuffix(CT, L"imos"); return true; } if (checkSuffix(RV, L"iras")) { CT = removeSuffix(CT, L"iras"); return true; } if (checkSuffix(RV, L"ieis")) { CT = removeSuffix(CT, L"ieis"); return true; } } // suffix length = 3 if (RV.length() >= 3) { if (checkSuffix(RV, L"ada")) { CT = removeSuffix(CT, L"ada"); return true; } if (checkSuffix(RV, L"ida")) { CT = removeSuffix(CT, L"ida"); return true; } if (checkSuffix(RV, L"ara")) { CT = removeSuffix(CT, L"ara"); return true; } if (checkSuffix(RV, L"era")) { CT = removeSuffix(CT, L"era"); return true; } if (checkSuffix(RV, L"ira")) { CT = removeSuffix(CT, L"ava"); return true; } if (checkSuffix(RV, L"iam")) { CT = removeSuffix(CT, L"iam"); return true; } if (checkSuffix(RV, L"ado")) { CT = removeSuffix(CT, L"ado"); return true; } if (checkSuffix(RV, L"ido")) { CT = removeSuffix(CT, L"ido"); return true; } if (checkSuffix(RV, L"ias")) { CT = removeSuffix(CT, L"ias"); return true; } if (checkSuffix(RV, L"ais")) { CT = removeSuffix(CT, L"ais"); return true; } if (checkSuffix(RV, L"eis")) { CT = removeSuffix(CT, L"eis"); return true; } if (checkSuffix(RV, L"ira")) { CT = removeSuffix(CT, L"ira"); return true; } if (checkSuffix(RV, 
L"ear")) { CT = removeSuffix(CT, L"ear"); return true; } } // suffix length = 2 if (RV.length() >= 2) { if (checkSuffix(RV, L"ia")) { CT = removeSuffix(CT, L"ia"); return true; } if (checkSuffix(RV, L"ei")) { CT = removeSuffix(CT, L"ei"); return true; } if (checkSuffix(RV, L"am")) { CT = removeSuffix(CT, L"am"); return true; } if (checkSuffix(RV, L"em")) { CT = removeSuffix(CT, L"em"); return true; } if (checkSuffix(RV, L"ar")) { CT = removeSuffix(CT, L"ar"); return true; } if (checkSuffix(RV, L"er")) { CT = removeSuffix(CT, L"er"); return true; } if (checkSuffix(RV, L"ir")) { CT = removeSuffix(CT, L"ir"); return true; } if (checkSuffix(RV, L"as")) { CT = removeSuffix(CT, L"as"); return true; } if (checkSuffix(RV, L"es")) { CT = removeSuffix(CT, L"es"); return true; } if (checkSuffix(RV, L"is")) { CT = removeSuffix(CT, L"is"); return true; } if (checkSuffix(RV, L"eu")) { CT = removeSuffix(CT, L"eu"); return true; } if (checkSuffix(RV, L"iu")) { CT = removeSuffix(CT, L"iu"); return true; } if (checkSuffix(RV, L"iu")) { CT = removeSuffix(CT, L"iu"); return true; } if (checkSuffix(RV, L"ou")) { CT = removeSuffix(CT, L"ou"); return true; } } // no ending was removed by step2 return false; } void BrazilianStemmer::step3() { if (RV.empty()) return; if (checkSuffix(RV, L"i") && suffixPreceded(RV, L"i", L"c")) CT = removeSuffix(CT, L"i"); } void BrazilianStemmer::step4() { if (RV.empty()) return; if (checkSuffix(RV, L"os")) { CT = removeSuffix(CT, L"os"); return; } if (checkSuffix(RV, L"a")) { CT = removeSuffix(CT, L"a"); return; } if (checkSuffix(RV, L"i")) { CT = removeSuffix(CT, L"i"); return; } if (checkSuffix(RV, L"o")) { CT = removeSuffix(CT, L"o"); return; } } void BrazilianStemmer::step5() { if (RV.empty()) return; if (checkSuffix(RV, L"e")) { if (suffixPreceded(RV, L"e", L"gu")) { CT = removeSuffix(CT, L"e"); CT = removeSuffix(CT, L"u"); return; } if (suffixPreceded(RV, L"e", L"ci")) { CT = removeSuffix(CT, L"e"); CT = removeSuffix(CT, L"i"); return; } CT = 
removeSuffix(CT, L"e"); return; } } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cjk/000077500000000000000000000000001217574114600254345ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cjk/CJKAnalyzer.cpp000066400000000000000000000047731217574114600302700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "CJKAnalyzer.h" #include "CJKTokenizer.h" #include "StopFilter.h" namespace Lucene { const wchar_t* CJKAnalyzer::_STOP_WORDS[] = { L"a", L"and", L"are", L"as", L"at", L"be", L"but", L"by", L"for", L"if", L"in", L"into", L"is", L"it", L"no", L"not", L"of", L"on", L"or", L"s", L"such", L"t", L"that", L"the", L"their", L"then", L"there", L"these", L"they", L"this", L"to", L"was", L"will", L"with", L"", L"www" }; CJKAnalyzer::CJKAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } CJKAnalyzer::CJKAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } CJKAnalyzer::~CJKAnalyzer() { } const HashSet CJKAnalyzer::getDefaultStopSet() { static HashSet stopSet; if (!stopSet) stopSet = HashSet::newInstance(_STOP_WORDS, _STOP_WORDS + SIZEOF_ARRAY(_STOP_WORDS)); return stopSet; } TokenStreamPtr CJKAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), newLucene(reader), stoptable); } TokenStreamPtr CJKAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { CJKAnalyzerSavedStreamsPtr 
streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->source, stoptable); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } CJKAnalyzerSavedStreams::~CJKAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cjk/CJKTokenizer.cpp000066400000000000000000000216101217574114600304420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "CJKTokenizer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "TypeAttribute.h" #include "Reader.h" #include "CharFolder.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { /// Word token type const int32_t CJKTokenizer::WORD_TYPE = 0; /// Single byte token type const int32_t CJKTokenizer::SINGLE_TOKEN_TYPE = 1; /// Double byte token type const int32_t CJKTokenizer::DOUBLE_TOKEN_TYPE = 2; /// Names for token types const wchar_t* CJKTokenizer::TOKEN_TYPE_NAMES[] = {L"word", L"single", L"double"}; const int32_t CJKTokenizer::MAX_WORD_LEN = 255; const int32_t CJKTokenizer::IO_BUFFER_SIZE = 256; CJKTokenizer::CJKTokenizer(ReaderPtr input) : Tokenizer(input) { } CJKTokenizer::CJKTokenizer(AttributeSourcePtr source, ReaderPtr input) : Tokenizer(source, input) { } CJKTokenizer::CJKTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : Tokenizer(factory, input) { } CJKTokenizer::~CJKTokenizer() { } void CJKTokenizer::initialize() { offset = 0; bufferIndex = 0; dataLen = 0; buffer = 
CharArray::newInstance(MAX_WORD_LEN); ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); tokenType = WORD_TYPE; preIsTokened = false; termAtt = addAttribute(); offsetAtt = addAttribute(); typeAtt = addAttribute(); } CJKTokenizer::UnicodeBlock CJKTokenizer::unicodeBlock(wchar_t c) { if (c >= 0x0000 && c <= 0x007f) return BASIC_LATIN; else if (c >= 0xff00 && c <= 0xffef) return HALFWIDTH_AND_FULLWIDTH_FORMS; return NONE; } bool CJKTokenizer::incrementToken() { clearAttributes(); while (true) // loop until we find a non-empty token { int32_t length = 0; // the position used to create Token int32_t start = offset; while (true) // loop until we've found a full token { wchar_t c = 0; UnicodeBlock ub = NONE; ++offset; if (bufferIndex >= dataLen) { dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); bufferIndex = 0; } if (dataLen == -1) { if (length > 0) { if (preIsTokened == true) { length = 0; preIsTokened = false; } else --offset; break; } else { --offset; return false; } } else { // get current character c = ioBuffer[bufferIndex++]; // get the UnicodeBlock of the current character ub = unicodeBlock(c); } // if the current character is ASCII or Extend ASCII if (ub == BASIC_LATIN || ub == HALFWIDTH_AND_FULLWIDTH_FORMS) { if (ub == HALFWIDTH_AND_FULLWIDTH_FORMS) { int32_t i = (int32_t)c; if (i >= 65281 && i <= 65374) { // convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN i = i - 65248; c = (wchar_t)i; } } // if the current character is a letter or "_" "+" "#" if (UnicodeUtil::isAlnum(c) || c == L'_' || c == L'+' || c == L'#') { if (length == 0) { // "javaC1C2C3C4linux"
// ^--: the current character begin to token the ASCII // letter start = offset - 1; } else if (tokenType == DOUBLE_TOKEN_TYPE) { // "javaC1C2C3C4linux"
// ^--: the previous non-ASCII // : the current character --offset; --bufferIndex; if (preIsTokened) { // there is only one non-ASCII has been stored length = 0; preIsTokened = false; break; } else break; } // store the LowerCase(c) in the buffer buffer[length++] = CharFolder::toLower(c); tokenType = SINGLE_TOKEN_TYPE; // break the procedure if buffer overflowed! if (length == MAX_WORD_LEN) break; } else if (length > 0) { if (preIsTokened) { length = 0; preIsTokened = false; } else break; } } else { // non-ASCII letter, e.g."C1C2C3C4" if (UnicodeUtil::isAlpha(c)) { if (length == 0) { start = offset - 1; buffer[length++] = c; tokenType = DOUBLE_TOKEN_TYPE; } else { if (tokenType == SINGLE_TOKEN_TYPE) { --offset; --bufferIndex; // return the previous ASCII characters break; } else { buffer[length++] = c; tokenType = DOUBLE_TOKEN_TYPE; if (length == 2) { --offset; --bufferIndex; preIsTokened = true; break; } } } } else if (length > 0) { if (preIsTokened) { // empty the buffer length = 0; preIsTokened = false; } else break; } } } if (length > 0) { termAtt->setTermBuffer(buffer.get(), 0, length); offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); typeAtt->setType(TOKEN_TYPE_NAMES[tokenType]); return true; } else if (dataLen == -1) { --offset; return false; } // Cycle back and try for the next token (don't return an empty string) } } void CJKTokenizer::end() { // set final offset int32_t finalOffset = correctOffset(offset); offsetAtt->setOffset(finalOffset, finalOffset); } void CJKTokenizer::reset() { Tokenizer::reset(); offset = 0; bufferIndex = 0; dataLen = 0; preIsTokened = false; tokenType = WORD_TYPE; } void CJKTokenizer::reset(ReaderPtr input) { Tokenizer::reset(input); reset(); } } 
LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cn/000077500000000000000000000000001217574114600252655ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cn/ChineseAnalyzer.cpp000066400000000000000000000027131217574114600310600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ChineseAnalyzer.h" #include "ChineseTokenizer.h" #include "ChineseFilter.h" namespace Lucene { ChineseAnalyzer::~ChineseAnalyzer() { } TokenStreamPtr ChineseAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result); return result; } TokenStreamPtr ChineseAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { ChineseAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(streams->source); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } ChineseAnalyzerSavedStreams::~ChineseAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp000066400000000000000000000036211217574114600305170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ChineseFilter.h" #include "TermAttribute.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const wchar_t* ChineseFilter::STOP_WORDS[] = { L"and", L"are", L"as", L"at", L"be", L"but", L"by", L"for", L"if", L"in", L"into", L"is", L"it", L"no", L"not", L"of", L"on", L"or", L"such", L"that", L"the", L"their", L"then", L"there", L"these", L"they", L"this", L"to", L"was", L"will", L"with" }; ChineseFilter::ChineseFilter(TokenStreamPtr input) : TokenFilter(input) { stopTable = HashSet::newInstance(STOP_WORDS, STOP_WORDS + SIZEOF_ARRAY(STOP_WORDS)); termAtt = addAttribute(); } ChineseFilter::~ChineseFilter() { } bool ChineseFilter::incrementToken() { while (input->incrementToken()) { String text(termAtt->term()); if (!stopTable.contains(text)) { if (UnicodeUtil::isLower(text[0]) || UnicodeUtil::isUpper(text[0])) { // English word/token should larger than 1 character. if (text.length() > 1) return true; } else if (UnicodeUtil::isOther(text[0])) { // One Chinese character as one Chinese word. // Chinese word extraction to be added later here. return true; } } } return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp000066400000000000000000000070621217574114600312470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ChineseTokenizer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "Reader.h" #include "CharFolder.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const int32_t ChineseTokenizer::MAX_WORD_LEN = 255; const int32_t ChineseTokenizer::IO_BUFFER_SIZE = 1024; ChineseTokenizer::ChineseTokenizer(ReaderPtr input) : Tokenizer(input) { } ChineseTokenizer::ChineseTokenizer(AttributeSourcePtr source, ReaderPtr input) : Tokenizer(source, input) { } ChineseTokenizer::ChineseTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : Tokenizer(factory, input) { } ChineseTokenizer::~ChineseTokenizer() { } void ChineseTokenizer::initialize() { offset = 0; bufferIndex = 0; dataLen = 0; buffer = CharArray::newInstance(MAX_WORD_LEN); ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); length = 0; start = 0; termAtt = addAttribute(); offsetAtt = addAttribute(); } void ChineseTokenizer::push(wchar_t c) { if (length == 0) start = offset - 1; // start of token buffer[length++] = CharFolder::toLower(c); // buffer it } bool ChineseTokenizer::flush() { if (length > 0) { termAtt->setTermBuffer(buffer.get(), 0, length); offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); return true; } else return false; } bool ChineseTokenizer::incrementToken() { clearAttributes(); length = 0; start = offset; while (true) { wchar_t c; ++offset; if (bufferIndex >= dataLen) { dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); bufferIndex = 0; } if (dataLen == -1) { --offset; return flush(); } else c = ioBuffer[bufferIndex++]; if (UnicodeUtil::isDigit(c) || UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) { push(c); if (length == MAX_WORD_LEN) return flush(); } else if (UnicodeUtil::isOther(c)) { if (length > 0) { --bufferIndex; --offset; return flush(); } push(c); return flush(); } else if (length > 0) return flush(); } } void 
ChineseTokenizer::end() { // set final offset int32_t finalOffset = correctOffset(offset); offsetAtt->setOffset(finalOffset, finalOffset); } void ChineseTokenizer::reset() { Tokenizer::reset(); offset = 0; bufferIndex = 0; dataLen = 0; } void ChineseTokenizer::reset(ReaderPtr input) { Tokenizer::reset(input); reset(); } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cz/000077500000000000000000000000001217574114600253015ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/cz/CzechAnalyzer.cpp000066400000000000000000000203171217574114600305520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "CzechAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Czech stopwords in UTF-8 format. 
const uint8_t CzechAnalyzer::_CZECH_STOP_WORDS[] = { 0x61, 0x0a, 0x73, 0x0a, 0x6b, 0x0a, 0x6f, 0x0a, 0x69, 0x0a, 0x75, 0x0a, 0x76, 0x0a, 0x7a, 0x0a, 0x64, 0x6e, 0x65, 0x73, 0x0a, 0x63, 0x7a, 0x0a, 0x74, 0xc3, 0xad, 0x6d, 0x74, 0x6f, 0x0a, 0x62, 0x75, 0x64, 0x65, 0xc5, 0xa1, 0x0a, 0x62, 0x75, 0x64, 0x65, 0x6d, 0x0a, 0x62, 0x79, 0x6c, 0x69, 0x0a, 0x6a, 0x73, 0x65, 0xc5, 0xa1, 0x0a, 0x6d, 0x75, 0x6a, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x6d, 0x0a, 0x74, 0x61, 0x0a, 0x74, 0x6f, 0x6d, 0x74, 0x6f, 0x0a, 0x74, 0x6f, 0x68, 0x6c, 0x65, 0x0a, 0x74, 0x75, 0x74, 0x6f, 0x0a, 0x74, 0x79, 0x74, 0x6f, 0x0a, 0x6a, 0x65, 0x6a, 0x0a, 0x7a, 0x64, 0x61, 0x0a, 0x70, 0x72, 0x6f, 0x63, 0x0a, 0x6d, 0xc3, 0xa1, 0x74, 0x65, 0x0a, 0x74, 0x61, 0x74, 0x6f, 0x0a, 0x6b, 0x61, 0x6d, 0x0a, 0x74, 0x6f, 0x68, 0x6f, 0x74, 0x6f, 0x0a, 0x6b, 0x64, 0x6f, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xad, 0x0a, 0x6d, 0x69, 0x0a, 0x6e, 0xc3, 0xa1, 0x6d, 0x0a, 0x74, 0x6f, 0x6d, 0x0a, 0x74, 0x6f, 0x6d, 0x75, 0x74, 0x6f, 0x0a, 0x6d, 0xc3, 0xad, 0x74, 0x0a, 0x6e, 0x69, 0x63, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0x6f, 0x75, 0x0a, 0x62, 0x79, 0x6c, 0x61, 0x0a, 0x74, 0x6f, 0x68, 0x6f, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0xc5, 0xbe, 0x65, 0x0a, 0x61, 0x73, 0x69, 0x0a, 0x68, 0x6f, 0x0a, 0x6e, 0x61, 0xc5, 0xa1, 0x69, 0x0a, 0x6e, 0x61, 0x70, 0x69, 0xc5, 0xa1, 0x74, 0x65, 0x0a, 0x72, 0x65, 0x0a, 0x63, 0x6f, 0xc5, 0xbe, 0x0a, 0x74, 0xc3, 0xad, 0x6d, 0x0a, 0x74, 0x61, 0x6b, 0xc5, 0xbe, 0x65, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x63, 0x68, 0x0a, 0x6a, 0x65, 0x6a, 0xc3, 0xad, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x6d, 0x69, 0x0a, 0x6a, 0x73, 0x74, 0x65, 0x0a, 0x61, 0x6a, 0x0a, 0x74, 0x75, 0x0a, 0x74, 0x65, 0x64, 0x79, 0x0a, 0x74, 0x65, 0x74, 0x6f, 0x0a, 0x62, 0x79, 0x6c, 0x6f, 0x0a, 0x6b, 0x64, 0x65, 0x0a, 0x6b, 0x65, 0x0a, 0x70, 0x72, 0x61, 0x76, 0xc3, 0xa9, 0x0a, 0x6a, 0x69, 0x0a, 0x6e, 0x61, 0x64, 0x0a, 0x6e, 0x65, 0x6a, 0x73, 0x6f, 0x75, 0x0a, 0x63, 0x69, 0x0a, 0x70, 0x6f, 0x64, 0x0a, 0x74, 0xc3, 0xa9, 0x6d, 0x61, 
0x0a, 0x6d, 0x65, 0x7a, 0x69, 0x0a, 0x70, 0x72, 0x65, 0x73, 0x0a, 0x74, 0x79, 0x0a, 0x70, 0x61, 0x6b, 0x0a, 0x76, 0xc3, 0xa1, 0x6d, 0x0a, 0x61, 0x6e, 0x69, 0x0a, 0x6b, 0x64, 0x79, 0xc5, 0xbe, 0x0a, 0x76, 0xc5, 0xa1, 0x61, 0x6b, 0x0a, 0x6e, 0x65, 0x67, 0x0a, 0x6a, 0x73, 0x65, 0x6d, 0x0a, 0x74, 0x65, 0x6e, 0x74, 0x6f, 0x0a, 0x63, 0x6c, 0xc3, 0xa1, 0x6e, 0x6b, 0x75, 0x0a, 0x63, 0x6c, 0xc3, 0xa1, 0x6e, 0x6b, 0x79, 0x0a, 0x61, 0x62, 0x79, 0x0a, 0x6a, 0x73, 0x6d, 0x65, 0x0a, 0x70, 0x72, 0x65, 0x64, 0x0a, 0x70, 0x74, 0x61, 0x0a, 0x6a, 0x65, 0x6a, 0x69, 0x63, 0x68, 0x0a, 0x62, 0x79, 0x6c, 0x0a, 0x6a, 0x65, 0xc5, 0xa1, 0x74, 0x65, 0x0a, 0x61, 0xc5, 0xbe, 0x0a, 0x62, 0x65, 0x7a, 0x0a, 0x74, 0x61, 0x6b, 0xc3, 0xa9, 0x0a, 0x70, 0x6f, 0x75, 0x7a, 0x65, 0x0a, 0x70, 0x72, 0x76, 0x6e, 0xc3, 0xad, 0x0a, 0x76, 0x61, 0xc5, 0xa1, 0x65, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xa1, 0x0a, 0x6e, 0xc3, 0xa1, 0x73, 0x0a, 0x6e, 0x6f, 0x76, 0xc3, 0xbd, 0x0a, 0x74, 0x69, 0x70, 0x79, 0x0a, 0x70, 0x6f, 0x6b, 0x75, 0x64, 0x0a, 0x6d, 0x75, 0xc5, 0xbe, 0x65, 0x0a, 0x73, 0x74, 0x72, 0x61, 0x6e, 0x61, 0x0a, 0x6a, 0x65, 0x68, 0x6f, 0x0a, 0x73, 0x76, 0xc3, 0xa9, 0x0a, 0x6a, 0x69, 0x6e, 0xc3, 0xa9, 0x0a, 0x7a, 0x70, 0x72, 0xc3, 0xa1, 0x76, 0x79, 0x0a, 0x6e, 0x6f, 0x76, 0xc3, 0xa9, 0x0a, 0x6e, 0x65, 0x6e, 0xc3, 0xad, 0x0a, 0x76, 0xc3, 0xa1, 0x73, 0x0a, 0x6a, 0x65, 0x6e, 0x0a, 0x70, 0x6f, 0x64, 0x6c, 0x65, 0x0a, 0x7a, 0x64, 0x65, 0x0a, 0x75, 0xc5, 0xbe, 0x0a, 0x62, 0xc3, 0xbd, 0x74, 0x0a, 0x76, 0xc3, 0xad, 0x63, 0x65, 0x0a, 0x62, 0x75, 0x64, 0x65, 0x0a, 0x6a, 0x69, 0xc5, 0xbe, 0x0a, 0x6e, 0x65, 0xc5, 0xbe, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xbd, 0x0a, 0x62, 0x79, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xa9, 0x0a, 0x63, 0x6f, 0x0a, 0x6e, 0x65, 0x62, 0x6f, 0x0a, 0x74, 0x65, 0x6e, 0x0a, 0x74, 0x61, 0x6b, 0x0a, 0x6d, 0xc3, 0xa1, 0x0a, 0x70, 0x72, 0x69, 0x0a, 0x6f, 0x64, 0x0a, 0x70, 0x6f, 0x0a, 0x6a, 0x73, 0x6f, 0x75, 0x0a, 0x6a, 0x61, 0x6b, 0x0a, 0x64, 0x61, 0x6c, 0xc5, 0xa1, 0xc3, 0xad, 0x0a, 0x61, 0x6c, 0x65, 
0x0a, 0x73, 0x69, 0x0a, 0x73, 0x65, 0x0a, 0x76, 0x65, 0x0a, 0x74, 0x6f, 0x0a, 0x6a, 0x61, 0x6b, 0x6f, 0x0a, 0x7a, 0x61, 0x0a, 0x7a, 0x70, 0x65, 0x74, 0x0a, 0x7a, 0x65, 0x0a, 0x64, 0x6f, 0x0a, 0x70, 0x72, 0x6f, 0x0a, 0x6a, 0x65, 0x0a, 0x6e, 0x61, 0x0a, 0x61, 0x74, 0x64, 0x0a, 0x61, 0x74, 0x70, 0x0a, 0x6a, 0x61, 0x6b, 0x6d, 0x69, 0x6c, 0x65, 0x0a, 0x70, 0x72, 0x69, 0x63, 0x65, 0x6d, 0xc5, 0xbe, 0x0a, 0x6a, 0xc3, 0xa1, 0x0a, 0x6f, 0x6e, 0x0a, 0x6f, 0x6e, 0x61, 0x0a, 0x6f, 0x6e, 0x6f, 0x0a, 0x6f, 0x6e, 0x69, 0x0a, 0x6f, 0x6e, 0x79, 0x0a, 0x6d, 0x79, 0x0a, 0x76, 0x79, 0x0a, 0x6a, 0xc3, 0xad, 0x0a, 0x6a, 0x69, 0x0a, 0x6d, 0x65, 0x0a, 0x6d, 0x6e, 0x65, 0x0a, 0x6a, 0x65, 0x6d, 0x75, 0x0a, 0x74, 0x6f, 0x6d, 0x75, 0x0a, 0x74, 0x65, 0x6d, 0x0a, 0x74, 0x65, 0x6d, 0x75, 0x0a, 0x6e, 0x65, 0x6d, 0x75, 0x0a, 0x6e, 0x65, 0x6d, 0x75, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0x68, 0x6f, 0xc5, 0xbe, 0x0a, 0x6a, 0xc3, 0xad, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0x6c, 0x69, 0x6b, 0x6f, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0xc5, 0xbe, 0x0a, 0x6a, 0x61, 0x6b, 0x6f, 0xc5, 0xbe, 0x0a, 0x6e, 0x61, 0x63, 0x65, 0xc5, 0xbe, 0x0a }; CzechAnalyzer::CzechAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } CzechAnalyzer::CzechAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } CzechAnalyzer::~CzechAnalyzer() { } const HashSet CzechAnalyzer::getDefaultStopSet() { static HashSet stopSet; if (!stopSet) { String stopWords(UTF8_TO_STRING(_CZECH_STOP_WORDS)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); } return stopSet; } TokenStreamPtr CzechAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), 
result, stoptable); return result; } TokenStreamPtr CzechAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { CzechAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } CzechAnalyzerSavedStreams::~CzechAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/de/000077500000000000000000000000001217574114600252555ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/de/GermanAnalyzer.cpp000066400000000000000000000074131217574114600307050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GermanAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "GermanStemFilter.h" namespace Lucene { const wchar_t* GermanAnalyzer::_GERMAN_STOP_WORDS[] = { L"einer", L"eine", L"eines", L"einem", L"einen", L"der", L"die", L"das", L"dass", L"da\x00df", L"du", L"er", L"sie", L"es", L"was", L"wer", L"wie", L"wir", L"und", L"oder", L"ohne", L"mit", L"am", L"im", L"in", L"aus", L"auf", L"ist", L"sein", L"war", L"wird", L"ihr", L"ihre", L"ihres", L"als", L"f\x00fcr", L"von", L"mit", L"dich", L"dir", L"mich", L"mir", L"mein", L"sein", L"kein", L"durch", L"wegen", L"wird" }; GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion) { this->stopSet = getDefaultStopSet(); this->matchVersion = matchVersion; } GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stopSet = stopwords; this->matchVersion = matchVersion; } GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { this->stopSet = stopwords; this->exclusionSet = exclusions; this->matchVersion = matchVersion; } GermanAnalyzer::~GermanAnalyzer() { } const HashSet GermanAnalyzer::getDefaultStopSet() { static HashSet stopSet; if (!stopSet) stopSet = HashSet::newInstance(_GERMAN_STOP_WORDS, _GERMAN_STOP_WORDS + SIZEOF_ARRAY(_GERMAN_STOP_WORDS)); return stopSet; } void GermanAnalyzer::setStemExclusionTable(HashSet exclusions) { exclusionSet = exclusions; setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created } TokenStreamPtr GermanAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, 
stopSet); result = newLucene(result, exclusionSet); return result; } TokenStreamPtr GermanAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { GermanAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); streams->result = newLucene(streams->result, exclusionSet); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } GermanAnalyzerSavedStreams::~GermanAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/de/GermanStemFilter.cpp000066400000000000000000000034451217574114600311770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GermanStemFilter.h" #include "GermanStemmer.h" #include "TermAttribute.h" namespace Lucene { GermanStemFilter::GermanStemFilter(TokenStreamPtr input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } GermanStemFilter::GermanStemFilter(TokenStreamPtr input, HashSet exclusionSet) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); this->exclusionSet = exclusionSet; } GermanStemFilter::~GermanStemFilter() { } bool GermanStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); // Check the exclusion table. 
if (!exclusionSet || !exclusionSet.contains(term)) { String s(stemmer->stem(term)); // If not stemmed, don't waste the time adjusting the token. if (!s.empty() && s != term) termAtt->setTermBuffer(s); } return true; } else return false; } void GermanStemFilter::setStemmer(GermanStemmerPtr stemmer) { if (stemmer) this->stemmer = stemmer; } void GermanStemFilter::setExclusionSet(HashSet exclusionSet) { this->exclusionSet = exclusionSet; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/de/GermanStemmer.cpp000066400000000000000000000153161217574114600305350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include #include "GermanStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { GermanStemmer::GermanStemmer() { substCount = 0; } GermanStemmer::~GermanStemmer() { } String GermanStemmer::stem(const String& term) { // Use lowercase for medium stemming. 
buffer = StringUtils::toLower(term); if (!isStemmable()) return buffer; // Stemming starts here substitute(); strip(); optimize(); resubstitute(); removeParticleDenotion(); return buffer; } bool GermanStemmer::isStemmable() { for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { if (!UnicodeUtil::isAlpha(buffer[c])) return false; } return true; } void GermanStemmer::strip() { bool doMore = true; while (doMore && buffer.length() > 3) { if (buffer.length() + substCount > 5 && boost::ends_with(buffer, L"nd")) buffer.resize(buffer.length() - 2); else if (buffer.length() + substCount > 4 && boost::ends_with(buffer, L"em")) buffer.resize(buffer.length() - 2); else if (buffer.length() + substCount > 4 && boost::ends_with(buffer, L"er")) buffer.resize(buffer.length() - 2); else if (buffer[buffer.length() - 1] == L'e') buffer.resize(buffer.length() - 1); else if (buffer[buffer.length() - 1] == L's') buffer.resize(buffer.length() - 1); else if (buffer[buffer.length() - 1] == L'n') buffer.resize(buffer.length() - 1); // "t" occurs only as suffix of verbs. else if (buffer[buffer.length() - 1] == L't') buffer.resize(buffer.length() - 1); else doMore = false; } } void GermanStemmer::optimize() { // Additional step for female plurals of professions and inhabitants. if (buffer.length() > 5 && boost::ends_with(buffer, L"erin*")) { buffer.resize(buffer.length() - 1); strip(); } // Additional step for irregular plural nouns like "Matrizen -> Matrix". 
if (buffer[buffer.length() - 1] == L'z') buffer[buffer.length() - 1] = L'x'; } void GermanStemmer::removeParticleDenotion() { if (buffer.length() > 4) { for (int32_t c = 0; c < (int32_t)buffer.length() - 3; ++c) { if (buffer.substr(c, 4) == L"gege") { buffer.erase(c, 2); return; } } } } void GermanStemmer::substitute() { substCount = 0; for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { // Replace the second char of a pair of the equal characters with an asterisk if (c > 0 && buffer[c] == buffer[c - 1]) buffer[c] = L'*'; // Substitute Umlauts. else if (buffer[c] == L'\x00e4') buffer[c] = L'a'; else if (buffer[c] == L'\x00f6') buffer[c] = L'o'; else if (buffer[c] == L'\x00fc') buffer[c] = L'u'; // Fix bug so that '' at the end of a word is replaced. else if (buffer[c] == L'\x00df') { buffer[c] = L's'; buffer.insert(c + 1, 1, L's'); ++substCount; } // Take care that at least one character is left left side from the current one if (c < (int32_t)buffer.length() - 1) { // Masking several common character combinations with an token if (c < (int32_t)buffer.length() - 2 && buffer[c] == L's' && buffer[c + 1] == L'c' && buffer[c + 2] == L'h') { buffer[c] = L'$'; buffer.erase(c + 1, 2); substCount += 2; } else if (buffer[c] == L'c' && buffer[c + 1] == L'h') { buffer[c] = L'\x00a7'; buffer.erase(c + 1, 1); ++substCount; } else if (buffer[c] == L'e' && buffer[c + 1] == L'i') { buffer[c] = L'%'; buffer.erase(c + 1, 1); ++substCount; } else if (buffer[c] == L'i' && buffer[c + 1] == L'e') { buffer[c] = L'&'; buffer.erase(c + 1, 1); ++substCount; } else if (buffer[c] == L'i' && buffer[c + 1] == L'g') { buffer[c] = L'#'; buffer.erase(c + 1, 1); ++substCount; } else if (buffer[c] == L's' && buffer[c + 1] == L't') { buffer[c] = L'!'; buffer.erase(c + 1, 1); ++substCount; } } } } void GermanStemmer::resubstitute() { for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { if (buffer[c] == L'*') buffer[c] = buffer[c - 1]; else if (buffer[c] == L'$') { buffer[c] = L's'; 
buffer.insert(c + 1, L"ch"); } else if (buffer[c] == L'\x00a7') { buffer[c] = L'c'; buffer.insert(c + 1, 1, L'h'); } else if (buffer[c] == L'%') { buffer[c] = L'e'; buffer.insert(c + 1, 1, L'i'); } else if (buffer[c] == L'&') { buffer[c] = L'i'; buffer.insert(c + 1, 1, L'e'); } else if (buffer[c] == L'#') { buffer[c] = L'i'; buffer.insert(c + 1, 1, L'g'); } else if (buffer[c] == L'!') { buffer[c] = L's'; buffer.insert(c + 1, 1, L't'); } } } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/el/000077500000000000000000000000001217574114600252655ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/el/GreekAnalyzer.cpp000066400000000000000000000155211217574114600305400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GreekAnalyzer.h" #include "StandardTokenizer.h" #include "GreekLowerCaseFilter.h" #include "StopFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Greek stopwords in UTF-8 format. 
const uint8_t GreekAnalyzer::_GREEK_STOP_WORDS[] = { 0xce, 0xbf, 0x0a, 0xce, 0xb7, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x85, 0x0a, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x83, 0x0a, 0xcf, 0x84, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbc, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbc, 0xce, 0xb1, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xbd, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb7, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xbc, 0xce, 0xb1, 0x0a, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0x0a, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbf, 0x0a, 0xce, 0xb3, 0xce, 0xb9, 0xce, 0xb1, 0x0a, 0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xbc, 0xce, 0xb5, 0x0a, 0xcf, 0x83, 0xce, 0xb5, 0x0a, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0x0a, 0xce, 0xb1, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0xce, 0xb1, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xb8, 0xce, 0xb1, 0x0a, 0xce, 0xbd, 0xce, 0xb1, 0x0a, 0xce, 0xb4, 0xce, 0xb5, 0x0a, 0xce, 0xb4, 0xce, 0xb5, 0xce, 0xbd, 0x0a, 0xce, 0xbc, 0xce, 0xb7, 0x0a, 0xce, 0xbc, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xb5, 0xcf, 0x80, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xbd, 0xcf, 0x89, 0x0a, 0xce, 0xb5, 0xce, 0xb1, 0xce, 0xbd, 0x0a, 0xce, 0xb1, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xcf, 
0x85, 0x0a, 0xcf, 0x80, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xb1, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xb5, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb7, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb5, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb7, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb5, 0xcf, 0x83, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb1, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xcf, 0x80, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xce, 0xbc, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xb9, 0xcf, 0x83, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xcf, 0x83, 0xce, 0xbf, 0x0a, 0xce, 0xbf, 0xcf, 0x84, 0xce, 0xb9, 0x0a }; 
GreekAnalyzer::GreekAnalyzer(LuceneVersion::Version matchVersion) { this->stopSet = getDefaultStopSet(); this->matchVersion = matchVersion; } GreekAnalyzer::GreekAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stopSet = stopwords; this->matchVersion = matchVersion; } GreekAnalyzer::~GreekAnalyzer() { } const HashSet GreekAnalyzer::getDefaultStopSet() { static HashSet stopSet; if (!stopSet) { String stopWords(UTF8_TO_STRING(_GREEK_STOP_WORDS)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); } return stopSet; } TokenStreamPtr GreekAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); return result; } TokenStreamPtr GreekAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { GreekAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } GreekAnalyzerSavedStreams::~GreekAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/el/GreekLowerCaseFilter.cpp000066400000000000000000000065441217574114600320120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GreekLowerCaseFilter.h" #include "TermAttribute.h" #include "CharFolder.h" namespace Lucene { GreekLowerCaseFilter::GreekLowerCaseFilter(TokenStreamPtr input) : TokenFilter(input) { termAtt = addAttribute(); } GreekLowerCaseFilter::~GreekLowerCaseFilter() { } bool GreekLowerCaseFilter::incrementToken() { if (input->incrementToken()) { wchar_t* chArray = termAtt->termBufferArray(); int32_t chLen = termAtt->termLength(); for (int32_t i = 0; i < chLen; ++i) chArray[i] = lowerCase(chArray[i]); return true; } else return false; } wchar_t GreekLowerCaseFilter::lowerCase(wchar_t codepoint) { switch (codepoint) { case L'\x03c2': // small final sigma return 0x03c3; // small sigma // Some Greek characters contain diacritics. // This filter removes these, converting to the lowercase base form. case L'\x0386': // capital alpha with tonos case L'\x03ac': // small alpha with tonos return L'\x03b1'; // small alpha case L'\x0388': // capital epsilon with tonos case L'\x03ad': // small epsilon with tonos return L'\x03b5'; // small epsilon case L'\x0389': // capital eta with tonos case L'\x03ae': // small eta with tonos return L'\x03b7'; // small eta case L'\x038a': // capital iota with tonos case L'\x03aa': // capital iota with dialytika case L'\x03af': // small iota with tonos case L'\x03ca': // small iota with dialytika case L'\x0390': // small iota with dialytika and tonos return L'\x03b9'; // small iota case L'\x038e': // capital upsilon with tonos case L'\x03ab': // capital upsilon with dialytika case L'\x03cd': // small upsilon with tonos case L'\x03cb': // small upsilon with dialytika case L'\x03b0': // small upsilon with dialytika and tonos return L'\x03c5'; // small upsilon case L'\x038c': // capital omicron with tonos case L'\x03cc': // small omicron with tonos return L'\x03bf'; // small omicron case L'\x038f': // capital omega with tonos case L'\x03ce': // 
small omega with tonos return L'\x03c9'; // small omega // The previous implementation did the conversion below. // Only implemented for backwards compatibility with old indexes. case L'\x03a2': // reserved return L'\x03c2'; // small final sigma default: return CharFolder::toLower(codepoint); } } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fa/000077500000000000000000000000001217574114600252535ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fa/PersianAnalyzer.cpp000066400000000000000000000511561217574114600310760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "PersianAnalyzer.h" #include "ArabicLetterTokenizer.h" #include "ArabicNormalizationFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "PersianNormalizationFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Persian stopwords in UTF-8 format. /// /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html /// The stopword list is BSD-Licensed. 
const uint8_t PersianAnalyzer::DEFAULT_STOPWORD_FILE[] = { 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xb3, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xb1, 0x0a, 0xd8, 0xae, 0xd9, 0x8a, 0xd8, 0xa7, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x85, 0x0a, 0xd9, 0xbe, 0xd8, 0xb3, 0x0a, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, 0xda, 0xaf, 0xd9, 0x88, 0x0a, 0xd9, 0x8a, 0xd8, 0xa7, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd9, 0xbe, 0xd8, 0xb3, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xda, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x87, 0xd8, 0xb1, 0xda, 0xaf, 0xd8, 0xb2, 0x0a, 0xd9, 0xbe, 0xd9, 0x86, 0xd8, 0xac, 0x0a, 0xd9, 0x86, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd8, 0xb7, 0xd9, 0x88, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0x0a, 0xd8, 0xaf, 0xd9, 0x88, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xda, 0x86, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xda, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xb7, 0x0a, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x82, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0x0a, 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd9, 0x87, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, 0x86, 0xd9, 0x86, 0xd9, 
0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd8, 0xb1, 0x0a, 0xd9, 0x87, 0xd8, 0xb2, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xb4, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x86, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x84, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x88, 0xd9, 0x82, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xaf, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, 0xd8, 0xac, 0xd8, 0xb2, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xae, 0xd8, 0xaf, 0xd9, 0x85, 0xd8, 0xa7, 0xd8, 0xaa, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xae, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb3, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xac, 0xd9, 0x84, 0xd9, 0x88, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xad, 0xd9, 0x82, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xb9, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xb8, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd9, 0x86, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 
0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x82, 0x0a, 0xd9, 0x87, 0xd9, 0x8a, 0xda, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x85, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd9, 0x83, 0xd8, 0xac, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xaf, 0x0a, 0xd9, 0x83, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd9, 0x85, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x85, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xac, 0xd8, 0xaf, 0xd8, 0xa7, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xda, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, 0xb3, 0xd9, 0x85, 0xd8, 0xaa, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xda, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 
0xa7, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xab, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa8, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xb3, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb6, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0x0a, 0xd8, 0xac, 0xd8, 0xb1, 0xd9, 0x8a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd9, 0x84, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x85, 0xd8, 0xaf, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x8a, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0x0a, 0xd8, 0xaa, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xac, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x85, 0x0a, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x86, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 
0xd9, 0x86, 0xd8, 0xb2, 0xd8, 0xaf, 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd9, 0x82, 0xd8, 0xb5, 0xd8, 0xaf, 0x0a, 0xd9, 0x81, 0xd9, 0x82, 0xd8, 0xb7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xb7, 0x0a, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x81, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb4, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xb7, 0xd9, 0x88, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd9, 0x86, 0xd8, 0xb2, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x83, 0x0a, 0xd8, 0xb7, 0xd9, 0x8a, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb2, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xaa, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x8a, 0x0a, 0xd8, 0xb7, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x82, 0x0a, 0xd8, 0xa7, 0xd8, 0xb4, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0x0a, 0xd9, 0x86, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xda, 0xaf, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xda, 
0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0xbe, 0xd8, 0xa7, 0xd8, 0xb9, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb3, 0x0a, 0xd8, 0xad, 0xd8, 0xaf, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xd8, 0xae, 0xd8, 0xaa, 0xd9, 0x84, 0xd9, 0x81, 0x0a, 0xd9, 0x85, 0xd9, 0x82, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0x0a, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xb6, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd8, 0xb1, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xda, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xae, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xac, 0x0a, 0xd8, 0xb4, 0xd8, 0xb4, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd8, 0xaa, 0xd8, 0xad, 0xd8, 0xaa, 0x0a, 0xd8, 0xb6, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xda, 0xaf, 
0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x81, 0xd9, 0x83, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xb3, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0xbe, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x88, 0xd9, 0x82, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xd9, 0x83, 0xd9, 0x8a, 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0x0a, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x8a, 0x0a, 0xd8, 0xad, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb9, 0xd9, 0x82, 0xd8, 0xa8, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xd8, 0xab, 0xd9, 0x84, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd8, 0xb7, 0xd8, 0xa8, 0xd9, 0x82, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xd8, 0xb5, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaa, 0x0a, 
0xd8, 0xba, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xb1, 0xd9, 0x8a, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xda, 0x86, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0x84, 0xd8, 0xb7, 0xd9, 0x81, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd8, 0xb1, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xda, 0xaf, 0xd8, 0xb0, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd8, 0xaa, 0x0a, 0xda, 0xaf, 0xd8, 0xb0, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0x0a, 0xd9, 0x81, 0xd9, 0x88, 0xd9, 0x82, 0x0a, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xb4, 0xd9, 0x88, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd9, 0x84, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd9, 0x82, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x85, 0x0a, 0xd8, 0xb3, 0xd8, 0xb9, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xb2, 0xd9, 0x87, 0x0a, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xac, 0xd9, 
0x84, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0x0a }; PersianAnalyzer::PersianAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } PersianAnalyzer::PersianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } PersianAnalyzer::~PersianAnalyzer() { } const HashSet PersianAnalyzer::getDefaultStopSet() { static HashSet stopSet; if (!stopSet) { String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); } return stopSet; } TokenStreamPtr PersianAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result); result = newLucene(result); // additional Persian-specific normalization result = newLucene(result); // the order here is important: the stopword list is not normalized result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); return result; } TokenStreamPtr PersianAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { PersianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); // additional Persian-specific normalization streams->result = newLucene(streams->result); // the order here is important: the stopword list is not normalized streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } 
PersianAnalyzerSavedStreams::~PersianAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fa/PersianNormalizationFilter.cpp000066400000000000000000000021361217574114600332770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "PersianNormalizationFilter.h" #include "PersianNormalizer.h" #include "TermAttribute.h" namespace Lucene { PersianNormalizationFilter::PersianNormalizationFilter(TokenStreamPtr input) : TokenFilter(input) { normalizer = newLucene(); termAtt = addAttribute(); } PersianNormalizationFilter::~PersianNormalizationFilter() { } bool PersianNormalizationFilter::incrementToken() { if (input->incrementToken()) { int32_t newlen = normalizer->normalize(termAtt->termBuffer().get(), termAtt->termLength()); termAtt->setTermLength(newlen); return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fa/PersianNormalizer.cpp000066400000000000000000000037641217574114600314350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "PersianNormalizer.h" #include "MiscUtils.h" namespace Lucene { const wchar_t PersianNormalizer::YEH = (wchar_t)0x064a; const wchar_t PersianNormalizer::FARSI_YEH = (wchar_t)0x06cc; const wchar_t PersianNormalizer::YEH_BARREE = (wchar_t)0x06d2; const wchar_t PersianNormalizer::KEHEH = (wchar_t)0x06a9; const wchar_t PersianNormalizer::KAF = (wchar_t)0x0643; const wchar_t PersianNormalizer::HAMZA_ABOVE = (wchar_t)0x0654; const wchar_t PersianNormalizer::HEH_YEH = (wchar_t)0x06c0; const wchar_t PersianNormalizer::HEH_GOAL = (wchar_t)0x06c1; const wchar_t PersianNormalizer::HEH = (wchar_t)0x0647; PersianNormalizer::~PersianNormalizer() { } int32_t PersianNormalizer::normalize(wchar_t* s, int32_t len) { for (int32_t i = 0; i < len; ++i) { switch (s[i]) { case FARSI_YEH: case YEH_BARREE: s[i] = YEH; break; case KEHEH: s[i] = KAF; break; case HEH_YEH: case HEH_GOAL: s[i] = HEH; break; case HAMZA_ABOVE: // necessary for HEH + HAMZA len = deleteChar(s, i--, len); break; default: break; } } return len; } int32_t PersianNormalizer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { if (pos < len) MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); return len - 1; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fr/000077500000000000000000000000001217574114600252745ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fr/ElisionFilter.cpp000066400000000000000000000043031217574114600305500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ElisionFilter.h" #include "CharArraySet.h" #include "TermAttribute.h" namespace Lucene { const wchar_t ElisionFilter::apostrophes[] = {L'\'', L'\x2019'}; ElisionFilter::ElisionFilter(TokenStreamPtr input) : TokenFilter(input) { articles = newLucene(newCollection(L"l", L"m", L"t", L"qu", L"n", L"s", L"j"), true); termAtt = addAttribute(); } ElisionFilter::ElisionFilter(TokenStreamPtr input, HashSet articles) : TokenFilter(input) { setArticles(articles); termAtt = addAttribute(); } ElisionFilter::~ElisionFilter() { } void ElisionFilter::setArticles(HashSet articles) { this->articles = newLucene(articles, true); } bool ElisionFilter::incrementToken() { if (input->incrementToken()) { wchar_t* termBuffer = termAtt->termBufferArray(); int32_t termLength = termAtt->termLength(); int32_t minPoz = INT_MAX; for (int32_t i = 0; i < SIZEOF_ARRAY(apostrophes); ++i) { wchar_t apos = apostrophes[i]; for (int32_t poz = 0; poz < termLength; ++poz) { if (termBuffer[poz] == apos) { minPoz = std::min(poz, minPoz); break; } } } // An apostrophe has been found. If the prefix is an article strip it off. if (minPoz != INT_MAX && articles->contains(termBuffer, 0, minPoz)) termAtt->setTermBuffer(termBuffer, minPoz + 1, termLength - (minPoz + 1)); return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fr/FrenchAnalyzer.cpp000066400000000000000000000134141217574114600307160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "FrenchAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "FrenchStemFilter.h" namespace Lucene { const wchar_t* FrenchAnalyzer::_FRENCH_STOP_WORDS[] = { L"a", L"afin", L"ai", L"ainsi", L"apr\x00e8s", L"attendu", L"au", L"aujourd", L"auquel", L"aussi", L"autre", L"autres", L"aux", L"auxquelles", L"auxquels", L"avait", L"avant", L"avec", L"avoir", L"c", L"car", L"ce", L"ceci", L"cela", L"celle", L"celles", L"celui", L"cependant", L"certain", L"certaine", L"certaines", L"certains", L"ces", L"cet", L"cette", L"ceux", L"chez", L"ci", L"combien", L"comme", L"comment", L"concernant", L"contre", L"d", L"dans", L"de", L"debout", L"dedans", L"dehors", L"del\x00e0", L"depuis", L"derri\x00e8re", L"des", L"d\x00e9sormais", L"desquelles", L"desquels", L"dessous", L"dessus", L"devant", L"devers", L"devra", L"divers", L"diverse", L"diverses", L"doit", L"donc", L"dont", L"du", L"duquel", L"durant", L"d\x00e8s", L"elle", L"elles", L"en", L"entre", L"environ", L"est", L"et", L"etc", L"etre", L"eu", L"eux", L"except\x00e9", L"hormis", L"hors", L"h\x00e9las", L"hui", L"il", L"ils", L"j", L"je", L"jusqu", L"jusque", L"l", L"la", L"laquelle", L"le", L"lequel", L"les", L"lesquelles", L"lesquels", L"leur", L"leurs", L"lorsque", L"lui", L"l\x00e0", L"ma", L"mais", L"malgr\x00e9", L"me", L"merci", L"mes", L"mien", L"mienne", L"miennes", L"miens", L"moi", L"moins", L"mon", L"moyennant", L"m\x00eame", L"m\x00eames", L"n", L"ne", L"ni", L"non", L"nos", L"notre", L"nous", L"n\x00e9anmoins", L"n\x00f4tre", L"n\x00f4tres", L"on", L"ont", L"ou", L"outre", L"o\x00f9", L"par", L"parmi", L"partant", L"pas", L"pass\x00e9", L"pendant", L"plein", L"plus", L"plusieurs", L"pour", L"pourquoi", L"proche", L"pr\x00e8s", L"puisque", L"qu", L"quand", L"que", L"quel", L"quelle", L"quelles", L"quels", L"qui", L"quoi", 
L"quoique", L"revoici", L"revoil\x00e0", L"s", L"sa", L"sans", L"sauf", L"se", L"selon", L"seront", L"ses", L"si", L"sien", L"sienne", L"siennes", L"siens", L"sinon", L"soi", L"soit", L"son", L"sont", L"sous", L"suivant", L"sur", L"ta", L"te", L"tes", L"tien", L"tienne", L"tiennes", L"tiens", L"toi", L"ton", L"tous", L"tout", L"toute", L"toutes", L"tu", L"un", L"une", L"va", L"vers", L"voici", L"voil\x00e0", L"vos", L"votre", L"vous", L"vu", L"v\x00f4tre", L"v\x00f4tres", L"y", L"\x00e0", L"\x00e7a", L"\x00e8s", L"\x00e9t\x00e9", L"\x00eatre", L"\x00f4" }; FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { this->stoptable = stopwords; this->excltable = exclusions; this->matchVersion = matchVersion; } FrenchAnalyzer::~FrenchAnalyzer() { } const HashSet FrenchAnalyzer::getDefaultStopSet() { static HashSet stoptable; if (!stoptable) stoptable = HashSet::newInstance(_FRENCH_STOP_WORDS, _FRENCH_STOP_WORDS + SIZEOF_ARRAY(_FRENCH_STOP_WORDS)); return stoptable; } void FrenchAnalyzer::setStemExclusionTable(HashSet exclusions) { excltable = exclusions; setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created } TokenStreamPtr FrenchAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); result = newLucene(result, excltable); // Convert to lowercase after stemming result = newLucene(result); return result; } TokenStreamPtr FrenchAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) 
{ FrenchAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); streams->result = newLucene(streams->result, excltable); // Convert to lowercase after stemming streams->result = newLucene(streams->result); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } FrenchAnalyzerSavedStreams::~FrenchAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fr/FrenchStemFilter.cpp000066400000000000000000000034451217574114600312120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "FrenchStemFilter.h" #include "FrenchStemmer.h" #include "TermAttribute.h" namespace Lucene { FrenchStemFilter::FrenchStemFilter(TokenStreamPtr input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } FrenchStemFilter::FrenchStemFilter(TokenStreamPtr input, HashSet exclusiontable) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); this->exclusions = exclusiontable; } FrenchStemFilter::~FrenchStemFilter() { } bool FrenchStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); // Check the exclusion table. if (!exclusions || !exclusions.contains(term)) { String s(stemmer->stem(term)); // If not stemmed, don't waste the time adjusting the token. 
if (!s.empty() && s != term) termAtt->setTermBuffer(s); } return true; } else return false; } void FrenchStemFilter::setStemmer(FrenchStemmerPtr stemmer) { if (stemmer) this->stemmer = stemmer; } void FrenchStemFilter::setExclusionSet(HashSet exclusiontable) { this->exclusions = exclusiontable; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/fr/FrenchStemmer.cpp000066400000000000000000000450361217574114600305520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include #include "FrenchStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { FrenchStemmer::FrenchStemmer() { suite = false; modified = false; } FrenchStemmer::~FrenchStemmer() { } String FrenchStemmer::stem(const String& term) { if (!isStemmable(term)) return term; // Use lowercase for medium stemming. 
stringBuffer = StringUtils::toLower(term); // reset the booleans modified = false; suite = false; treatVowels(stringBuffer); setStrings(); step1(); if (!modified || suite) { if (!RV.empty()) { suite = step2a(); if (!suite) step2b(); } } if (modified || suite) step3(); else step4(); step5(); step6(); return stringBuffer; } void FrenchStemmer::setStrings() { // set the strings R0 = stringBuffer; RV = retrieveRV(stringBuffer); R1 = retrieveR(stringBuffer); if (!R1.empty()) { tempBuffer = R1; R2 = retrieveR(tempBuffer); } else R2.clear(); } void FrenchStemmer::step1() { Collection suffix = newCollection(L"ances", L"iqUes", L"ismes", L"ables", L"istes", L"ance", L"iqUe", L"isme", L"able", L"iste"); deleteFrom(R2, suffix); replaceFrom(R2, newCollection(L"logies", L"logie"), L"log"); replaceFrom(R2, newCollection(L"usions", L"utions", L"usion", L"ution"), L"u"); replaceFrom(R2, newCollection(L"ences", L"ence"), L"ent"); Collection search = newCollection(L"atrices", L"ateurs", L"ations", L"atrice", L"ateur", L"ation"); deleteButSuffixFromElseReplace(R2, search, L"ic", true, R0, L"iqU"); deleteButSuffixFromElseReplace(R2, newCollection(L"ements", L"ement"), L"eus", false, R0, L"eux"); deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"ativ", false); deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"iv", false); deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"abl", false); deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"iqU", false); deleteFromIfTestVowelBeforeIn(R1, newCollection(L"issements", L"issement"), false, R0); deleteFrom(RV, newCollection(L"ements", L"ement")); deleteButSuffixFromElseReplace(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"abil", false, R0, L"abl"); deleteButSuffixFromElseReplace(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"ic", false, R0, L"iqU"); deleteButSuffixFrom(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"iv", true); Collection autre = newCollection(L"ifs", L"ives", L"if", L"ive"); 
deleteButSuffixFromElseReplace(R2, autre, L"icat", false, R0, L"iqU"); deleteButSuffixFromElseReplace(R2, autre, L"at", true, R2, L"iqU"); replaceFrom(R0, newCollection(L"eaux"), L"eau"); replaceFrom(R1, newCollection(L"aux"), L"al"); deleteButSuffixFromElseReplace(R2, newCollection(L"euses", L"euse"), L"", true, R1, L"eux"); deleteFrom(R2, newCollection(L"eux")); // if one of the next steps is performed, we will need to perform step2a if (replaceFrom(RV, newCollection(L"amment"), L"ant")) suite = true; if (replaceFrom(RV, newCollection(L"emment"), L"ent")) suite = true; if (deleteFromIfTestVowelBeforeIn(RV, newCollection(L"ments", L"ment"), true, RV)) suite = true; } bool FrenchStemmer::step2a() { static Collection search; if (!search) { static const wchar_t* _search[] = { L"\x00eemes", L"\x00eetes", L"iraIent", L"irait", L"irais", L"irai", L"iras", L"ira", L"irent", L"iriez", L"irez", L"irions", L"irons", L"iront", L"issaIent", L"issais", L"issantes", L"issante", L"issants", L"issant", L"issait", L"issais", L"issions", L"issons", L"issiez", L"issez", L"issent", L"isses", L"isse", L"ir", L"is", L"\x00eet", L"it", L"ies", L"ie", L"i" }; search = Collection::newInstance(_search, _search + SIZEOF_ARRAY(_search)); } return deleteFromIfTestVowelBeforeIn(RV, search, false, RV); } void FrenchStemmer::step2b() { static Collection suffix; if (!suffix) { static const wchar_t* _suffix[] = { L"eraIent", L"erais", L"erait", L"erai", L"eras", L"erions", L"eriez", L"erons", L"eront", L"erez", L"\x00e8rent", L"era", L"\x00e9es", L"iez", L"\x00e9e", L"\x00e9s", L"er", L"ez", L"\x00e9" }; suffix = Collection::newInstance(_suffix, _suffix + SIZEOF_ARRAY(_suffix)); } deleteFrom(RV, suffix); static Collection search; if (!search) { static const wchar_t* _search[] = { L"assions", L"assiez", L"assent", L"asses", L"asse", L"aIent", L"antes", L"aIent", L"Aient", L"ante", L"\x00e2mes", L"\x00e2tes", L"ants", L"ant", L"ait", L"a\x00eet", L"ais", L"Ait", L"A\x00eet", L"Ais", L"\x00e2t", 
L"as", L"ai", L"Ai", L"a" }; search = Collection::newInstance(_search, _search + SIZEOF_ARRAY(_search)); } deleteButSuffixFrom(RV, search, L"e", true); deleteFrom(R2, newCollection(L"ions")); } void FrenchStemmer::step3() { if (!stringBuffer.empty()) { wchar_t ch = stringBuffer[stringBuffer.length() - 1]; if (ch == L'Y') { stringBuffer[stringBuffer.length() - 1] = L'i'; setStrings(); } else if (ch == L'\x00e7') { stringBuffer[stringBuffer.length() - 1] = L'c'; setStrings(); } } } void FrenchStemmer::step4() { if (stringBuffer.length() > 1) { wchar_t ch = stringBuffer[stringBuffer.length() - 1]; if (ch == L's') { wchar_t b = stringBuffer[stringBuffer.length() - 2]; if (b != L'a' && b != L'i' && b != L'o' && b != L'u' && b != L'\x00e8' && b != L's') { stringBuffer.resize(stringBuffer.length() - 1); setStrings(); } } } if (!deleteFromIfPrecededIn(R2, newCollection(L"ion"), RV, L"s")) deleteFromIfPrecededIn(R2, newCollection(L"ion"), RV, L"t"); replaceFrom(RV, newCollection(L"I\x00e8re", L"i\x00e8re", L"Ier", L"ier"), L"i"); deleteFrom(RV, newCollection(L"e")); deleteFromIfPrecededIn(RV, newCollection(L"\x00eb"), R0, L"gu"); } void FrenchStemmer::step5() { if (!R0.empty()) { if (boost::ends_with(R0, L"enn") || boost::ends_with(R0, L"onn") || boost::ends_with(R0, L"ett") || boost::ends_with(R0, L"ell") || boost::ends_with(R0, L"eill")) { stringBuffer.resize(stringBuffer.length() - 1); setStrings(); } } } void FrenchStemmer::step6() { if (!R0.empty()) { bool seenVowel = false; bool seenConson = false; int32_t pos = -1; for (int32_t i = (int32_t)(R0.length() - 1); i > -1; --i) { wchar_t ch = R0[i]; if (isVowel(ch)) { if (!seenVowel) { if (ch == L'\x00e9' || ch == L'\x00e8') { pos = i; break; } } seenVowel = true; } else { if (seenVowel) break; else seenConson = true; } } if (pos > -1 && seenConson && !seenVowel) stringBuffer[pos] = L'e'; } } bool FrenchStemmer::deleteFromIfPrecededIn(const String& source, Collection search, const String& from, const String& prefix) { bool 
found = false; if (!source.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, search[i])) { if (!from.empty() && boost::ends_with(from, prefix + search[i])) { stringBuffer.resize(stringBuffer.length() - search[i].length()); found = true; setStrings(); break; } } } } return found; } bool FrenchStemmer::deleteFromIfTestVowelBeforeIn(const String& source, Collection search, bool vowel, const String& from) { bool found = false; if (!source.empty() && !from.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, search[i])) { if ((search[i].length() + 1) <= from.length()) { bool test = isVowel(stringBuffer[stringBuffer.length() - (search[i].length() + 1)]); if (test == vowel) { stringBuffer.resize(stringBuffer.length() - search[i].length()); modified = true; found = true; setStrings(); break; } } } } } return found; } void FrenchStemmer::deleteButSuffixFrom(const String& source, Collection search, const String& prefix, bool without) { if (!source.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, prefix + search[i])) { stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); modified = true; setStrings(); break; } else if (without && boost::ends_with(source, search[i])) { stringBuffer.resize(stringBuffer.length() - search[i].length()); modified = true; setStrings(); break; } } } } void FrenchStemmer::deleteButSuffixFromElseReplace(const String& source, Collection search, const String& prefix, bool without, const String& from, const String& replace) { if (!source.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, prefix + search[i])) { stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); modified = true; setStrings(); break; } else if (!from.empty() && boost::ends_with(from, prefix + search[i])) { stringBuffer.resize(stringBuffer.length() - (prefix.length() + 
search[i].length())); stringBuffer += replace; modified = true; setStrings(); break; } else if (without && boost::ends_with(source, search[i])) { stringBuffer.resize(stringBuffer.length() - search[i].length()); modified = true; setStrings(); break; } } } } bool FrenchStemmer::replaceFrom(const String& source, Collection search, const String& replace) { bool found = false; if (!source.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, search[i])) { stringBuffer.resize(stringBuffer.length() - search[i].length()); stringBuffer += replace; modified = true; found = true; setStrings(); break; } } } return found; } void FrenchStemmer::deleteFrom(const String& source, Collection suffix) { if (!source.empty()) { for (int32_t i = 0; i < suffix.size(); ++i) { if (boost::ends_with(source, suffix[i])) { stringBuffer.resize(stringBuffer.length() - suffix[i].length()); modified = true; setStrings(); break; } } } } bool FrenchStemmer::isVowel(wchar_t ch) { switch (ch) { case L'a': case L'e': case L'i': case L'o': case L'u': case L'y': case L'\x00e2': case L'\x00e0': case L'\x00eb': case L'\x00e9': case L'\x00ea': case L'\x00e8': case L'\x00ef': case L'\x00ee': case L'\x00f4': case L'\x00fc': case L'\x00f9': case L'\x00fb': return true; default: return false; } } String FrenchStemmer::retrieveR(const String& buffer) { int32_t len = (int32_t)buffer.length(); int32_t pos = -1; for (int32_t c = 0; c < len; ++c) { if (isVowel(buffer[c])) { pos = c; break; } } if (pos > -1) { int32_t consonne = -1; for (int32_t c = pos; c < len; ++c) { if (!isVowel(buffer[c])) { consonne = c; break; } } if (consonne > -1 && (consonne + 1) < len) return buffer.substr(consonne + 1); else return L""; } else return L""; } String FrenchStemmer::retrieveRV(const String& buffer) { int32_t len = (int32_t)buffer.length(); if (buffer.length() > 3) { if (isVowel(buffer[0]) && isVowel(buffer[1])) return buffer.substr(3); else { int32_t pos = 0; for (int32_t c = 1; c < len; ++c) { 
if (isVowel(buffer[c])) { pos = c; break; } } if (pos + 1 < len) return buffer.substr(pos + 1); else return L""; } } else return L""; } void FrenchStemmer::treatVowels(String& buffer) { for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { wchar_t ch = buffer[c]; if (c == 0) // first char { if (buffer.length() > 1) { if (ch == L'y' && isVowel(buffer[c + 1])) buffer[c] = L'Y'; } } else if (c == buffer.length() - 1) // last char { if (ch == L'u' && buffer[c - 1] == L'q') buffer[c] = L'U'; if (ch == L'y' && isVowel(buffer[c - 1])) buffer[c] = L'Y'; } else // other cases { if (ch == L'u') { if (buffer[c - 1] == L'q') buffer[c] = L'U'; else if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1])) buffer[c] = L'U'; } if (ch == L'i') { if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1])) buffer[c] = L'I'; } if (ch == L'y') { if (isVowel(buffer[c - 1]) || isVowel(buffer[c + 1])) buffer[c] = L'Y'; } } } } bool FrenchStemmer::isStemmable(const String& term) { bool upper = false; int32_t first = -1; for (int32_t c = 0; c < (int32_t)term.length(); ++c) { // Discard terms that contain non-letter characters. if (!UnicodeUtil::isAlpha(term[c])) return false; // Discard terms that contain multiple uppercase letters. if (UnicodeUtil::isUpper(term[c])) { if (upper) return false; else // First encountered uppercase letter, set flag and save position. { first = c; upper = true; } } } // Discard the term if it contains a single uppercase letter that // is not starting the term. if (first > 0) return false; return true; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/nl/000077500000000000000000000000001217574114600252765ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/nl/DutchAnalyzer.cpp000066400000000000000000000107361217574114600305660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "DutchAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "StopFilter.h" #include "DutchStemFilter.h" namespace Lucene { const wchar_t* DutchAnalyzer::_DUTCH_STOP_WORDS[] = { L"de", L"en", L"van", L"ik", L"te", L"dat", L"die", L"in", L"een", L"hij", L"het", L"niet", L"zijn", L"is", L"was", L"op", L"aan", L"met", L"als", L"voor", L"had", L"er", L"maar", L"om", L"hem", L"dan", L"zou", L"of", L"wat", L"mijn", L"men", L"dit", L"zo", L"door", L"over", L"ze", L"zich", L"bij", L"ook", L"tot", L"je", L"mij", L"uit", L"der", L"daar", L"haar", L"naar", L"heb", L"hoe", L"heeft", L"hebben", L"deze", L"u", L"want", L"nog", L"zal", L"me", L"zij", L"nu", L"ge", L"geen", L"omdat", L"iets", L"worden", L"toch", L"al", L"waren", L"veel", L"meer", L"doen", L"toen", L"moet", L"ben", L"zonder", L"kan", L"hun", L"dus", L"alles", L"onder", L"ja", L"eens", L"hier", L"wie", L"werd", L"altijd", L"doch", L"wordt", L"wezen", L"kunnen", L"ons", L"zelf", L"tegen", L"na", L"reeds", L"wil", L"kon", L"niets", L"uw", L"iemand", L"geweest", L"andere" }; DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->excltable = HashSet::newInstance(); this->stemdict = MapStringString::newInstance(); this->matchVersion = matchVersion; } DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->excltable = HashSet::newInstance(); this->matchVersion = matchVersion; } DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { this->stoptable = stopwords; this->excltable = exclusions; this->matchVersion = matchVersion; } DutchAnalyzer::~DutchAnalyzer() { } void DutchAnalyzer::initialize() { 
stemdict.put(L"fiets", L"fiets"); // otherwise fiet stemdict.put(L"bromfiets", L"bromfiets"); // otherwise bromfiet stemdict.put(L"ei", L"eier"); stemdict.put(L"kind", L"kinder"); } const HashSet DutchAnalyzer::getDefaultStopSet() { static HashSet stoptable; if (!stoptable) stoptable = HashSet::newInstance(_DUTCH_STOP_WORDS, _DUTCH_STOP_WORDS + SIZEOF_ARRAY(_DUTCH_STOP_WORDS)); return stoptable; } void DutchAnalyzer::setStemExclusionTable(HashSet exclusions) { excltable = exclusions; setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created } TokenStreamPtr DutchAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); result = newLucene(result, excltable); return result; } TokenStreamPtr DutchAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { DutchAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); streams->result = newLucene(streams->result, excltable); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } DutchAnalyzerSavedStreams::~DutchAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/nl/DutchStemFilter.cpp000066400000000000000000000044441217574114600310560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "DutchStemFilter.h" #include "DutchStemmer.h" #include "TermAttribute.h" namespace Lucene { DutchStemFilter::DutchStemFilter(TokenStreamPtr input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } DutchStemFilter::DutchStemFilter(TokenStreamPtr input, HashSet exclusiontable) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); this->exclusions = exclusiontable; } DutchStemFilter::DutchStemFilter(TokenStreamPtr input, HashSet exclusiontable, MapStringString stemdictionary) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); this->exclusions = exclusiontable; this->stemmer->setStemDictionary(stemdictionary); } DutchStemFilter::~DutchStemFilter() { } bool DutchStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); // Check the exclusion table. if (!exclusions || !exclusions.contains(term)) { String s(stemmer->stem(term)); // If not stemmed, don't waste the time adjusting the token. if (!s.empty() && s != term) termAtt->setTermBuffer(s); } return true; } else return false; } void DutchStemFilter::setStemmer(DutchStemmerPtr stemmer) { if (stemmer) this->stemmer = stemmer; } void DutchStemFilter::setExclusionSet(HashSet exclusiontable) { this->exclusions = exclusiontable; } void DutchStemFilter::setStemDictionary(MapStringString dict) { if (stemmer) this->stemmer->setStemDictionary(dict); } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/nl/DutchStemmer.cpp000066400000000000000000000215421217574114600304120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include #include "DutchStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { DutchStemmer::DutchStemmer() { removedE = false; R1 = 0; R2 = 0; } DutchStemmer::~DutchStemmer() { } String DutchStemmer::stem(const String& term) { // Use lowercase for medium stemming. buffer = StringUtils::toLower(term); if (!isStemmable()) return buffer; if (stemDict && stemDict.contains(term)) return stemDict.get(term); // Stemming starts here... substitute(); storeYandI(); R1 = getRIndex(0); R1 = std::max((int32_t)3, R1); step1(); step2(); R2 = getRIndex(R1); step3a(); step3b(); step4(); reStoreYandI(); return buffer; } bool DutchStemmer::enEnding() { Collection enend = newCollection(L"ene", L"en"); for (int32_t i = 0; i < enend.size(); ++i) { String end = enend[i]; int32_t index = (int32_t)(buffer.length() - end.length()); if (boost::ends_with(buffer, end) && index >= R1 && isValidEnEnding(index - 1)) { buffer.erase(index, end.length()); unDouble(index); return true; } } return false; } void DutchStemmer::step1() { if (R1 >= (int32_t)buffer.length()) return; int32_t lengthR1 = (int32_t)(buffer.length() - R1); int32_t index; if (boost::ends_with(buffer, L"heden")) { buffer.replace(R1, lengthR1, boost::replace_all_copy(buffer.substr(R1, lengthR1), L"heden", L"heid")); return; } if (enEnding()) return; index = (int32_t)buffer.length() - 2; if (boost::ends_with(buffer, L"se") && index >= R1 && isValidSEnding(index - 1)) { buffer.erase(index, 2); return; } index = (int32_t)(buffer.length() - 1); if (boost::ends_with(buffer, L"s") && index >= R1 && isValidSEnding(index - 1)) buffer.erase(index, 1); } void DutchStemmer::step2() { removedE = false; if (R1 >= (int32_t)buffer.length()) return; int32_t index = (int32_t)(buffer.length() - 1); if (index >= R1 && boost::ends_with(buffer, L"e") && !isVowel(buffer[index - 1])) { buffer.erase(index, 
1); unDouble(); removedE = true; } } void DutchStemmer::step3a() { if (R2 >= (int32_t)buffer.length()) return; int32_t index = (int32_t)(buffer.length() - 4); if (boost::ends_with(buffer, L"heid") && index >= R2 && buffer[index - 1] != L'c') { buffer.erase(index, 4); // remove heid enEnding(); } } void DutchStemmer::step3b() { if (R2 >= (int32_t)buffer.length()) return; int32_t index = (int32_t)(buffer.length() - 3); if ((boost::ends_with(buffer, L"end") || boost::ends_with(buffer, L"ing")) && index >= R2) { buffer.erase(index, 3); if (buffer[index - 2] == L'i' && buffer[index - 1] == L'g') { if (buffer[index - 3] != L'e' && index - 2 >= R2) { index -= 2; buffer.erase(index, 2); } } else unDouble(index); return; } index = (int32_t)(buffer.length() - 2); if (boost::ends_with(buffer, L"ig") && index >= R2) { if (buffer[index - 1] != L'e') buffer.erase(index, 2); return; } index = (int32_t)(buffer.length() - 4); if (boost::ends_with(buffer, L"lijk") && index >= R2) { buffer.erase(index, 4); step2(); return; } index = (int32_t)(buffer.length() - 4); if (boost::ends_with(buffer, L"baar") && index >= R2) { buffer.erase(index, 4); return; } index = (int32_t)(buffer.length() - 3); if (boost::ends_with(buffer, L"bar") && index >= R2) { if (removedE) buffer.erase(index, 3); return; } } void DutchStemmer::step4() { if (buffer.length() < 4) return; String end(buffer.substr(buffer.length() - 4)); if (end[1] == end[2] && end[3] != L'I' && end[1] != L'i' && isVowel(end[1]) && !isVowel(end[3]) && !isVowel(end[0])) buffer.erase(buffer.length() - 2, 1); } bool DutchStemmer::isStemmable() { for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { if (!UnicodeUtil::isAlnum(buffer[c])) return false; } return true; } void DutchStemmer::substitute() { for (int32_t i = 0; i < (int32_t)buffer.length(); ++i) { switch (buffer[i]) { case L'\x00e4': case L'\x00e1': buffer[i] = L'a'; break; case L'\x00eb': case L'\x00e9': buffer[i] = L'e'; break; case L'\x00fc': case L'\x00fa': buffer[i] = 
L'u'; break; case L'\x00ef': case L'i': buffer[i] = L'i'; break; case L'\x00f6': case L'\x00f3': buffer[i] = L'o'; break; } } } bool DutchStemmer::isValidSEnding(int32_t index) { wchar_t c = buffer[index]; if (isVowel(c) || c == L'j') return false; return true; } bool DutchStemmer::isValidEnEnding(int32_t index) { wchar_t c = buffer[index]; if (isVowel(c)) return false; if (c < 3) return false; // ends with "gem"? if (c == L'm' && buffer[index - 2] == L'g' && buffer[index - 1] == L'e') return false; return true; } void DutchStemmer::unDouble() { unDouble((int32_t)buffer.length()); } void DutchStemmer::unDouble(int32_t endIndex) { String s = buffer.substr(0, endIndex); if (boost::ends_with(s, L"kk") || boost::ends_with(s, L"tt") || boost::ends_with(s, L"dd") || boost::ends_with(s, L"nn") || boost::ends_with(s, L"mm") || boost::ends_with(s, L"ff")) buffer.resize(endIndex - 1); } int32_t DutchStemmer::getRIndex(int32_t start) { if (start == 0) start = 1; int32_t i = start; for (; i < (int32_t)buffer.length(); ++i) { // first non-vowel preceded by a vowel if (!isVowel(buffer[i]) && isVowel(buffer[i - 1])) return i + 1; } return i + 1; } void DutchStemmer::storeYandI() { if (buffer[0] == L'y') buffer[0] = L'Y'; int32_t last = (int32_t)(buffer.length() - 1); for (int32_t i = 1; i < last; i++) { switch (buffer[i]) { case L'i': if (isVowel(buffer[i - 1]) && isVowel(buffer[i + 1])) buffer[i] = L'I'; break; case L'y': if (isVowel(buffer[i - 1])) buffer[i] = L'Y'; break; } } if (last > 0 && buffer[last] == L'y' && isVowel(buffer[last - 1])) buffer[last] = L'Y'; } void DutchStemmer::reStoreYandI() { boost::replace_all(buffer, L"I", L"i"); boost::replace_all(buffer, L"Y", L"y"); } bool DutchStemmer::isVowel(wchar_t c) { switch (c) { case L'e': case L'a': case L'o': case L'i': case L'u': case L'y': case L'\x00e8': return true; default: return false; } } void DutchStemmer::setStemDictionary(MapStringString dict) { stemDict = dict; } } 
LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/reverse/000077500000000000000000000000001217574114600263405ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/reverse/ReverseStringFilter.cpp000066400000000000000000000041341217574114600330160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ReverseStringFilter.h" #include "TermAttribute.h" namespace Lucene { const wchar_t ReverseStringFilter::NOMARKER = (wchar_t)0xffff; /// Example marker character: U+0001 (START OF HEADING) const wchar_t ReverseStringFilter::START_OF_HEADING_MARKER = (wchar_t)0x0001; /// Example marker character: U+001F (INFORMATION SEPARATOR ONE) const wchar_t ReverseStringFilter::INFORMATION_SEPARATOR_MARKER = (wchar_t)0x001f; /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) const wchar_t ReverseStringFilter::PUA_EC00_MARKER = (wchar_t)0xec00; /// Example marker character: U+200F (RIGHT-TO-LEFT MARK) const wchar_t ReverseStringFilter::RTL_DIRECTION_MARKER = (wchar_t)0x200f; ReverseStringFilter::ReverseStringFilter(TokenStreamPtr input) : TokenFilter(input) { this->marker = NOMARKER; termAtt = addAttribute(); } ReverseStringFilter::ReverseStringFilter(TokenStreamPtr input, wchar_t marker) : TokenFilter(input) { this->marker = marker; termAtt = addAttribute(); } ReverseStringFilter::~ReverseStringFilter() { } bool ReverseStringFilter::incrementToken() { if (input->incrementToken()) { int32_t len = termAtt->termLength(); if (marker != NOMARKER) { ++len; termAtt->resizeTermBuffer(len); termAtt->termBuffer()[len - 1] = marker; } CharArray term(termAtt->termBuffer()); 
std::reverse(term.get(), term.get() + len); termAtt->setTermLength(len); return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ru/000077500000000000000000000000001217574114600253135ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ru/RussianAnalyzer.cpp000066400000000000000000000164761217574114600311670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianAnalyzer.h" #include "RussianLetterTokenizer.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "RussianStemFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Russian stopwords in UTF-8 format. 
const uint8_t RussianAnalyzer::DEFAULT_STOPWORD_FILE[] = { 0xd0, 0xb0, 0x0a, 0xd0, 0xb1, 0xd0, 0xb5, 0xd0, 0xb7, 0x0a, 0xd0, 0xb1, 0xd0, 0xbe, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xb0, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xbe, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd1, 0x82, 0xd1, 0x8c, 0x0a, 0xd0, 0xb2, 0x0a, 0xd0, 0xb2, 0xd0, 0xb0, 0xd0, 0xbc, 0x0a, 0xd0, 0xb2, 0xd0, 0xb0, 0xd1, 0x81, 0x0a, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x8c, 0x0a, 0xd0, 0xb2, 0xd0, 0xbe, 0x0a, 0xd0, 0xb2, 0xd0, 0xbe, 0xd1, 0x82, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, 0xb5, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, 0xb5, 0xd1, 0x85, 0x0a, 0xd0, 0xb2, 0xd1, 0x8b, 0x0a, 0xd0, 0xb3, 0xd0, 0xb4, 0xd0, 0xb5, 0x0a, 0xd0, 0xb4, 0xd0, 0xb0, 0x0a, 0xd0, 0xb4, 0xd0, 0xb0, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd0, 0xb4, 0xd0, 0xbb, 0xd1, 0x8f, 0x0a, 0xd0, 0xb4, 0xd0, 0xbe, 0x0a, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xb5, 0xd0, 0xb9, 0x0a, 0xd0, 0xb5, 0xd1, 0x8e, 0x0a, 0xd0, 0xb5, 0xd1, 0x81, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd1, 0x8c, 0x0a, 0xd0, 0xb5, 0xd1, 0x89, 0xd0, 0xb5, 0x0a, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd0, 0xb7, 0xd0, 0xb0, 0x0a, 0xd0, 0xb7, 0xd0, 0xb4, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x8c, 0x0a, 0xd0, 0xb8, 0x0a, 0xd0, 0xb8, 0xd0, 0xb7, 0x0a, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xb8, 0xd0, 0xbc, 0x0a, 0xd0, 0xb8, 0xd1, 0x85, 0x0a, 0xd0, 0xba, 0x0a, 0xd0, 0xba, 0xd0, 0xb0, 0xd0, 0xba, 0x0a, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb3, 0xd0, 0xb4, 0xd0, 0xb0, 0x0a, 0xd0, 0xba, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb1, 0xd0, 0xbe, 0x0a, 0xd0, 0xbc, 0xd0, 0xbd, 0xd0, 0xb5, 0x0a, 0xd0, 0xbc, 0xd0, 0xbe, 
0xd0, 0xb6, 0xd0, 0xb5, 0xd1, 0x82, 0x0a, 0xd0, 0xbc, 0xd1, 0x8b, 0x0a, 0xd0, 0xbd, 0xd0, 0xb0, 0x0a, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xb4, 0xd0, 0xbe, 0x0a, 0xd0, 0xbd, 0xd0, 0xb0, 0xd1, 0x88, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x82, 0x0a, 0xd0, 0xbd, 0xd0, 0xb8, 0x0a, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x85, 0x0a, 0xd0, 0xbd, 0xd0, 0xbe, 0x0a, 0xd0, 0xbd, 0xd1, 0x83, 0x0a, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd0, 0xb1, 0x0a, 0xd0, 0xbe, 0xd0, 0xb4, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xb0, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xb8, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd1, 0x82, 0x0a, 0xd0, 0xbe, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xbd, 0xd1, 0x8c, 0x0a, 0xd0, 0xbf, 0xd0, 0xbe, 0x0a, 0xd0, 0xbf, 0xd0, 0xbe, 0xd0, 0xb4, 0x0a, 0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb8, 0x0a, 0xd1, 0x81, 0x0a, 0xd1, 0x81, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb9, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xbc, 0x0a, 0xd1, 0x82, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xb5, 0xd0, 0xbc, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb9, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xbb, 0xd1, 0x8c, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xbc, 0x0a, 0xd1, 0x82, 0xd1, 0x8b, 0x0a, 0xd1, 0x83, 0x0a, 0xd1, 0x83, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd1, 0x85, 0xd0, 0xbe, 0xd1, 0x82, 0xd1, 0x8f, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xb9, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xbc, 0x0a, 0xd1, 0x87, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x87, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb1, 
0xd1, 0x8b, 0x0a, 0xd1, 0x87, 0xd1, 0x8c, 0xd0, 0xb5, 0x0a, 0xd1, 0x87, 0xd1, 0x8c, 0xd1, 0x8f, 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb0, 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x8f, 0x0a }; RussianAnalyzer::RussianAnalyzer(LuceneVersion::Version matchVersion) { this->stopSet = getDefaultStopSet(); this->matchVersion = matchVersion; } RussianAnalyzer::RussianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stopSet = stopwords; this->matchVersion = matchVersion; } RussianAnalyzer::~RussianAnalyzer() { } const HashSet RussianAnalyzer::getDefaultStopSet() { static HashSet stopSet; if (!stopSet) { String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); } return stopSet; } TokenStreamPtr RussianAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); result = newLucene(result); return result; } TokenStreamPtr RussianAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { RussianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(streams->source); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); streams->result = newLucene(streams->result); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } RussianAnalyzerSavedStreams::~RussianAnalyzerSavedStreams() { } } 
LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ru/RussianLetterTokenizer.cpp000066400000000000000000000020631217574114600325170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianLetterTokenizer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { RussianLetterTokenizer::RussianLetterTokenizer(ReaderPtr input) : CharTokenizer(input) { } RussianLetterTokenizer::RussianLetterTokenizer(AttributeSourcePtr source, ReaderPtr input) : CharTokenizer(source, input) { } RussianLetterTokenizer::RussianLetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : CharTokenizer(factory, input) { } RussianLetterTokenizer::~RussianLetterTokenizer() { } bool RussianLetterTokenizer::isTokenChar(wchar_t c) { return (UnicodeUtil::isAlpha(c) || UnicodeUtil::isDigit(c)); } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ru/RussianLowerCaseFilter.cpp000066400000000000000000000021211217574114600324120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianLowerCaseFilter.h" #include "TermAttribute.h" #include "CharFolder.h" namespace Lucene { RussianLowerCaseFilter::RussianLowerCaseFilter(TokenStreamPtr input) : TokenFilter(input) { termAtt = addAttribute(); } RussianLowerCaseFilter::~RussianLowerCaseFilter() { } bool RussianLowerCaseFilter::incrementToken() { if (input->incrementToken()) { wchar_t* buffer = termAtt->termBufferArray(); int32_t length = termAtt->termLength(); for (int32_t i = 0; i < length; ++i) buffer[i] = CharFolder::toLower(buffer[i]); return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ru/RussianStemFilter.cpp000066400000000000000000000023011217574114600314360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianStemFilter.h" #include "RussianStemmer.h" #include "TermAttribute.h" namespace Lucene { RussianStemFilter::RussianStemFilter(TokenStreamPtr input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } RussianStemFilter::~RussianStemFilter() { } bool RussianStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); String s(stemmer->stem(term)); if (!s.empty() && s != term) termAtt->setTermBuffer(s); return true; } else return false; } void RussianStemFilter::setStemmer(RussianStemmerPtr stemmer) { if (stemmer) this->stemmer = stemmer; } } LucenePlusPlus-rel_3.0.4/src/contrib/analyzers/common/analysis/ru/RussianStemmer.cpp000066400000000000000000000512041217574114600310020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const wchar_t RussianStemmer::A = L'\x0430'; const wchar_t RussianStemmer::V = L'\x0432'; const wchar_t RussianStemmer::G = L'\x0433'; const wchar_t RussianStemmer::E = L'\x0435'; const wchar_t RussianStemmer::I = L'\x0438'; const wchar_t RussianStemmer::I_ = L'\x0439'; const wchar_t RussianStemmer::L = L'\x043b'; const wchar_t RussianStemmer::M = L'\x043c'; const wchar_t RussianStemmer::N = L'\x043d'; const wchar_t RussianStemmer::O = L'\x043e'; const wchar_t RussianStemmer::S = L'\x0441'; const wchar_t RussianStemmer::T = L'\x0442'; const wchar_t RussianStemmer::U = L'\x0443'; const wchar_t RussianStemmer::X = L'\x0445'; const wchar_t RussianStemmer::SH = L'\x0448'; const wchar_t RussianStemmer::SHCH = L'\x0449'; const wchar_t RussianStemmer::Y = L'\x044b'; const wchar_t RussianStemmer::SOFT = L'\x044c'; const wchar_t RussianStemmer::AE = L'\x044d'; const wchar_t RussianStemmer::IU = L'\x044e'; const wchar_t RussianStemmer::IA = L'\x044f'; const wchar_t RussianStemmer::vowels[] = {A, E, I, O, U, Y, AE, IU, IA}; RussianStemmer::RussianStemmer() { RV = 0; R1 = 0; R2 = 0; } RussianStemmer::~RussianStemmer() { } Collection RussianStemmer::perfectiveGerundEndings1() { static Collection _perfectiveGerundEndings1; if (!_perfectiveGerundEndings1) { _perfectiveGerundEndings1 = Collection::newInstance(); _perfectiveGerundEndings1.add(String(L"") + V); _perfectiveGerundEndings1.add(String(L"") + V + SH + I); _perfectiveGerundEndings1.add(String(L"") + V + SH + I + S + SOFT); } return _perfectiveGerundEndings1; } Collection RussianStemmer::perfectiveGerund1Predessors() { static Collection _perfectiveGerund1Predessors; if (!_perfectiveGerund1Predessors) { _perfectiveGerund1Predessors = Collection::newInstance(); _perfectiveGerund1Predessors.add(String(L"") + A); 
_perfectiveGerund1Predessors.add(String(L"") + IA); } return _perfectiveGerund1Predessors; } Collection RussianStemmer::perfectiveGerundEndings2() { static Collection _perfectiveGerundEndings2; if (!_perfectiveGerundEndings2) { _perfectiveGerundEndings2 = Collection::newInstance(); _perfectiveGerundEndings2.add(String(L"") + I + V); _perfectiveGerundEndings2.add(String(L"") + Y + V); _perfectiveGerundEndings2.add(String(L"") + I + V + SH + I); _perfectiveGerundEndings2.add(String(L"") + Y + V + SH + I); _perfectiveGerundEndings2.add(String(L"") + I + V + SH + I + S + SOFT); _perfectiveGerundEndings2.add(String(L"") + Y + V + SH + I + S + SOFT); } return _perfectiveGerundEndings2; } Collection RussianStemmer::adjectiveEndings() { static Collection _adjectiveEndings; if (!_adjectiveEndings) { _adjectiveEndings = Collection::newInstance(); _adjectiveEndings.add(String(L"") + E + E); _adjectiveEndings.add(String(L"") + I + E); _adjectiveEndings.add(String(L"") + Y + E); _adjectiveEndings.add(String(L"") + O + E); _adjectiveEndings.add(String(L"") + E + I_); _adjectiveEndings.add(String(L"") + I + I_); _adjectiveEndings.add(String(L"") + Y + I_); _adjectiveEndings.add(String(L"") + O + I_); _adjectiveEndings.add(String(L"") + E + M); _adjectiveEndings.add(String(L"") + I + M); _adjectiveEndings.add(String(L"") + Y + M); _adjectiveEndings.add(String(L"") + O + M); _adjectiveEndings.add(String(L"") + I + X); _adjectiveEndings.add(String(L"") + Y + X); _adjectiveEndings.add(String(L"") + U + IU); _adjectiveEndings.add(String(L"") + IU + IU); _adjectiveEndings.add(String(L"") + A + IA); _adjectiveEndings.add(String(L"") + IA + IA); _adjectiveEndings.add(String(L"") + O + IU); _adjectiveEndings.add(String(L"") + E + IU); _adjectiveEndings.add(String(L"") + I + M + I); _adjectiveEndings.add(String(L"") + Y + M + I); _adjectiveEndings.add(String(L"") + E + G + O); _adjectiveEndings.add(String(L"") + O + G + O); _adjectiveEndings.add(String(L"") + E + M + U); 
_adjectiveEndings.add(String(L"") + O + M + U); } return _adjectiveEndings; } Collection RussianStemmer::participleEndings1() { static Collection _participleEndings1; if (!_participleEndings1) { _participleEndings1 = Collection::newInstance(); _participleEndings1.add(String(L"") + SHCH); _participleEndings1.add(String(L"") + E + M); _participleEndings1.add(String(L"") + N + N); _participleEndings1.add(String(L"") + V + SH); _participleEndings1.add(String(L"") + IU + SHCH); } return _participleEndings1; } Collection RussianStemmer::participleEndings2() { static Collection _participleEndings2; if (!_participleEndings2) { _participleEndings2 = Collection::newInstance(); _participleEndings2.add(String(L"") + I + V + SH); _participleEndings2.add(String(L"") + Y + V + SH); _participleEndings2.add(String(L"") + U + IU + SHCH); } return _participleEndings2; } Collection RussianStemmer::participle1Predessors() { static Collection _participle1Predessors; if (!_participle1Predessors) { _participle1Predessors = Collection::newInstance(); _participle1Predessors.add(String(L"") + A); _participle1Predessors.add(String(L"") + IA); } return _participle1Predessors; } Collection RussianStemmer::reflexiveEndings() { static Collection _participle1Predessors; if (!_participle1Predessors) { _participle1Predessors = Collection::newInstance(); _participle1Predessors.add(String(L"") + S + IA); _participle1Predessors.add(String(L"") + S + SOFT); } return _participle1Predessors; } Collection RussianStemmer::verbEndings1() { static Collection _verbEndings1; if (!_verbEndings1) { _verbEndings1 = Collection::newInstance(); _verbEndings1.add(String(L"") + I_); _verbEndings1.add(String(L"") + L); _verbEndings1.add(String(L"") + N); _verbEndings1.add(String(L"") + L + O); _verbEndings1.add(String(L"") + N + O); _verbEndings1.add(String(L"") + E + T); _verbEndings1.add(String(L"") + IU + T); _verbEndings1.add(String(L"") + L + A); _verbEndings1.add(String(L"") + N + A); _verbEndings1.add(String(L"") 
+ L + I); _verbEndings1.add(String(L"") + E + M); _verbEndings1.add(String(L"") + N + Y); _verbEndings1.add(String(L"") + E + T + E); _verbEndings1.add(String(L"") + I_ + T + E); _verbEndings1.add(String(L"") + T + SOFT); _verbEndings1.add(String(L"") + E + SH + SOFT); _verbEndings1.add(String(L"") + N + N + O); } return _verbEndings1; } Collection RussianStemmer::verbEndings2() { static Collection _verbEndings2; if (!_verbEndings2) { _verbEndings2 = Collection::newInstance(); _verbEndings2.add(String(L"") + IU); _verbEndings2.add(String(L"") + U + IU); _verbEndings2.add(String(L"") + E + N); _verbEndings2.add(String(L"") + E + I_); _verbEndings2.add(String(L"") + IA + T); _verbEndings2.add(String(L"") + U + I_); _verbEndings2.add(String(L"") + I + L); _verbEndings2.add(String(L"") + Y + L); _verbEndings2.add(String(L"") + I + M); _verbEndings2.add(String(L"") + Y + M); _verbEndings2.add(String(L"") + I + T); _verbEndings2.add(String(L"") + Y + T); _verbEndings2.add(String(L"") + I + L + A); _verbEndings2.add(String(L"") + Y + L + A); _verbEndings2.add(String(L"") + E + N + A); _verbEndings2.add(String(L"") + I + T + E); _verbEndings2.add(String(L"") + I + L + I); _verbEndings2.add(String(L"") + Y + L + I); _verbEndings2.add(String(L"") + I + L + O); _verbEndings2.add(String(L"") + Y + L + O); _verbEndings2.add(String(L"") + E + N + O); _verbEndings2.add(String(L"") + U + E + T); _verbEndings2.add(String(L"") + U + IU + T); _verbEndings2.add(String(L"") + E + N + Y); _verbEndings2.add(String(L"") + I + T + SOFT); _verbEndings2.add(String(L"") + Y + T + SOFT); _verbEndings2.add(String(L"") + I + SH + SOFT); _verbEndings2.add(String(L"") + E + I_ + T + E); _verbEndings2.add(String(L"") + U + I_ + T + E); } return _verbEndings2; } Collection RussianStemmer::verb1Predessors() { static Collection _verb1Predessors; if (!_verb1Predessors) { _verb1Predessors = Collection::newInstance(); _verb1Predessors.add(String(L"") + A); _verb1Predessors.add(String(L"") + IA); } return 
_verb1Predessors; } Collection RussianStemmer::nounEndings() { static Collection _nounEndings; if (!_nounEndings) { _nounEndings = Collection::newInstance(); _nounEndings.add(String(L"") + A); _nounEndings.add(String(L"") + U); _nounEndings.add(String(L"") + I_); _nounEndings.add(String(L"") + O); _nounEndings.add(String(L"") + U); _nounEndings.add(String(L"") + E); _nounEndings.add(String(L"") + Y); _nounEndings.add(String(L"") + I); _nounEndings.add(String(L"") + SOFT); _nounEndings.add(String(L"") + IA); _nounEndings.add(String(L"") + E + V); _nounEndings.add(String(L"") + O + V); _nounEndings.add(String(L"") + I + E); _nounEndings.add(String(L"") + SOFT + E); _nounEndings.add(String(L"") + IA + X); _nounEndings.add(String(L"") + I + IU); _nounEndings.add(String(L"") + E + I); _nounEndings.add(String(L"") + I + I); _nounEndings.add(String(L"") + E + I_); _nounEndings.add(String(L"") + O + I_); _nounEndings.add(String(L"") + E + M); _nounEndings.add(String(L"") + A + M); _nounEndings.add(String(L"") + O + M); _nounEndings.add(String(L"") + A + X); _nounEndings.add(String(L"") + SOFT + IU); _nounEndings.add(String(L"") + I + IA); _nounEndings.add(String(L"") + SOFT + IA); _nounEndings.add(String(L"") + I + I_); _nounEndings.add(String(L"") + IA + M); _nounEndings.add(String(L"") + IA + M + I); _nounEndings.add(String(L"") + A + M + I); _nounEndings.add(String(L"") + I + E + I_); _nounEndings.add(String(L"") + I + IA + M); _nounEndings.add(String(L"") + I + E + M); _nounEndings.add(String(L"") + I + IA + X); _nounEndings.add(String(L"") + I + IA + M + I); } return _nounEndings; } Collection RussianStemmer::superlativeEndings() { static Collection _superlativeEndings; if (!_superlativeEndings) { _superlativeEndings = Collection::newInstance(); _superlativeEndings.add(String(L"") + E + I_ + SH); _superlativeEndings.add(String(L"") + E + I_ + SH + E); } return _superlativeEndings; } Collection RussianStemmer::derivationalEndings() { static Collection 
_derivationalEndings; if (!_derivationalEndings) { _derivationalEndings = Collection::newInstance(); _derivationalEndings.add(String(L"") + O + S + T); _derivationalEndings.add(String(L"") + O + S + T + SOFT); } return _derivationalEndings; } Collection RussianStemmer::doubleN() { static Collection _doubleN; if (!_doubleN) { _doubleN = Collection::newInstance(); _doubleN.add(String(L"") + N + N); } return _doubleN; } String RussianStemmer::stem(const String& input) { markPositions(input); if (RV == 0) return input; // RV wasn't detected, nothing to stem String stemmingZone(input.substr(RV)); // stemming goes on in RV // Step 1 if (!perfectiveGerund(stemmingZone)) { reflexive(stemmingZone); if (!adjectival(stemmingZone)) { if (!verb(stemmingZone)) noun(stemmingZone); } } // Step 2 removeI(stemmingZone); // Step 3 derivational(stemmingZone); // Step 4 superlative(stemmingZone); undoubleN(stemmingZone); removeSoft(stemmingZone); // return result return input.substr(0, RV) + stemmingZone; } String RussianStemmer::stemWord(const String& word) { return newLucene()->stem(word); } bool RussianStemmer::adjectival(String& stemmingZone) { // look for adjective ending in a stemming zone if (!findAndRemoveEnding(stemmingZone, adjectiveEndings())) return false; if (!findAndRemoveEnding(stemmingZone, participleEndings1(), participle1Predessors())) findAndRemoveEnding(stemmingZone, participleEndings2()); return true; } bool RussianStemmer::derivational(String& stemmingZone) { int32_t endingLength = findEnding(stemmingZone, derivationalEndings()); if (endingLength == 0) return false; // no derivational ending found else { // Ensure that the ending locates in R2 if (R2 - RV <= (int32_t)stemmingZone.length() - endingLength) { stemmingZone.resize(stemmingZone.length() - endingLength); return true; } else return false; } } int32_t RussianStemmer::findEnding(String& stemmingZone, int32_t startIndex, Collection theEndingClass) { bool match = false; for (int32_t i = theEndingClass.size() 
- 1; i >= 0; --i) { String theEnding(theEndingClass[i]); // check if the ending is bigger than stemming zone if (startIndex < (int32_t)theEnding.length() - 1) { match = false; continue; } match = true; int32_t stemmingIndex = startIndex; for (int32_t j = (int32_t)theEnding.length() - 1; j >= 0; --j) { if (stemmingZone[stemmingIndex--] != theEnding[j]) { match = false; break; } } // check if ending was found if (match) return (int32_t)theEndingClass[i].size(); // cut ending } return 0; } int32_t RussianStemmer::findEnding(String& stemmingZone, Collection theEndingClass) { return findEnding(stemmingZone, (int32_t)(stemmingZone.length() - 1), theEndingClass); } bool RussianStemmer::findAndRemoveEnding(String& stemmingZone, Collection theEndingClass) { int32_t endingLength = findEnding(stemmingZone, theEndingClass); if (endingLength == 0) return false; // not found else { stemmingZone.resize(stemmingZone.length() - endingLength); return true; // cut the ending found } } bool RussianStemmer::findAndRemoveEnding(String& stemmingZone, Collection theEndingClass, Collection thePredessors) { int32_t endingLength = findEnding(stemmingZone, theEndingClass); if (endingLength == 0) return false; // not found else { int32_t predessorLength = findEnding(stemmingZone, (int32_t)(stemmingZone.length() - endingLength - 1), thePredessors); if (predessorLength == 0) return false; else { stemmingZone.resize(stemmingZone.length() - endingLength); return true; // cut the ending found } } } void RussianStemmer::markPositions(const String& word) { RV = 0; R1 = 0; R2 = 0; int32_t i = 0; // find RV while ((int32_t)word.length() > i && !isVowel(word[i])) ++i; if ((int32_t)word.length() - 1 < ++i) return; // RV zone is empty RV = i; // find R1 while ((int32_t)word.length() > i && isVowel(word[i])) ++i; if ((int32_t)word.length() - 1 < ++i) return; // R1 zone is empty R1 = i; // find R2 while ((int32_t)word.length() > i && !isVowel(word[i])) ++i; if ((int32_t)word.length() - 1 < ++i) return; // 
R2 zone is empty while ((int32_t)word.length() > i && isVowel(word[i])) ++i; if ((int32_t)word.length() - 1 < ++i) return; // R2 zone is empty R2 = i; } bool RussianStemmer::isVowel(wchar_t letter) { for (int32_t i = 0; i < SIZEOF_ARRAY(vowels); ++i) { if (letter == vowels[i]) return true; } return false; } bool RussianStemmer::noun(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, nounEndings()); } bool RussianStemmer::perfectiveGerund(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, perfectiveGerundEndings1(), perfectiveGerund1Predessors()) || findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2()); } bool RussianStemmer::reflexive(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, reflexiveEndings()); } bool RussianStemmer::removeI(String& stemmingZone) { if ((int32_t)stemmingZone.length() > 0 && stemmingZone[stemmingZone.length() - 1] == I) { stemmingZone.resize(stemmingZone.length() - 1); return true; } else return false; } bool RussianStemmer::removeSoft(String& stemmingZone) { if ((int32_t)stemmingZone.length() > 0 && stemmingZone[stemmingZone.length() - 1] == SOFT) { stemmingZone.resize(stemmingZone.length() - 1); return true; } return false; } bool RussianStemmer::superlative(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, superlativeEndings()); } bool RussianStemmer::undoubleN(String& stemmingZone) { if (findEnding(stemmingZone, doubleN()) != 0) { stemmingZone.resize(stemmingZone.length() - 1); return true; } else return false; } bool RussianStemmer::verb(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, verbEndings1(), verb1Predessors()) || findAndRemoveEnding(stemmingZone, verbEndings2()); } } 
LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/000077500000000000000000000000001217574114600220405ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/DefaultEncoder.cpp000066400000000000000000000010771217574114600254350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "DefaultEncoder.h" namespace Lucene { DefaultEncoder::~DefaultEncoder() { } String DefaultEncoder::encodeText(const String& originalText) { return originalText; } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/Encoder.cpp000066400000000000000000000011031217574114600241160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "Encoder.h" namespace Lucene { Encoder::~Encoder() { } String Encoder::encodeText(const String& originalText) { BOOST_ASSERT(false); return L""; // override } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/Formatter.cpp000066400000000000000000000011501217574114600245040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "Formatter.h" namespace Lucene { Formatter::~Formatter() { } String Formatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) { BOOST_ASSERT(false); return L""; // override } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/Fragmenter.cpp000066400000000000000000000013151217574114600246360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "Fragmenter.h" namespace Lucene { Fragmenter::~Fragmenter() { } void Fragmenter::start(const String& originalText, TokenStreamPtr tokenStream) { BOOST_ASSERT(false); // override } bool Fragmenter::isNewFragment() { BOOST_ASSERT(false); return false; // override } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/GradientFormatter.cpp000066400000000000000000000123021217574114600261630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GradientFormatter.h" #include "TokenGroup.h" #include "StringUtils.h" namespace Lucene { GradientFormatter::GradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor) { highlightForeground = (!minForegroundColor.empty() && !maxForegroundColor.empty()); if (highlightForeground) { if (minForegroundColor.length() != 7) boost::throw_exception(IllegalArgumentException(L"minForegroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); if (maxForegroundColor.length() != 7) boost::throw_exception(IllegalArgumentException(L"maxForegroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); fgRMin = hexToInt(minForegroundColor.substr(1, 2)); fgGMin = hexToInt(minForegroundColor.substr(3, 2)); fgBMin = hexToInt(minForegroundColor.substr(5, 2)); fgRMax = hexToInt(maxForegroundColor.substr(1, 2)); fgGMax = hexToInt(maxForegroundColor.substr(3, 2)); fgBMax = hexToInt(maxForegroundColor.substr(5, 2)); } highlightBackground = (!minBackgroundColor.empty() && !maxBackgroundColor.empty()); if (highlightBackground) { if (minBackgroundColor.length() != 7) boost::throw_exception(IllegalArgumentException(L"minBackgroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); if (maxBackgroundColor.length() != 7) boost::throw_exception(IllegalArgumentException(L"maxBackgroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); bgRMin = hexToInt(minBackgroundColor.substr(1, 2)); bgGMin = hexToInt(minBackgroundColor.substr(3, 2)); bgBMin = hexToInt(minBackgroundColor.substr(5, 2)); bgRMax = hexToInt(maxBackgroundColor.substr(1, 2)); bgGMax = hexToInt(maxBackgroundColor.substr(3, 2)); bgBMax = hexToInt(maxBackgroundColor.substr(5, 2)); } this->maxScore = maxScore; } GradientFormatter::~GradientFormatter() { } String 
GradientFormatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) { if (tokenGroup->getTotalScore() == 0) return originalText; double score = tokenGroup->getTotalScore(); if (score == 0.0) return originalText; StringStream buffer; buffer << L"" << originalText << L""; return buffer.str(); } String GradientFormatter::getForegroundColorString(double score) { int32_t rVal = getColorVal(fgRMin, fgRMax, score); int32_t gVal = getColorVal(fgGMin, fgGMax, score); int32_t bVal = getColorVal(fgBMin, fgBMax, score); StringStream buffer; buffer << L"#" << intToHex(rVal) << intToHex(gVal) << intToHex(bVal); return buffer.str(); } String GradientFormatter::getBackgroundColorString(double score) { int32_t rVal = getColorVal(bgRMin, bgRMax, score); int32_t gVal = getColorVal(bgGMin, bgGMax, score); int32_t bVal = getColorVal(bgBMin, bgBMax, score); StringStream buffer; buffer << L"#" << intToHex(rVal) << intToHex(gVal) << intToHex(bVal); return buffer.str(); } int32_t GradientFormatter::getColorVal(int32_t colorMin, int32_t colorMax, double score) { if (colorMin == colorMax) return colorMin; double scale = std::abs((double)(colorMin - colorMax)); double relScorePercent = std::min(maxScore, score) / maxScore; double colScore = scale * relScorePercent; return std::min(colorMin, colorMax) + (int32_t)colScore; } String GradientFormatter::intToHex(int32_t i) { static const wchar_t* hexDigits = L"0123456789abcdef"; StringStream buffer; buffer << hexDigits[(i & 0xf0) >> 4] << hexDigits[i & 0x0f]; return buffer.str(); } int32_t GradientFormatter::hexToInt(const String& hex) { int32_t len = (int32_t)hex.length(); if (len > 16) boost::throw_exception(NumberFormatException()); int32_t l = 0; for (int32_t i = 0; i < len; ++i) { l <<= 4; int32_t c = (int32_t)StringUtils::toLong(hex.substr(i, 1), 16); if (c < 0) boost::throw_exception(NumberFormatException()); l |= c; } return l; } } 
LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/Highlighter.cpp000066400000000000000000000336161217574114600250130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "Highlighter.h" #include "HighlighterScorer.h" #include "SimpleHTMLFormatter.h" #include "DefaultEncoder.h" #include "Scorer.h" #include "TokenStream.h" #include "StringReader.h" #include "Analyzer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "PositionIncrementAttribute.h" #include "TextFragment.h" #include "TokenGroup.h" #include "SimpleFragmenter.h" #include "StringUtils.h" namespace Lucene { const int32_t Highlighter::DEFAULT_MAX_CHARS_TO_ANALYZE = 50 * 1024; Highlighter::Highlighter(HighlighterScorerPtr fragmentScorer) { this->formatter = newLucene(); this->encoder = newLucene(); this->fragmentScorer = fragmentScorer; this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; this->textFragmenter = newLucene(); } Highlighter::Highlighter(FormatterPtr formatter, HighlighterScorerPtr fragmentScorer) { this->formatter = formatter; this->encoder = newLucene(); this->fragmentScorer = fragmentScorer; this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; this->textFragmenter = newLucene(); } Highlighter::Highlighter(FormatterPtr formatter, EncoderPtr encoder, HighlighterScorerPtr fragmentScorer) { this->formatter = formatter; this->encoder = encoder; this->fragmentScorer = fragmentScorer; this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; this->textFragmenter = newLucene(); } Highlighter::~Highlighter() { } String Highlighter::getBestFragment(AnalyzerPtr analyzer, const String& fieldName, const String& text) { TokenStreamPtr 
tokenStream(analyzer->tokenStream(fieldName, newLucene(text))); return getBestFragment(tokenStream, text); } String Highlighter::getBestFragment(TokenStreamPtr tokenStream, const String& text) { Collection results(getBestFragments(tokenStream,text, 1)); return results.empty() ? L"" : results[0]; } Collection Highlighter::getBestFragments(AnalyzerPtr analyzer, const String& fieldName, const String& text, int32_t maxNumFragments) { TokenStreamPtr tokenStream(analyzer->tokenStream(fieldName, newLucene(text))); return getBestFragments(tokenStream, text, maxNumFragments); } Collection Highlighter::getBestFragments(TokenStreamPtr tokenStream, const String& text, int32_t maxNumFragments) { maxNumFragments = std::max((int32_t)1, maxNumFragments); //sanity check Collection frag(getBestTextFragments(tokenStream, text, true, maxNumFragments)); // Get text Collection fragTexts(Collection::newInstance()); for (int32_t i = 0; i < frag.size(); ++i) { if (frag[i] && frag[i]->getScore() > 0) fragTexts.add(frag[i]->toString()); } return fragTexts; } Collection Highlighter::getBestTextFragments(TokenStreamPtr tokenStream, const String& text, bool merge, int32_t maxNumFragments) { Collection docFrags(Collection::newInstance()); StringBufferPtr newText(newLucene()); TermAttributePtr termAtt(tokenStream->addAttribute()); OffsetAttributePtr offsetAtt(tokenStream->addAttribute()); tokenStream->addAttribute(); tokenStream->reset(); TextFragmentPtr currentFrag(newLucene(newText, newText->length(), docFrags.size())); TokenStreamPtr newStream(fragmentScorer->init(tokenStream)); if (newStream) tokenStream = newStream; fragmentScorer->startFragment(currentFrag); docFrags.add(currentFrag); FragmentQueuePtr fragQueue(newLucene(maxNumFragments)); Collection frag; LuceneException finally; try { textFragmenter->start(text, tokenStream); TokenGroupPtr tokenGroup(newLucene(tokenStream)); String tokenText; int32_t startOffset = 0; int32_t endOffset = 0; int32_t lastEndOffset = 0; for (bool next = 
tokenStream->incrementToken(); next && offsetAtt->startOffset() < maxDocCharsToAnalyze; next = tokenStream->incrementToken()) { if (offsetAtt->endOffset() > (int32_t)text.length() || offsetAtt->startOffset() > (int32_t)text.length()) boost::throw_exception(RuntimeException(L"InvalidTokenOffsets: Token " + termAtt->term() + L" exceeds length of provided text sized " + StringUtils::toString(text.length()))); if (tokenGroup->numTokens > 0 && tokenGroup->isDistinct()) { // the current token is distinct from previous tokens - markup the cached token group info startOffset = tokenGroup->matchStartOffset; endOffset = tokenGroup->matchEndOffset; tokenText = text.substr(startOffset, endOffset - startOffset); String markedUpText(formatter->highlightTerm(encoder->encodeText(tokenText), tokenGroup)); // store any whitespace etc from between this and last group if (startOffset > lastEndOffset) newText->append(encoder->encodeText(text.substr(lastEndOffset, startOffset - lastEndOffset))); newText->append(markedUpText); lastEndOffset = std::max(endOffset, lastEndOffset); tokenGroup->clear(); // check if current token marks the start of a new fragment if (textFragmenter->isNewFragment()) { currentFrag->setScore(fragmentScorer->getFragmentScore()); // record stats for a new fragment currentFrag->textEndPos = newText->length(); currentFrag = newLucene(newText, newText->length(), docFrags.size()); fragmentScorer->startFragment(currentFrag); docFrags.add(currentFrag); } } tokenGroup->addToken(fragmentScorer->getTokenScore()); } currentFrag->setScore(fragmentScorer->getFragmentScore()); if (tokenGroup->numTokens > 0) { // flush the accumulated text (same code as in above loop) startOffset = tokenGroup->matchStartOffset; endOffset = tokenGroup->matchEndOffset; tokenText = text.substr(startOffset, endOffset - startOffset); String markedUpText(formatter->highlightTerm(encoder->encodeText(tokenText), tokenGroup)); // store any whitespace etc from between this and last group if (startOffset 
> lastEndOffset) newText->append(encoder->encodeText(text.substr(lastEndOffset, startOffset - lastEndOffset))); newText->append(markedUpText); lastEndOffset = std::max(lastEndOffset, endOffset); } // Test what remains of the original text beyond the point where we stopped analyzing if (lastEndOffset < (int32_t)text.length() && (int32_t)text.length() <= maxDocCharsToAnalyze) { // append it to the last fragment newText->append(encoder->encodeText(text.substr(lastEndOffset))); } currentFrag->textEndPos = newText->length(); // sort the most relevant sections of the text for (Collection::iterator i = docFrags.begin(); i != docFrags.end(); ++i) fragQueue->addOverflow(*i); // return the most relevant fragments frag = Collection::newInstance(fragQueue->size()); for (int32_t i = frag.size() - 1; i >= 0; --i) frag[i] = fragQueue->pop(); // merge any contiguous fragments to improve readability if (merge) { mergeContiguousFragments(frag); Collection fragTexts(Collection::newInstance()); for (int32_t i = 0; i < frag.size(); ++i) { if (frag[i] && frag[i]->getScore() > 0) fragTexts.add(frag[i]); } frag = fragTexts; } } catch (LuceneException& e) { finally = e; } if (tokenStream) { try { tokenStream->close(); } catch (...) 
{ } } finally.throwException(); return frag; } void Highlighter::mergeContiguousFragments(Collection frag) { if (frag.size() > 1) { bool mergingStillBeingDone = false; do { mergingStillBeingDone = false; // initialise loop control flag // for each fragment, scan other frags looking for contiguous blocks for (int32_t i = 0; i < frag.size(); ++i) { if (!frag[i]) continue; // merge any contiguous blocks for (int32_t x = 0; x < frag.size(); ++x) { if (!frag[x]) continue; if (!frag[i]) break; TextFragmentPtr frag1; TextFragmentPtr frag2; int32_t frag1Num = 0; int32_t frag2Num = 0; int32_t bestScoringFragNum = 0; int32_t worstScoringFragNum = 0; // if blocks are contiguous if (frag[i]->follows(frag[x])) { frag1 = frag[x]; frag1Num = x; frag2 = frag[i]; frag2Num = i; } else if (frag[x]->follows(frag[i])) { frag1 = frag[i]; frag1Num = i; frag2 = frag[x]; frag2Num = x; } // merging required if (frag1) { if (frag1->getScore() > frag2->getScore()) { bestScoringFragNum = frag1Num; worstScoringFragNum = frag2Num; } else { bestScoringFragNum = frag2Num; worstScoringFragNum = frag1Num; } frag1->merge(frag2); frag[worstScoringFragNum].reset(); mergingStillBeingDone = true; frag[bestScoringFragNum] = frag1; } } } } while (mergingStillBeingDone); } } String Highlighter::getBestFragments(TokenStreamPtr tokenStream, const String& text, int32_t maxNumFragments, const String& separator) { Collection sections(getBestFragments(tokenStream, text, maxNumFragments)); StringStream result; for (int32_t i = 0; i < sections.size(); ++i) { if (i > 0) result << separator; result << sections[i]; } return result.str(); } int32_t Highlighter::getMaxDocCharsToAnalyze() { return maxDocCharsToAnalyze; } void Highlighter::setMaxDocCharsToAnalyze(int32_t maxDocCharsToAnalyze) { this->maxDocCharsToAnalyze = maxDocCharsToAnalyze; } FragmenterPtr Highlighter::getTextFragmenter() { return textFragmenter; } void Highlighter::setTextFragmenter(FragmenterPtr fragmenter) { textFragmenter = fragmenter; } 
HighlighterScorerPtr Highlighter::getFragmentScorer() { return fragmentScorer; } void Highlighter::setFragmentScorer(HighlighterScorerPtr scorer) { fragmentScorer = scorer; } EncoderPtr Highlighter::getEncoder() { return encoder; } void Highlighter::setEncoder(EncoderPtr encoder) { this->encoder = encoder; } FragmentQueue::FragmentQueue(int32_t size) : PriorityQueue(size) { } FragmentQueue::~FragmentQueue() { } bool FragmentQueue::lessThan(const TextFragmentPtr& first, const TextFragmentPtr& second) { if (first->getScore() == second->getScore()) return first->fragNum > second->fragNum; else return first->getScore() < second->getScore(); } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/HighlighterScorer.cpp000066400000000000000000000017661217574114600261720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "HighlighterScorer.h" namespace Lucene { HighlighterScorer::~HighlighterScorer() { } TokenStreamPtr HighlighterScorer::init(TokenStreamPtr tokenStream) { BOOST_ASSERT(false); return TokenStreamPtr(); // override } void HighlighterScorer::startFragment(TextFragmentPtr newFragment) { BOOST_ASSERT(false); // override } double HighlighterScorer::getTokenScore() { BOOST_ASSERT(false); return 0; // override } double HighlighterScorer::getFragmentScore() { BOOST_ASSERT(false); return 0; // override } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/MapWeightedSpanTerm.cpp000066400000000000000000000021731217574114600264170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "MapWeightedSpanTerm.h" namespace Lucene { MapWeightedSpanTerm::MapWeightedSpanTerm() { map = MapStringWeightedSpanTerm::newInstance(); } MapWeightedSpanTerm::~MapWeightedSpanTerm() { } MapStringWeightedSpanTerm::iterator MapWeightedSpanTerm::begin() { return map.begin(); } MapStringWeightedSpanTerm::iterator MapWeightedSpanTerm::end() { return map.end(); } void MapWeightedSpanTerm::put(const String& key, WeightedSpanTermPtr val) { return map.put(key, val); } WeightedSpanTermPtr MapWeightedSpanTerm::get(const String& key) const { return map.get(key); } void MapWeightedSpanTerm::clear() { map.clear(); } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/NullFragmenter.cpp000066400000000000000000000012071217574114600254710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "NullFragmenter.h" namespace Lucene { NullFragmenter::~NullFragmenter() { } void NullFragmenter::start(const String& originalText, TokenStreamPtr tokenStream) { } bool NullFragmenter::isNewFragment() { return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/QueryScorer.cpp000066400000000000000000000124471217574114600250370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "QueryScorer.h" #include "WeightedSpanTerm.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "TokenStream.h" #include "MapWeightedSpanTerm.h" #include "WeightedSpanTermExtractor.h" namespace Lucene { QueryScorer::QueryScorer(QueryPtr query) { init(query, L"", IndexReaderPtr(), true); } QueryScorer::QueryScorer(QueryPtr query, const String& field) { init(query, field, IndexReaderPtr(), true); } QueryScorer::QueryScorer(QueryPtr query, IndexReaderPtr reader, const String& field) { init(query, field, reader, true); } QueryScorer::QueryScorer(QueryPtr query, IndexReaderPtr reader, const String& field, const String& defaultField) { this->defaultField = defaultField; init(query, field, reader, true); } QueryScorer::QueryScorer(QueryPtr query, const String& field, const String& defaultField) { this->defaultField = defaultField; init(query, field, IndexReaderPtr(), true); } QueryScorer::QueryScorer(Collection weightedTerms) { init(QueryPtr(), L"", IndexReaderPtr(), true); this->fieldWeightedSpanTerms = newLucene(); for (int32_t i = 0; i < weightedTerms.size(); ++i) { WeightedSpanTermPtr existingTerm(fieldWeightedSpanTerms->get(weightedTerms[i]->term)); if (!existingTerm || existingTerm->weight < weightedTerms[i]->weight) { // if a term is defined more than once, always use the highest scoring weight fieldWeightedSpanTerms->put(weightedTerms[i]->term, weightedTerms[i]); maxTermWeight = std::max(maxTermWeight, weightedTerms[i]->getWeight()); } } skipInitExtractor = true; } QueryScorer::~QueryScorer() { } void QueryScorer::init(QueryPtr query, const String& field, IndexReaderPtr reader, bool expandMultiTermQuery) { this->totalScore = 0; this->maxTermWeight = 0; this->position = -1; this->skipInitExtractor = false; this->wrapToCaching = true; this->reader = reader; this->expandMultiTermQuery = expandMultiTermQuery; this->query = query; 
this->field = field; } double QueryScorer::getFragmentScore() { return totalScore; } double QueryScorer::getMaxTermWeight() { return maxTermWeight; } double QueryScorer::getTokenScore() { position += posIncAtt->getPositionIncrement(); String termText(termAtt->term()); WeightedSpanTermPtr weightedSpanTerm(fieldWeightedSpanTerms->get(termText)); if (!weightedSpanTerm) return 0.0; if (weightedSpanTerm->positionSensitive && !weightedSpanTerm->checkPosition(position)) return 0.0; double score = weightedSpanTerm->getWeight(); // found a query term - is it unique in this doc? if (!foundTerms.contains(termText)) { totalScore += score; foundTerms.add(termText); } return score; } TokenStreamPtr QueryScorer::init(TokenStreamPtr tokenStream) { position = -1; termAtt = tokenStream->addAttribute(); posIncAtt = tokenStream->addAttribute(); if (!skipInitExtractor) { if (fieldWeightedSpanTerms) fieldWeightedSpanTerms->clear(); return initExtractor(tokenStream); } return TokenStreamPtr(); } WeightedSpanTermPtr QueryScorer::getWeightedSpanTerm(const String& token) { return fieldWeightedSpanTerms->get(token); } TokenStreamPtr QueryScorer::initExtractor(TokenStreamPtr tokenStream) { WeightedSpanTermExtractorPtr qse(newLucene(defaultField)); qse->setExpandMultiTermQuery(expandMultiTermQuery); qse->setWrapIfNotCachingTokenFilter(wrapToCaching); if (!reader) this->fieldWeightedSpanTerms = qse->getWeightedSpanTerms(query, tokenStream, field); else this->fieldWeightedSpanTerms = qse->getWeightedSpanTermsWithScores(query, tokenStream, field, reader); if (qse->isCachedTokenStream()) return qse->getTokenStream(); return TokenStreamPtr(); } void QueryScorer::startFragment(TextFragmentPtr newFragment) { foundTerms = HashSet::newInstance(); totalScore = 0; } bool QueryScorer::isExpandMultiTermQuery() { return expandMultiTermQuery; } void QueryScorer::setExpandMultiTermQuery(bool expandMultiTermQuery) { this->expandMultiTermQuery = expandMultiTermQuery; } void 
QueryScorer::setWrapIfNotCachingTokenFilter(bool wrap) { this->wrapToCaching = wrap; } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/QueryTermExtractor.cpp000066400000000000000000000101511217574114600263730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "QueryTermExtractor.h" #include "Term.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "FilteredQuery.h" #include "WeightedTerm.h" #include "IndexReader.h" #include "MiscUtils.h" namespace Lucene { QueryTermExtractor::~QueryTermExtractor() { } Collection QueryTermExtractor::getTerms(QueryPtr query) { return getTerms(query, false); } Collection QueryTermExtractor::getIdfWeightedTerms(QueryPtr query, IndexReaderPtr reader, const String& fieldName) { Collection terms(getTerms(query, false, fieldName)); int32_t totalNumDocs = reader->numDocs(); for (int32_t i = 0; i < terms.size(); ++i) { try { int32_t docFreq = reader->docFreq(newLucene(fieldName, terms[i]->term)); // docFreq counts deletes if (totalNumDocs < docFreq) docFreq = totalNumDocs; // IDF algorithm taken from DefaultSimilarity class double idf = (double)(std::log((double)totalNumDocs / (double)(docFreq + 1)) + 1.0); terms[i]->weight *= idf; } catch (...) 
{ // ignore } } return terms; } Collection QueryTermExtractor::getTerms(QueryPtr query, bool prohibited, const String& fieldName) { SetWeightedTerm terms(SetWeightedTerm::newInstance()); getTerms(query, terms, prohibited, fieldName); return Collection::newInstance(terms.begin(), terms.end()); } Collection QueryTermExtractor::getTerms(QueryPtr query, bool prohibited) { SetWeightedTerm terms(SetWeightedTerm::newInstance()); getTerms(query, terms, prohibited, L""); return Collection::newInstance(terms.begin(), terms.end()); } void QueryTermExtractor::getTerms(QueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { try { if (MiscUtils::typeOf(query)) getTermsFromBooleanQuery(boost::dynamic_pointer_cast(query), terms, prohibited, fieldName); else if (MiscUtils::typeOf(query)) getTermsFromFilteredQuery(boost::dynamic_pointer_cast(query), terms, prohibited, fieldName); else { SetTerm nonWeightedTerms(SetTerm::newInstance()); query->extractTerms(nonWeightedTerms); for (SetTerm::iterator term = nonWeightedTerms.begin(); term != nonWeightedTerms.end(); ++term) { if (fieldName.empty() || (*term)->field() == fieldName) terms.add(newLucene(query->getBoost(), (*term)->text())); } } } catch (UnsupportedOperationException&) { // this is non-fatal for our purposes } } void QueryTermExtractor::getTermsFromBooleanQuery(BooleanQueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { Collection queryClauses(query->getClauses()); for (int32_t i = 0; i < queryClauses.size(); ++i) { if (prohibited || queryClauses[i]->getOccur() != BooleanClause::MUST_NOT) getTerms(queryClauses[i]->getQuery(), terms, prohibited, fieldName); } } void QueryTermExtractor::getTermsFromFilteredQuery(FilteredQueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { getTerms(query->getQuery(), terms, prohibited, fieldName); } } 
LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/QueryTermScorer.cpp000066400000000000000000000063151217574114600256640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "QueryTermScorer.h" #include "QueryTermExtractor.h" #include "TermAttribute.h" #include "WeightedTerm.h" #include "TokenStream.h" namespace Lucene { QueryTermScorer::QueryTermScorer(QueryPtr query) { ConstructQueryTermScorer(QueryTermExtractor::getTerms(query)); } QueryTermScorer::QueryTermScorer(QueryPtr query, const String& fieldName) { ConstructQueryTermScorer(QueryTermExtractor::getTerms(query, false, fieldName)); } QueryTermScorer::QueryTermScorer(QueryPtr query, IndexReaderPtr reader, const String& fieldName) { ConstructQueryTermScorer(QueryTermExtractor::getIdfWeightedTerms(query, reader, fieldName)); } QueryTermScorer::QueryTermScorer(Collection weightedTerms) { ConstructQueryTermScorer(weightedTerms); } QueryTermScorer::~QueryTermScorer() { } void QueryTermScorer::ConstructQueryTermScorer(Collection weightedTerms) { totalScore = 0; maxTermWeight = 0; termsToFind = MapStringWeightedTerm::newInstance(); for (int32_t i = 0; i < weightedTerms.size(); ++i) { WeightedTermPtr existingTerm(termsToFind.get(weightedTerms[i]->term)); if (!existingTerm || existingTerm->weight < weightedTerms[i]->weight) { // if a term is defined more than once, always use the highest scoring weight termsToFind.put(weightedTerms[i]->term, weightedTerms[i]); maxTermWeight = std::max(maxTermWeight, weightedTerms[i]->getWeight()); } } } TokenStreamPtr QueryTermScorer::init(TokenStreamPtr tokenStream) { termAtt = tokenStream->addAttribute(); return TokenStreamPtr(); } void 
QueryTermScorer::startFragment(TextFragmentPtr newFragment) { uniqueTermsInFragment = HashSet::newInstance(); currentTextFragment = newFragment; totalScore = 0; } double QueryTermScorer::getTokenScore() { String termText(termAtt->term()); WeightedTermPtr queryTerm(termsToFind.get(termText)); if (!queryTerm) return 0.0; // not a query term - return // found a query term - is it unique in this doc? if (!uniqueTermsInFragment.contains(termText)) { totalScore += queryTerm->getWeight();; uniqueTermsInFragment.add(termText); } return queryTerm->getWeight(); } double QueryTermScorer::getFragmentScore() { return totalScore; } void QueryTermScorer::allFragmentsProcessed() { // this class has no special operations to perform at end of processing } double QueryTermScorer::getMaxTermWeight() { return maxTermWeight; } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/SimpleFragmenter.cpp000066400000000000000000000030071217574114600260100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SimpleFragmenter.h" #include "TokenGroup.h" #include "OffsetAttribute.h" #include "TokenStream.h" namespace Lucene { const int32_t SimpleFragmenter::DEFAULT_FRAGMENT_SIZE = 100; SimpleFragmenter::SimpleFragmenter() { this->currentNumFrags = 0; this->fragmentSize = DEFAULT_FRAGMENT_SIZE; } SimpleFragmenter::SimpleFragmenter(int32_t fragmentSize) { this->currentNumFrags = 0; this->fragmentSize = fragmentSize; } SimpleFragmenter::~SimpleFragmenter() { } void SimpleFragmenter::start(const String& originalText, TokenStreamPtr tokenStream) { offsetAtt = tokenStream->addAttribute(); currentNumFrags = 1; } bool SimpleFragmenter::isNewFragment() { bool isNewFrag = (offsetAtt->endOffset() >= (fragmentSize * currentNumFrags)); if (isNewFrag) ++currentNumFrags; return isNewFrag; } int32_t SimpleFragmenter::getFragmentSize() { return fragmentSize; } void SimpleFragmenter::setFragmentSize(int32_t size) { fragmentSize = size; } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/SimpleHTMLEncoder.cpp000066400000000000000000000031231217574114600257610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SimpleHTMLEncoder.h" namespace Lucene { SimpleHTMLEncoder::~SimpleHTMLEncoder() { } String SimpleHTMLEncoder::encodeText(const String& originalText) { return htmlEncode(originalText); } String SimpleHTMLEncoder::htmlEncode(const String& plainText) { if (plainText.empty()) return L""; StringStream result; for (int32_t index = 0; index < (int32_t)plainText.length(); ++index) { wchar_t ch = plainText[index]; switch (ch) { case L'\"': result << L"""; break; case L'&': result << L"&"; break; case L'<': result << L"<"; break; case L'>': result << L">"; break; default: if (ch < 128) result << ch; else result << L"&#" << (int32_t)ch << L";"; break; } } return result.str(); } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/SimpleHTMLFormatter.cpp000066400000000000000000000023551217574114600263530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SimpleHTMLFormatter.h" #include "TokenGroup.h" namespace Lucene { const String SimpleHTMLFormatter::DEFAULT_PRE_TAG = L""; const String SimpleHTMLFormatter::DEFAULT_POST_TAG = L""; SimpleHTMLFormatter::SimpleHTMLFormatter() { this->preTag = DEFAULT_PRE_TAG; this->postTag = DEFAULT_POST_TAG; } SimpleHTMLFormatter::SimpleHTMLFormatter(const String& preTag, const String& postTag) { this->preTag = preTag; this->postTag = postTag; } SimpleHTMLFormatter::~SimpleHTMLFormatter() { } String SimpleHTMLFormatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) { if (tokenGroup->getTotalScore() == 0) return originalText; StringStream buffer; buffer << preTag << originalText << postTag; return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/SimpleSpanFragmenter.cpp000066400000000000000000000054431217574114600266400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SimpleSpanFragmenter.h" #include "WeightedSpanTerm.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "QueryScorer.h" #include "TokenStream.h" #include "MiscUtils.h" namespace Lucene { const int32_t SimpleSpanFragmenter::DEFAULT_FRAGMENT_SIZE = 100; SimpleSpanFragmenter::SimpleSpanFragmenter(QueryScorerPtr queryScorer) { this->currentNumFrags = 0; this->position = -1; this->waitForPos = -1; this->textSize = 0; this->queryScorer = queryScorer; this->fragmentSize = DEFAULT_FRAGMENT_SIZE; } SimpleSpanFragmenter::SimpleSpanFragmenter(QueryScorerPtr queryScorer, int32_t fragmentSize) { this->currentNumFrags = 0; this->position = -1; this->waitForPos = -1; this->textSize = 0; this->queryScorer = queryScorer; this->fragmentSize = fragmentSize; } SimpleSpanFragmenter::~SimpleSpanFragmenter() { } bool SimpleSpanFragmenter::isNewFragment() { position += posIncAtt->getPositionIncrement(); if (waitForPos == position) waitForPos = -1; else if (waitForPos != -1) return false; WeightedSpanTermPtr wSpanTerm(queryScorer->getWeightedSpanTerm(termAtt->term())); if (wSpanTerm) { Collection positionSpans(wSpanTerm->getPositionSpans()); for (int32_t i = 0; i < positionSpans.size(); ++i) { if (positionSpans[i]->start == position) { waitForPos = positionSpans[i]->end + 1; break; } } } bool isNewFrag = (offsetAtt->endOffset() >= (fragmentSize * currentNumFrags) && (textSize - offsetAtt->endOffset()) >= MiscUtils::unsignedShift(fragmentSize, 1)); if (isNewFrag) ++currentNumFrags; return isNewFrag; } void SimpleSpanFragmenter::start(const String& originalText, TokenStreamPtr tokenStream) { position = -1; currentNumFrags = 1; textSize = originalText.length(); termAtt = tokenStream->addAttribute(); posIncAtt = tokenStream->addAttribute(); offsetAtt = tokenStream->addAttribute(); } } 
LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/SpanGradientFormatter.cpp000066400000000000000000000027751217574114600270220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SpanGradientFormatter.h" #include "TokenGroup.h" namespace Lucene { SpanGradientFormatter::SpanGradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor) : GradientFormatter(maxScore, minForegroundColor, maxForegroundColor, minBackgroundColor, maxBackgroundColor) { } SpanGradientFormatter::~SpanGradientFormatter() { } String SpanGradientFormatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) { if (tokenGroup->getTotalScore() == 0) return originalText; double score = tokenGroup->getTotalScore(); if (score == 0.0) return originalText; StringStream buffer; buffer << L"" << originalText << L""; return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/TextFragment.cpp000066400000000000000000000034171217574114600251610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "TextFragment.h" namespace Lucene { TextFragment::TextFragment(StringBufferPtr markedUpText, int32_t textStartPos, int32_t fragNum) { this->markedUpText = markedUpText; this->textStartPos = textStartPos; this->textEndPos = 0; this->fragNum = fragNum; this->score = 0; } TextFragment::~TextFragment() { } void TextFragment::setScore(double score) { this->score = score; } double TextFragment::getScore() { return score; } void TextFragment::merge(TextFragmentPtr frag2) { textEndPos = frag2->textEndPos; score = std::max(score, frag2->score); } bool TextFragment::follows(TextFragmentPtr fragment) { return (textStartPos == fragment->textEndPos); } int32_t TextFragment::getFragNum() { return fragNum; } String TextFragment::toString() { return markedUpText->toString().substr(textStartPos, textEndPos - textStartPos); } StringBuffer::~StringBuffer() { } int32_t StringBuffer::length() { return buffer.str().length(); } String StringBuffer::toString() { return buffer.str(); } void StringBuffer::append(const String& str) { buffer << str; } void StringBuffer::clear() { buffer.str(L""); } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/TokenGroup.cpp000066400000000000000000000064071217574114600246500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "TokenGroup.h" #include "OffsetAttribute.h" #include "TermAttribute.h" #include "TokenStream.h" #include "Token.h" namespace Lucene { const int32_t TokenGroup::MAX_NUM_TOKENS_PER_GROUP = 50; TokenGroup::TokenGroup(TokenStreamPtr tokenStream) { offsetAtt = tokenStream->addAttribute(); termAtt = tokenStream->addAttribute(); tokens = Collection::newInstance(MAX_NUM_TOKENS_PER_GROUP); scores = Collection::newInstance(MAX_NUM_TOKENS_PER_GROUP); numTokens = 0; startOffset = 0; endOffset = 0; tot = 0.0; matchStartOffset = 0; matchEndOffset = 0; } TokenGroup::~TokenGroup() { } void TokenGroup::addToken(double score) { if (numTokens < MAX_NUM_TOKENS_PER_GROUP) { int32_t termStartOffset = offsetAtt->startOffset(); int32_t termEndOffset = offsetAtt->endOffset(); if (numTokens == 0) { matchStartOffset = termStartOffset; startOffset = termStartOffset; matchEndOffset = termEndOffset; endOffset = termEndOffset; tot += score; } else { startOffset = std::min(startOffset, termStartOffset); endOffset = std::max(endOffset, termEndOffset); if (score > 0) { if (tot == 0) { matchStartOffset = offsetAtt->startOffset(); matchEndOffset = offsetAtt->endOffset(); } else { matchStartOffset = std::min(matchStartOffset, termStartOffset); matchEndOffset = std::max(matchEndOffset, termEndOffset); } tot += score; } } TokenPtr token(newLucene(termStartOffset, termEndOffset)); token->setTermBuffer(termAtt->term()); tokens[numTokens] = token; scores[numTokens] = score; ++numTokens; } } bool TokenGroup::isDistinct() { return (offsetAtt->startOffset() >= endOffset); } void TokenGroup::clear() { numTokens = 0; tot = 0; } TokenPtr TokenGroup::getToken(int32_t index) { return tokens[index]; } double TokenGroup::getScore(int32_t index) { return scores[index]; } int32_t TokenGroup::getEndOffset() { return endOffset; } int32_t TokenGroup::getNumTokens() { return numTokens; } int32_t 
TokenGroup::getStartOffset() { return startOffset; } double TokenGroup::getTotalScore() { return tot; } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/TokenSources.cpp000066400000000000000000000167331217574114600252020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "TokenSources.h" #include "IndexReader.h" #include "Document.h" #include "Analyzer.h" #include "TokenStream.h" #include "TermFreqVector.h" #include "TermPositionVector.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "TermVectorOffsetInfo.h" #include "Token.h" #include "StringReader.h" #include "StringUtils.h" namespace Lucene { TokenSources::~TokenSources() { } TokenStreamPtr TokenSources::getAnyTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, DocumentPtr doc, AnalyzerPtr analyzer) { TokenStreamPtr ts; TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); if (tfv) { if (boost::dynamic_pointer_cast(tfv)) ts = getTokenStream(boost::dynamic_pointer_cast(tfv)); } // No token info stored so fall back to analyzing raw content if (!ts) ts = getTokenStream(doc, field, analyzer); return ts; } TokenStreamPtr TokenSources::getAnyTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, AnalyzerPtr analyzer) { TokenStreamPtr ts; TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); if (tfv) { if (boost::dynamic_pointer_cast(tfv)) ts = getTokenStream(boost::dynamic_pointer_cast(tfv)); } // No token info stored so fall back to analyzing raw content if (!ts) ts = getTokenStream(reader, docId, field, analyzer); return ts; } TokenStreamPtr TokenSources::getTokenStream(TermPositionVectorPtr tpv) { 
// assumes the worst and makes no assumptions about token position sequences. return getTokenStream(tpv, false); } struct lessTokenOffset { inline bool operator()(const TokenPtr& first, const TokenPtr& second) const { if (first->startOffset() < second->startOffset()) return true; return (first->startOffset() > second->endOffset()); } }; TokenStreamPtr TokenSources::getTokenStream(TermPositionVectorPtr tpv, bool tokenPositionsGuaranteedContiguous) { // code to reconstruct the original sequence of Tokens Collection terms(tpv->getTerms()); Collection freq(tpv->getTermFrequencies()); int32_t totalTokens = 0; for (int32_t t = 0; t < freq.size(); ++t) totalTokens += freq[t]; Collection tokensInOriginalOrder(Collection::newInstance(totalTokens)); Collection unsortedTokens; for (int32_t t = 0; t < freq.size(); ++t) { Collection offsets(tpv->getOffsets(t)); if (!offsets) return TokenStreamPtr(); Collection pos; if (tokenPositionsGuaranteedContiguous) { // try get the token position info to speed up assembly of tokens into sorted sequence pos = tpv->getTermPositions(t); } if (!pos) { // tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later if (!unsortedTokens) unsortedTokens = Collection::newInstance(); for (int32_t tp = 0; tp < offsets.size(); ++tp) { TokenPtr token(newLucene(offsets[tp]->getStartOffset(), offsets[tp]->getEndOffset())); token->setTermBuffer(terms[t]); unsortedTokens.add(token); } } else { // We have positions stored and a guarantee that the token position information is contiguous // This may be fast BUT wont work if Tokenizers used which create >1 token in same position or // creates jumps in position numbers - this code would fail under those circumstances // Tokens stored with positions - can use this to index straight into sorted array for (int32_t tp = 0; tp < pos.size(); ++tp) { TokenPtr token(newLucene(terms[t], offsets[tp]->getStartOffset(), offsets[tp]->getEndOffset())); tokensInOriginalOrder[pos[tp]] = 
token; } } } // If the field has been stored without position data we must perform a sort if (unsortedTokens) { tokensInOriginalOrder = unsortedTokens; std::sort(tokensInOriginalOrder.begin(), tokensInOriginalOrder.end(), lessTokenOffset()); } return newLucene(tokensInOriginalOrder); } TokenStreamPtr TokenSources::getTokenStream(IndexReaderPtr reader, int32_t docId, const String& field) { TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); if (!tfv) boost::throw_exception(IllegalArgumentException(field + L" in doc #" + StringUtils::toString(docId) + L"does not have any term position data stored")); if (boost::dynamic_pointer_cast(tfv)) { TermPositionVectorPtr tpv(boost::dynamic_pointer_cast(reader->getTermFreqVector(docId, field))); return getTokenStream(tpv); } boost::throw_exception(IllegalArgumentException(field + L" in doc #" + StringUtils::toString(docId) + L"does not have any term position data stored")); return TokenStreamPtr(); } TokenStreamPtr TokenSources::getTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, AnalyzerPtr analyzer) { DocumentPtr doc(reader->document(docId)); return getTokenStream(doc, field, analyzer); } TokenStreamPtr TokenSources::getTokenStream(DocumentPtr doc, const String& field, AnalyzerPtr analyzer) { String contents(doc->get(field)); if (contents.empty()) boost::throw_exception(IllegalArgumentException(L"Field " + field + L" in document is not stored and cannot be analyzed")); return getTokenStream(field, contents, analyzer); } TokenStreamPtr TokenSources::getTokenStream(const String& field, const String& contents, AnalyzerPtr analyzer) { return analyzer->tokenStream(field, newLucene(contents)); } StoredTokenStream::StoredTokenStream(Collection tokens) { this->tokens = tokens; this->termAtt = addAttribute(); this->offsetAtt = addAttribute(); } StoredTokenStream::~StoredTokenStream() { } bool StoredTokenStream::incrementToken() { if (currentToken >= tokens.size()) return false; clearAttributes(); 
TokenPtr token(tokens[currentToken++]); termAtt->setTermBuffer(token->term()); offsetAtt->setOffset(token->startOffset(), token->endOffset()); return true; } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/WeightedSpanTerm.cpp000066400000000000000000000037501217574114600257630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "WeightedSpanTerm.h" namespace Lucene { WeightedSpanTerm::WeightedSpanTerm(double weight, const String& term, bool positionSensitive) : WeightedTerm(weight, term) { this->positionSensitive = positionSensitive; this->positionSpans = Collection::newInstance(); } WeightedSpanTerm::~WeightedSpanTerm() { } bool WeightedSpanTerm::checkPosition(int32_t position) { // There would probably be a slight speed improvement if PositionSpans where kept in some sort of priority queue - // that way this method could bail early without checking each PositionSpan. 
for (Collection::iterator posSpan = positionSpans.begin(); posSpan != positionSpans.end(); ++posSpan) { if (position >= (*posSpan)->start && position <= (*posSpan)->end) return true; } return false; } void WeightedSpanTerm::addPositionSpans(Collection positionSpans) { this->positionSpans.addAll(positionSpans.begin(), positionSpans.end()); } bool WeightedSpanTerm::isPositionSensitive() { return positionSensitive; } void WeightedSpanTerm::setPositionSensitive(bool positionSensitive) { this->positionSensitive = positionSensitive; } Collection WeightedSpanTerm::getPositionSpans() { return positionSpans; } PositionSpan::PositionSpan(int32_t start, int32_t end) { this->start = start; this->end = end; } PositionSpan::~PositionSpan() { } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/WeightedSpanTermExtractor.cpp000066400000000000000000000463441217574114600276650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "WeightedSpanTermExtractor.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "PhraseQuery.h" #include "Term.h" #include "SpanQuery.h" #include "SpanTermQuery.h" #include "SpanNearQuery.h" #include "TermQuery.h" #include "FilteredQuery.h" #include "DisjunctionMaxQuery.h" #include "MultiTermQuery.h" #include "MultiPhraseQuery.h" #include "WeightedSpanTerm.h" #include "CachingTokenFilter.h" #include "Spans.h" #include "FieldMaskingSpanQuery.h" #include "SpanFirstQuery.h" #include "SpanNotQuery.h" #include "SpanOrQuery.h" #include "MemoryIndex.h" #include "MiscUtils.h" namespace Lucene { WeightedSpanTermExtractor::WeightedSpanTermExtractor(const String& defaultField) { this->defaultField = defaultField; this->expandMultiTermQuery = false; this->cachedTokenStream = false; this->wrapToCaching = true; this->readers = MapStringIndexReader::newInstance(); } WeightedSpanTermExtractor::~WeightedSpanTermExtractor() { } void WeightedSpanTermExtractor::closeReaders() { for (MapStringIndexReader::iterator reader = readers.begin(); reader != readers.end(); ++reader) { try { reader->second->close(); } catch (...) 
{ } } } void WeightedSpanTermExtractor::extract(QueryPtr query, MapWeightedSpanTermPtr terms) { if (MiscUtils::typeOf(query)) { Collection queryClauses(boost::dynamic_pointer_cast(query)->getClauses()); for (int32_t i = 0; i < queryClauses.size(); ++i) { if (!queryClauses[i]->isProhibited()) extract(queryClauses[i]->getQuery(), terms); } } else if (MiscUtils::typeOf(query)) { PhraseQueryPtr phraseQuery(boost::dynamic_pointer_cast(query)); Collection phraseQueryTerms(phraseQuery->getTerms()); Collection clauses(Collection::newInstance(phraseQueryTerms.size())); for (int32_t i = 0; i < phraseQueryTerms.size(); ++i) clauses[i] = newLucene(phraseQueryTerms[i]); int32_t slop = phraseQuery->getSlop(); Collection positions(phraseQuery->getPositions()); // add largest position increment to slop if (!positions.empty()) { int32_t lastPos = positions[0]; int32_t largestInc = 0; int32_t sz = positions.size(); for (int32_t i = 1; i < sz; ++i) { int32_t pos = positions[i]; int32_t inc = pos - lastPos; if (inc > largestInc) largestInc = inc; lastPos = pos; } if (largestInc > 1) slop += largestInc; } bool inorder = (slop == 0); SpanNearQueryPtr sp(newLucene(clauses, slop, inorder)); sp->setBoost(query->getBoost()); extractWeightedSpanTerms(terms, sp); } else if (MiscUtils::typeOf(query)) extractWeightedTerms(terms, query); else if (MiscUtils::typeOf(query)) extractWeightedSpanTerms(terms, boost::dynamic_pointer_cast(query)); else if (MiscUtils::typeOf(query)) extract(boost::dynamic_pointer_cast(query)->getQuery(), terms); else if (MiscUtils::typeOf(query)) { DisjunctionMaxQueryPtr dmq(boost::dynamic_pointer_cast(query)); for (Collection::iterator q = dmq->begin(); q != dmq->end(); ++q) extract(*q, terms); } else if (MiscUtils::typeOf(query) && expandMultiTermQuery) { MultiTermQueryPtr mtq(boost::dynamic_pointer_cast(query)); if (mtq->getRewriteMethod() != MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()) { mtq = boost::dynamic_pointer_cast(mtq->clone()); 
mtq->setRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); query = mtq; } FakeReaderPtr fReader(newLucene()); MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()->rewrite(fReader, mtq); if (!fReader->field.empty()) { IndexReaderPtr ir(getReaderForField(fReader->field)); extract(query->rewrite(ir), terms); } } else if (MiscUtils::typeOf(query)) { MultiPhraseQueryPtr mpq(boost::dynamic_pointer_cast(query)); Collection< Collection > termArrays(mpq->getTermArrays()); Collection positions(mpq->getPositions()); if (!positions.empty()) { int32_t maxPosition = positions[positions.size() - 1]; for (int32_t i = 0; i < positions.size() - 1; ++i) { if (positions[i] > maxPosition) maxPosition = positions[i]; } Collection< Collection > disjunctLists(Collection< Collection >::newInstance(maxPosition + 1)); int32_t distinctPositions = 0; for (int32_t i = 0; i < termArrays.size(); ++i) { Collection termArray(termArrays[i]); Collection disjuncts(disjunctLists[positions[i]]); if (!disjuncts) { disjunctLists[positions[i]] = Collection::newInstance(); disjuncts = disjunctLists[positions[i]]; ++distinctPositions; } for (int32_t j = 0; j < termArray.size(); ++j) disjuncts.add(newLucene(termArray[j])); } int32_t positionGaps = 0; int32_t position = 0; Collection clauses(Collection::newInstance(distinctPositions)); for (int32_t i = 0; i < disjunctLists.size(); ++i) { Collection disjuncts(disjunctLists[i]); if (disjuncts) clauses[position++] = newLucene(disjuncts); else ++positionGaps; } int32_t slop = mpq->getSlop(); bool inorder = (slop == 0); SpanNearQueryPtr sp(newLucene(clauses, slop + positionGaps, inorder)); sp->setBoost(query->getBoost()); extractWeightedSpanTerms(terms, sp); } } } void WeightedSpanTermExtractor::extractWeightedSpanTerms(MapWeightedSpanTermPtr terms, SpanQueryPtr spanQuery) { HashSet fieldNames(HashSet::newInstance()); if (fieldName.empty()) collectSpanQueryFields(spanQuery, fieldNames); else fieldNames.add(fieldName); // To support the use of the default 
field name if (!defaultField.empty()) fieldNames.add(defaultField); MapStringSpanQuery queries(MapStringSpanQuery::newInstance()); SetTerm nonWeightedTerms(SetTerm::newInstance()); bool rewriteQuery = mustRewriteQuery(spanQuery); if (rewriteQuery) { for (HashSet::iterator field = fieldNames.begin(); field != fieldNames.end(); ++field) { SpanQueryPtr rewrittenQuery(boost::dynamic_pointer_cast(spanQuery->rewrite(getReaderForField(*field)))); queries.put(*field, rewrittenQuery); rewrittenQuery->extractTerms(nonWeightedTerms); } } else spanQuery->extractTerms(nonWeightedTerms); Collection spanPositions(Collection::newInstance()); for (HashSet::iterator field = fieldNames.begin(); field != fieldNames.end(); ++field) { IndexReaderPtr reader(getReaderForField(*field)); SpansPtr spans; if (rewriteQuery) spans = queries.get(*field)->getSpans(reader); else spans = spanQuery->getSpans(reader); // collect span positions while (spans->next()) spanPositions.add(newLucene(spans->start(), spans->end() - 1)); } if (spanPositions.empty()) { // no spans found return; } for (SetTerm::iterator queryTerm = nonWeightedTerms.begin(); queryTerm != nonWeightedTerms.end(); ++queryTerm) { if (fieldNameComparator((*queryTerm)->field())) { WeightedSpanTermPtr weightedSpanTerm(terms->get((*queryTerm)->text())); if (!weightedSpanTerm) { weightedSpanTerm = newLucene(spanQuery->getBoost(), (*queryTerm)->text()); weightedSpanTerm->addPositionSpans(spanPositions); weightedSpanTerm->positionSensitive = true; terms->put((*queryTerm)->text(), weightedSpanTerm); } else { if (!spanPositions.empty()) weightedSpanTerm->addPositionSpans(spanPositions); } } } } void WeightedSpanTermExtractor::extractWeightedTerms(MapWeightedSpanTermPtr terms, QueryPtr query) { SetTerm nonWeightedTerms(SetTerm::newInstance()); query->extractTerms(nonWeightedTerms); for (SetTerm::iterator queryTerm = nonWeightedTerms.begin(); queryTerm != nonWeightedTerms.end(); ++queryTerm) { if (fieldNameComparator((*queryTerm)->field())) { 
WeightedSpanTermPtr weightedSpanTerm(newLucene(query->getBoost(), (*queryTerm)->text())); terms->put((*queryTerm)->text(), weightedSpanTerm); } } } bool WeightedSpanTermExtractor::fieldNameComparator(const String& fieldNameToCheck) { return (fieldName.empty() || fieldNameToCheck == fieldName || fieldNameToCheck == defaultField); } IndexReaderPtr WeightedSpanTermExtractor::getReaderForField(const String& field) { if (wrapToCaching && !cachedTokenStream && !MiscUtils::typeOf(tokenStream)) { tokenStream = newLucene(tokenStream); cachedTokenStream = true; } IndexReaderPtr reader(readers.get(field)); if (!reader) { MemoryIndexPtr indexer(newLucene()); indexer->addField(field, tokenStream); tokenStream->reset(); IndexSearcherPtr searcher(indexer->createSearcher()); reader = searcher->getIndexReader(); readers.put(field, reader); } return reader; } MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTerms(QueryPtr query, TokenStreamPtr tokenStream) { return getWeightedSpanTerms(query, tokenStream, L""); } MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTerms(QueryPtr query, TokenStreamPtr tokenStream, const String& fieldName) { if (!fieldName.empty()) this->fieldName = fieldName; else this->fieldName.clear(); MapWeightedSpanTermPtr terms(newLucene()); this->tokenStream = tokenStream; LuceneException finally; try { extract(query, terms); } catch (LuceneException& e) { finally = e; } closeReaders(); finally.throwException(); return terms; } MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTermsWithScores(QueryPtr query, TokenStreamPtr tokenStream, const String& fieldName, IndexReaderPtr reader) { if (!fieldName.empty()) this->fieldName = fieldName; else this->fieldName.clear(); MapWeightedSpanTermPtr terms(newLucene()); extract(query, terms); int32_t totalNumDocs = reader->numDocs(); LuceneException finally; try { for (MapStringWeightedSpanTerm::iterator weightedSpanTerm = terms->begin(); weightedSpanTerm != terms->end(); 
++weightedSpanTerm) { int32_t docFreq = reader->docFreq(newLucene(fieldName, weightedSpanTerm->second->term)); // docFreq counts deletes if (totalNumDocs < docFreq) docFreq = totalNumDocs; // IDF algorithm taken from DefaultSimilarity class double idf = (double)(std::log((double)totalNumDocs / (double)(docFreq + 1)) + 1.0); weightedSpanTerm->second->weight *= idf; } } catch (LuceneException& e) { finally = e; } closeReaders(); finally.throwException(); return terms; } void WeightedSpanTermExtractor::collectSpanQueryFields(SpanQueryPtr spanQuery, HashSet fieldNames) { if (MiscUtils::typeOf(spanQuery)) collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getMaskedQuery(), fieldNames); else if (MiscUtils::typeOf(spanQuery)) collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getMatch(), fieldNames); else if (MiscUtils::typeOf(spanQuery)) { Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) collectSpanQueryFields(*clause, fieldNames); } else if (MiscUtils::typeOf(spanQuery)) collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getInclude(), fieldNames); else if (MiscUtils::typeOf(spanQuery)) { Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) collectSpanQueryFields(*clause, fieldNames); } else fieldNames.add(spanQuery->getField()); } bool WeightedSpanTermExtractor::mustRewriteQuery(SpanQueryPtr spanQuery) { if (!expandMultiTermQuery) return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery. 
else if (MiscUtils::typeOf(spanQuery)) return mustRewriteQuery(boost::dynamic_pointer_cast(spanQuery)->getMaskedQuery()); else if (MiscUtils::typeOf(spanQuery)) return mustRewriteQuery(boost::dynamic_pointer_cast(spanQuery)->getMatch()); else if (MiscUtils::typeOf(spanQuery)) { Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (mustRewriteQuery(*clause)) return true; } return false; } else if (MiscUtils::typeOf(spanQuery)) { SpanNotQueryPtr spanNotQuery(boost::dynamic_pointer_cast(spanQuery)); return mustRewriteQuery(spanNotQuery->getInclude()) || mustRewriteQuery(spanNotQuery->getExclude()); } else if (MiscUtils::typeOf(spanQuery)) { Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (mustRewriteQuery(*clause)) return true; } return false; } else if (MiscUtils::typeOf(spanQuery)) return false; else return true; } bool WeightedSpanTermExtractor::getExpandMultiTermQuery() { return expandMultiTermQuery; } void WeightedSpanTermExtractor::setExpandMultiTermQuery(bool expandMultiTermQuery) { this->expandMultiTermQuery = expandMultiTermQuery; } bool WeightedSpanTermExtractor::isCachedTokenStream() { return cachedTokenStream; } TokenStreamPtr WeightedSpanTermExtractor::getTokenStream() { return tokenStream; } void WeightedSpanTermExtractor::setWrapIfNotCachingTokenFilter(bool wrap) { this->wrapToCaching = wrap; } PositionCheckingMap::~PositionCheckingMap() { } void PositionCheckingMap::put(const String& key, WeightedSpanTermPtr val) { MapStringWeightedSpanTerm::iterator prev = map.find(key); if (prev == map.end()) { map.put(key, val); return; } bool positionSensitive = prev->second->positionSensitive; prev->second = val; if (!positionSensitive) prev->second->positionSensitive = false; } FakeReader::FakeReader() : 
FilterIndexReader(EMPTY_MEMORY_INDEX_READER()) { } FakeReader::~FakeReader() { } IndexReaderPtr FakeReader::EMPTY_MEMORY_INDEX_READER() { static IndexReaderPtr _EMPTY_MEMORY_INDEX_READER; if (!_EMPTY_MEMORY_INDEX_READER) { _EMPTY_MEMORY_INDEX_READER = newLucene()->createSearcher()->getIndexReader(); CycleCheck::addStatic(_EMPTY_MEMORY_INDEX_READER); } return _EMPTY_MEMORY_INDEX_READER; } TermEnumPtr FakeReader::terms(TermPtr t) { // only set first fieldname if (t && field.empty()) field = t->field(); return FilterIndexReader::terms(t); } } LucenePlusPlus-rel_3.0.4/src/contrib/highlighter/WeightedTerm.cpp000066400000000000000000000016531217574114600251410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "WeightedTerm.h" namespace Lucene { WeightedTerm::WeightedTerm(double weight, const String& term) { this->weight = weight; this->term = term; } WeightedTerm::~WeightedTerm() { } String WeightedTerm::getTerm() { return term; } double WeightedTerm::getWeight() { return weight; } void WeightedTerm::setTerm(const String& term) { this->term = term; } void WeightedTerm::setWeight(double weight) { this->weight = weight; } } LucenePlusPlus-rel_3.0.4/src/contrib/include/000077500000000000000000000000001217574114600211655ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/include/ArabicAnalyzer.h000066400000000000000000000063441217574114600242340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICANALYZER_H #define ARABICANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Arabic. /// /// This analyzer implements light-stemming as specified by: /// Light Stemming for Arabic Information Retrieval /// /// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf /// /// The analysis package contains three primary components: ///
    ///
  • {@link ArabicNormalizationFilter}: Arabic orthographic normalization. ///
  • {@link ArabicStemFilter}: Arabic light stemming. ///
  • Arabic stop words file: a set of default Arabic stop words. ///
class LPPCONTRIBAPI ArabicAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. ArabicAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. ArabicAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~ArabicAnalyzer(); LUCENE_CLASS(ArabicAnalyzer); public: /// Default Arabic stopwords in UTF-8 format. /// /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html /// The stopword list is BSD-Licensed. static const uint8_t DEFAULT_STOPWORD_FILE[]; protected: /// Contains the stopwords used with the StopFilter. HashSet stoptable; LuceneVersion::Version matchVersion; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and /// {@link ArabicStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and /// {@link ArabicStemFilter}. 
virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI ArabicAnalyzerSavedStreams : public LuceneObject { public: virtual ~ArabicAnalyzerSavedStreams(); LUCENE_CLASS(ArabicAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ArabicLetterTokenizer.h000066400000000000000000000027701217574114600256000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICLETTERTOKENIZER_H #define ARABICLETTERTOKENIZER_H #include "LuceneContrib.h" #include "LetterTokenizer.h" namespace Lucene { /// Tokenizer that breaks text into runs of letters and diacritics. /// /// The problem with the standard Letter tokenizer is that it fails on diacritics. /// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc. /// class LPPCONTRIBAPI ArabicLetterTokenizer : public LetterTokenizer { public: /// Construct a new ArabicLetterTokenizer. ArabicLetterTokenizer(ReaderPtr input); /// Construct a new ArabicLetterTokenizer using a given {@link AttributeSource}. ArabicLetterTokenizer(AttributeSourcePtr source, ReaderPtr input); /// Construct a new ArabicLetterTokenizer using a given {@link AttributeFactory}. 
ArabicLetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input); virtual ~ArabicLetterTokenizer(); LUCENE_CLASS(ArabicLetterTokenizer); public: /// Allows for Letter category or NonspacingMark category virtual bool isTokenChar(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ArabicNormalizationFilter.h000066400000000000000000000017471217574114600264450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICNORMALIZATIONFILTER_H #define ARABICNORMALIZATIONFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography. class LPPCONTRIBAPI ArabicNormalizationFilter : public TokenFilter { public: ArabicNormalizationFilter(TokenStreamPtr input); virtual ~ArabicNormalizationFilter(); LUCENE_CLASS(ArabicNormalizationFilter); protected: ArabicNormalizerPtr normalizer; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ArabicNormalizer.h000066400000000000000000000045651217574114600245740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICNORMALIZER_H #define ARABICNORMALIZER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Normalizer for Arabic. 
/// /// Normalization is done in-place for efficiency, operating on a termbuffer. /// /// Normalization is defined as: ///
    ///
  • Normalization of hamza with alef seat to a bare alef. ///
  • Normalization of teh marbuta to heh ///
  • Normalization of dotless yeh (alef maksura) to yeh. ///
  • Removal of Arabic diacritics (the harakat) ///
  • Removal of tatweel (stretching character). ///
class LPPCONTRIBAPI ArabicNormalizer : public LuceneObject { public: virtual ~ArabicNormalizer(); LUCENE_CLASS(ArabicNormalizer); public: static const wchar_t ALEF; static const wchar_t ALEF_MADDA; static const wchar_t ALEF_HAMZA_ABOVE; static const wchar_t ALEF_HAMZA_BELOW; static const wchar_t YEH; static const wchar_t DOTLESS_YEH; static const wchar_t TEH_MARBUTA; static const wchar_t HEH; static const wchar_t TATWEEL; static const wchar_t FATHATAN; static const wchar_t DAMMATAN; static const wchar_t KASRATAN; static const wchar_t FATHA; static const wchar_t DAMMA; static const wchar_t KASRA; static const wchar_t SHADDA; static const wchar_t SUKUN; public: /// Normalize an input buffer of Arabic text /// @param s input buffer /// @param len length of input buffer /// @return length of input buffer after normalization int32_t normalize(wchar_t* s, int32_t len); /// Delete a character in-place /// @param s Input Buffer /// @param pos Position of character to delete /// @param len length of input buffer /// @return length of input buffer after deletion int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ArabicStemFilter.h000066400000000000000000000016401217574114600245170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICSTEMFILTER_H #define ARABICSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words. 
class LPPCONTRIBAPI ArabicStemFilter : public TokenFilter { public: ArabicStemFilter(TokenStreamPtr input); virtual ~ArabicStemFilter(); LUCENE_CLASS(ArabicStemFilter); protected: ArabicStemmerPtr stemmer; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ArabicStemmer.h000066400000000000000000000067761217574114600240740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICSTEMMER_H #define ARABICSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Stemmer for Arabic. /// /// Stemming is done in-place for efficiency, operating on a termbuffer. /// /// Stemming is defined as: ///
    ///
  • Removal of attached definite article, conjunction, and prepositions. ///
  • Stemming of common suffixes. ///
class LPPCONTRIBAPI ArabicStemmer : public LuceneObject { public: virtual ~ArabicStemmer(); LUCENE_CLASS(ArabicStemmer); public: static const wchar_t ALEF; static const wchar_t BEH; static const wchar_t TEH_MARBUTA; static const wchar_t TEH; static const wchar_t FEH; static const wchar_t KAF; static const wchar_t LAM; static const wchar_t NOON; static const wchar_t HEH; static const wchar_t WAW; static const wchar_t YEH; public: static const Collection prefixes(); static const Collection suffixes(); /// Stem an input buffer of Arabic text. /// @param s input buffer /// @param len length of input buffer /// @return length of input buffer after normalization int32_t stem(wchar_t* s, int32_t len); /// Stem a prefix off an Arabic word. /// @param s input buffer /// @param len length of input buffer /// @return new length of input buffer after stemming. int32_t stemPrefix(wchar_t* s, int32_t len); /// Stem suffix(es) off an Arabic word. /// @param s input buffer /// @param len length of input buffer /// @return new length of input buffer after stemming int32_t stemSuffix(wchar_t* s, int32_t len); /// Returns true if the prefix matches and can be stemmed /// @param s input buffer /// @param len length of input buffer /// @param prefix prefix to check /// @return true if the prefix matches and can be stemmed bool startsWith(wchar_t* s, int32_t len, const String& prefix); /// Returns true if the suffix matches and can be stemmed /// @param s input buffer /// @param len length of input buffer /// @param suffix suffix to check /// @return true if the suffix matches and can be stemmed bool endsWith(wchar_t* s, int32_t len, const String& suffix); protected: /// Delete n characters in-place /// @param s Input Buffer /// @param pos Position of character to delete /// @param len Length of input buffer /// @param chars number of characters to delete /// @return length of input buffer after deletion int32_t deleteChars(wchar_t* s, int32_t pos, int32_t len, int32_t chars); /// 
Delete a character in-place /// @param s Input Buffer /// @param pos Position of character to delete /// @param len length of input buffer /// @return length of input buffer after deletion int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/BrazilianAnalyzer.h000066400000000000000000000065041217574114600247640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BRAZILIANANALYZER_H #define BRAZILIANANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Brazilian Portuguese language. /// /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of /// exclusions (words that will not be stemmed, but indexed). /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI BrazilianAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. BrazilianAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); /// Builds an analyzer with the given stop words and stemming exclusion words. BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); virtual ~BrazilianAnalyzer(); LUCENE_CLASS(BrazilianAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; /// Contains words that should be indexed but not stemmed. 
HashSet excltable; LuceneVersion::Version matchVersion; /// List of typical Brazilian Portuguese stopwords. static const wchar_t* _BRAZILIAN_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); void setStemExclusionTable(HashSet exclusions); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and {@link BrazilianStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link BrazilianLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StopFilter}, {@link BrazilianNormalizationFilter} and /// {@link BrazilianStemFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI BrazilianAnalyzerSavedStreams : public LuceneObject { public: virtual ~BrazilianAnalyzerSavedStreams(); LUCENE_CLASS(BrazilianAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/BrazilianStemFilter.h000066400000000000000000000021501217574114600252460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BRAZILIANSTEMFILTER_H #define BRAZILIANSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that applies {@link BrazilianStemmer}. class LPPCONTRIBAPI BrazilianStemFilter : public TokenFilter { public: BrazilianStemFilter(TokenStreamPtr input); BrazilianStemFilter(TokenStreamPtr input, HashSet exclusiontable); virtual ~BrazilianStemFilter(); LUCENE_CLASS(BrazilianStemFilter); protected: /// {@link BrazilianStemmer} in use by this filter. BrazilianStemmerPtr stemmer; HashSet exclusions; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/BrazilianStemmer.h000066400000000000000000000103331217574114600246060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BRAZILIANSTEMMER_H #define BRAZILIANSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A stemmer for Brazilian Portuguese words. class LPPCONTRIBAPI BrazilianStemmer : public LuceneObject { public: virtual ~BrazilianStemmer(); LUCENE_CLASS(BrazilianStemmer); protected: String TERM; String CT; String R1; String R2; String RV; public: /// Stems the given term to a unique discriminator. /// /// @param term The term that should be stemmed. /// @return Discriminator for term. String stem(const String& term); protected: /// Checks a term if it can be processed correctly. /// @return true if, and only if, the given term consists in letters. bool isStemmable(const String& term); /// Checks a term if it can be processed indexed. 
/// @return true if it can be indexed bool isIndexable(const String& term); /// See if string is 'a','e','i','o','u' /// @return true if is vowel bool isVowel(wchar_t value); /// Gets R1. /// R1 - is the region after the first non-vowel following a vowel, or is the null region at the end of the /// word if there is no such non-vowel. /// @return null or a string representing R1 String getR1(const String& value); /// Gets RV. /// RV - if the second letter is a consonant, RV is the region after the next following vowel, /// /// OR if the first two letters are vowels, RV is the region after the next consonant, /// /// AND otherwise (consonant-vowel case) RV is the region after the third letter. /// /// BUT RV is the end of the word if this positions cannot be found. /// @return null or a string representing RV String getRV(const String& value); /// 1) Turn to lowercase /// 2) Remove accents /// 3) -> a ; -> o /// 4) -> c /// @return null or a string transformed String changeTerm(const String& value); /// Check if a string ends with a suffix. /// @return true if the string ends with the specified suffix. bool checkSuffix(const String& value, const String& suffix); /// Replace a string suffix by another /// @return the replaced String String replaceSuffix(const String& value, const String& toReplace, const String& changeTo); /// Remove a string suffix. /// @return the String without the suffix; String removeSuffix(const String& value, const String& toRemove); /// See if a suffix is preceded by a String. /// @return true if the suffix is preceded. bool suffixPreceded(const String& value, const String& suffix, const String& preceded); /// Creates CT (changed term) , substituting * '' and '' for 'a~' and 'o~'. void createCT(const String& term); /// Standard suffix removal. /// @return false if no ending was removed bool step1(); /// Verb suffixes. /// Search for the longest among the following suffixes in RV, and if found, delete. 
/// @return false if no ending was removed bool step2(); /// Delete suffix 'i' if in RV and preceded by 'c' void step3(); /// Residual suffix /// If the word ends with one of the suffixes (os a i o ) in RV, delete it. void step4(); /// If the word ends with one of (e ) in RV,delete it, and if preceded by 'gu' (or 'ci') with /// the 'u' (or 'i') in RV, delete the 'u' (or 'i') /// /// Or if the word ends remove the cedilha. void step5(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/CJKAnalyzer.h000066400000000000000000000045601217574114600234600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CJKANALYZER_H #define CJKANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// An {@link Analyzer} that tokenizes text with {@link CJKTokenizer} and filters with {@link StopFilter} class LPPCONTRIBAPI CJKAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. CJKAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. CJKAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~CJKAnalyzer(); LUCENE_CLASS(CJKAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; LuceneVersion::Version matchVersion; /// List of typical English stopwords. static const wchar_t* _STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with {@link StopFilter} virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with {@link StopFilter} virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI CJKAnalyzerSavedStreams : public LuceneObject { public: virtual ~CJKAnalyzerSavedStreams(); LUCENE_CLASS(CJKAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/CJKTokenizer.h000066400000000000000000000066141217574114600236470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CJKTOKENIZER_H #define CJKTOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// CJKTokenizer is designed for Chinese, Japanese, and Korean languages. /// /// The tokens returned are every two adjacent characters with overlap match. /// /// Example: "lucene C1C2C3C4" will be segmented to: "lucene" "C1C2" "C2C3" "C3C4". /// /// Additionally, the following is applied to Latin text (such as English): ///
    ///
  • Text is converted to lowercase. ///
  • Numeric digits, '+', '#', and '_' are tokenized as letters. ///
  • Full-width forms are converted to half-width forms. ///
/// For more info on Asian language (Chinese, Japanese, and Korean) text segmentation: /// please search google class LPPCONTRIBAPI CJKTokenizer : public Tokenizer { public: CJKTokenizer(ReaderPtr input); CJKTokenizer(AttributeSourcePtr source, ReaderPtr input); CJKTokenizer(AttributeFactoryPtr factory, ReaderPtr input); virtual ~CJKTokenizer(); LUCENE_CLASS(CJKTokenizer); public: /// Word token type static const int32_t WORD_TYPE; /// Single byte token type static const int32_t SINGLE_TOKEN_TYPE; /// Double byte token type static const int32_t DOUBLE_TOKEN_TYPE; /// Names for token types static const wchar_t* TOKEN_TYPE_NAMES[]; protected: /// Max word length static const int32_t MAX_WORD_LEN; static const int32_t IO_BUFFER_SIZE; enum UnicodeBlock { NONE, BASIC_LATIN, HALFWIDTH_AND_FULLWIDTH_FORMS }; protected: /// word offset, used to imply which character(in) is parsed int32_t offset; /// the index used only for ioBuffer int32_t bufferIndex; /// data length int32_t dataLen; /// character buffer, store the characters which are used to compose the returned Token CharArray buffer; /// I/O buffer, used to store the content of the input (one of the members of Tokenizer) CharArray ioBuffer; /// word type: single=>ASCII double=>non-ASCII word=>default int32_t tokenType; /// tag: previous character is a cached double-byte character "C1C2C3C4" /// ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened) /// C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4" bool preIsTokened; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; TypeAttributePtr typeAtt; protected: /// return unicode block for given character (see http://unicode.org/Public/UNIDATA/Blocks.txt) UnicodeBlock unicodeBlock(wchar_t c); public: virtual void initialize(); virtual bool incrementToken(); virtual void end(); virtual void reset(); virtual void reset(ReaderPtr input); }; } #endif 
LucenePlusPlus-rel_3.0.4/src/contrib/include/ChineseAnalyzer.h000066400000000000000000000033141217574114600244230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHINESEANALYZER_H #define CHINESEANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// An {@link Analyzer} that tokenizes text with {@link ChineseTokenizer} and filters with {@link ChineseFilter} class LPPCONTRIBAPI ChineseAnalyzer : public Analyzer { public: virtual ~ChineseAnalyzer(); LUCENE_CLASS(ChineseAnalyzer); public: /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from {@link ChineseTokenizer}, filtered with {@link ChineseFilter} virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from {@link ChineseTokenizer}, filtered with {@link ChineseFilter} virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI ChineseAnalyzerSavedStreams : public LuceneObject { public: virtual ~ChineseAnalyzerSavedStreams(); LUCENE_CLASS(ChineseAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ChineseFilter.h000066400000000000000000000022531217574114600240640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHINESEFILTER_H #define CHINESEFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} with a stop word table. ///
    ///
  • Numeric tokens are removed. ///
  • English tokens must be larger than 1 character. ///
  • One Chinese character as one Chinese word. ///
class LPPCONTRIBAPI ChineseFilter : public TokenFilter { public: ChineseFilter(TokenStreamPtr input); virtual ~ChineseFilter(); LUCENE_CLASS(ChineseFilter); public: /// Only English now, Chinese to be added later. static const wchar_t* STOP_WORDS[]; protected: HashSet stopTable; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ChineseTokenizer.h000066400000000000000000000050131217574114600246060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHINESETOKENIZER_H #define CHINESETOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// Tokenize Chinese text as individual Chinese characters. /// /// The difference between ChineseTokenizer and ChineseTokenizer is that they have different /// token parsing logic. /// /// For example, if the Chinese text "C1C2C3C4" is to be indexed: ///
    ///
  • The tokens returned from ChineseTokenizer are C1, C2, C3, C4. ///
  • The tokens returned from the ChineseTokenizer are C1C2, C2C3, C3C4. ///
/// /// Therefore the index created by ChineseTokenizer is much larger. /// /// The problem is that when searching for C1, C1C2, C1C3, C4C2, C1C2C3 ... the /// ChineseTokenizer works, but the ChineseTokenizer will not work. class LPPCONTRIBAPI ChineseTokenizer : public Tokenizer { public: ChineseTokenizer(ReaderPtr input); ChineseTokenizer(AttributeSourcePtr source, ReaderPtr input); ChineseTokenizer(AttributeFactoryPtr factory, ReaderPtr input); virtual ~ChineseTokenizer(); LUCENE_CLASS(ChineseTokenizer); protected: /// Max word length static const int32_t MAX_WORD_LEN; static const int32_t IO_BUFFER_SIZE; protected: /// word offset, used to imply which character(in) is parsed int32_t offset; /// the index used only for ioBuffer int32_t bufferIndex; /// data length int32_t dataLen; /// character buffer, store the characters which are used to compose the returned Token CharArray buffer; /// I/O buffer, used to store the content of the input (one of the members of Tokenizer) CharArray ioBuffer; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; int32_t length; int32_t start; public: virtual void initialize(); virtual bool incrementToken(); virtual void end(); virtual void reset(); virtual void reset(ReaderPtr input); protected: void push(wchar_t c); bool flush(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ContribInc.h000066400000000000000000000007411217574114600233720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifdef _WIN32 #include "targetver.h" #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include #endif #include "LuceneContrib.h" LucenePlusPlus-rel_3.0.4/src/contrib/include/CzechAnalyzer.h000066400000000000000000000054211217574114600241020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CZECHANALYZER_H #define CZECHANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Czech language. /// /// Supports an external list of stopwords (words that will not be indexed at all). /// A default set of stopwords is used unless an alternative list is specified. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI CzechAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. CzechAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. CzechAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~CzechAnalyzer(); LUCENE_CLASS(CzechAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; LuceneVersion::Version matchVersion; /// Default Czech stopwords in UTF-8 format. static const uint8_t _CZECH_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from {@link StandardTokenizer}, filtered with {@link StandardFilter}, /// {@link LowerCaseFilter}, and {@link StopFilter} virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from {@link StandardTokenizer}, filtered with {@link StandardFilter}, /// {@link LowerCaseFilter}, and {@link StopFilter} virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI CzechAnalyzerSavedStreams : public LuceneObject { public: virtual ~CzechAnalyzerSavedStreams(); LUCENE_CLASS(CzechAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/DefaultEncoder.h000066400000000000000000000014071217574114600242240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DEFAULTENCODER_H #define DEFAULTENCODER_H #include "Encoder.h" namespace Lucene { /// Simple {@link Encoder} implementation that does not modify the output. class LPPCONTRIBAPI DefaultEncoder : public Encoder, public LuceneObject { public: virtual ~DefaultEncoder(); LUCENE_CLASS(DefaultEncoder); public: virtual String encodeText(const String& originalText); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/DutchAnalyzer.h000066400000000000000000000065611217574114600241230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DUTCHANALYZER_H #define DUTCHANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Dutch language. /// /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an /// alternative list is specified, but the exclusion list is empty by default. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI DutchAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. DutchAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); /// Builds an analyzer with the given stop words and stemming exclusion words. DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); virtual ~DutchAnalyzer(); LUCENE_CLASS(DutchAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; /// Contains words that should be indexed but not stemmed. HashSet excltable; MapStringString stemdict; LuceneVersion::Version matchVersion; /// List of typical Dutch stopwords. static const wchar_t* _DUTCH_STOP_WORDS[]; public: virtual void initialize(); /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); void setStemExclusionTable(HashSet exclusions); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link StandardFilter}, {@link StopFilter} and {@link DutchStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link StandardFilter}, {@link StopFilter} and {@link DutchStemFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI DutchAnalyzerSavedStreams : public LuceneObject { public: virtual ~DutchAnalyzerSavedStreams(); LUCENE_CLASS(DutchAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/DutchStemFilter.h000066400000000000000000000045621217574114600244130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DUTCHSTEMFILTER_H #define DUTCHSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that stems Dutch words. /// /// It supports a table of words that should not be stemmed at all. The stemmer used can /// be changed at runtime after the filter object is created (as long as it is a /// {@link DutchStemmer}). /// /// NOTE: This stemmer does not implement the Snowball algorithm correctly, specifically /// doubled consonants. It is recommended that you consider using the "Dutch" stemmer in /// the snowball package instead. This stemmer will likely be deprecated in a future release. 
class LPPCONTRIBAPI DutchStemFilter : public TokenFilter { public: DutchStemFilter(TokenStreamPtr input); /// Builds a DutchStemFilter that uses an exclusion table. DutchStemFilter(TokenStreamPtr input, HashSet exclusiontable); /// Builds a DutchStemFilter that uses an exclusion table and dictionary of word stem /// pairs, that overrule the algorithm. DutchStemFilter(TokenStreamPtr input, HashSet exclusiontable, MapStringString stemdictionary); virtual ~DutchStemFilter(); LUCENE_CLASS(DutchStemFilter); protected: /// {@link DutchStemmer} in use by this filter. DutchStemmerPtr stemmer; HashSet exclusions; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Set a alternative/custom {@link DutchStemmer} for this filter. void setStemmer(DutchStemmerPtr stemmer); /// Set an alternative exclusion list for this filter. void setExclusionSet(HashSet exclusiontable); /// Set dictionary for stemming, this dictionary overrules the algorithm, so you can /// correct for a particular unwanted word-stem pair. void setStemDictionary(MapStringString dict); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/DutchStemmer.h000066400000000000000000000061051217574114600237440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DUTCHSTEMMER_H #define DUTCHSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A stemmer for Dutch words. /// /// The algorithm is an implementation of the /// dutch stemming /// algorithm in Martin Porter's snowball project. 
class LPPCONTRIBAPI DutchStemmer : public LuceneObject { public: DutchStemmer(); virtual ~DutchStemmer(); LUCENE_CLASS(DutchStemmer); protected: /// Buffer for the terms while stemming them. String buffer; bool removedE; MapStringString stemDict; int32_t R1; int32_t R2; public: /// Stems the given term to a unique discriminator. /// /// @param term The term that should be stemmed. /// @return Discriminator for term. String stem(const String& term); void setStemDictionary(MapStringString dict); protected: bool enEnding(); void step1(); /// Delete suffix e if in R1 and preceded by a non-vowel, and then undouble the ending. void step2(); /// Delete "heid" void step3a(); /// A d-suffix, or derivational suffix, enables a new word, often with a different grammatical /// category, or with a different sense, to be built from another word. Whether a d-suffix can /// be attached is discovered not from the rules of grammar, but by referring to a dictionary. /// So in English, ness can be added to certain adjectives to form corresponding nouns /// (littleness, kindness, foolishness ...) but not to all adjectives (not for example, to big, /// cruel, wise ...) d-suffixes can be used to change meaning, often in rather exotic ways. /// Remove "ing", "end", "ig", "lijk", "baar" and "bar" void step3b(); /// Undouble vowel. If the words ends CVD, where C is a non-vowel, D is a non-vowel other than /// I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, /// brood -> brod). void step4(); /// Checks if a term could be stemmed. 
bool isStemmable(); /// Substitute , , , , , , , , , void substitute(); bool isValidSEnding(int32_t index); bool isValidEnEnding(int32_t index); void unDouble(); void unDouble(int32_t endIndex); int32_t getRIndex(int32_t start); void storeYandI(); void reStoreYandI(); bool isVowel(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ElisionFilter.h000066400000000000000000000027031217574114600241100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ELISIONFILTER_H #define ELISIONFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be /// tokenized as "avion" (plane). /// /// Note that {@link StandardTokenizer} sees " ' " as a space, and cuts it out. /// @see Elision in Wikipedia class LPPCONTRIBAPI ElisionFilter : public TokenFilter { public: /// Constructs an elision filter with standard stop words. ElisionFilter(TokenStreamPtr input); /// Constructs an elision filter with a Set of stop words ElisionFilter(TokenStreamPtr input, HashSet articles); virtual ~ElisionFilter(); LUCENE_CLASS(ElisionFilter); protected: static const wchar_t apostrophes[]; CharArraySetPtr articles; TermAttributePtr termAtt; public: void setArticles(HashSet articles); virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/Encoder.h000066400000000000000000000013631217574114600227200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ENCODER_H #define ENCODER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Encodes original text. The Encoder works with the {@link Formatter} to generate output. class LPPCONTRIBAPI Encoder { public: virtual ~Encoder(); LUCENE_INTERFACE(Encoder); public: virtual String encodeText(const String& originalText); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/Formatter.h000066400000000000000000000020231217574114600232760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATTER_H #define FORMATTER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Processes terms found in the original text, typically by applying some form of mark-up to highlight /// terms in HTML search results pages. class LPPCONTRIBAPI Formatter { public: virtual ~Formatter(); LUCENE_INTERFACE(Formatter); public: /// @param originalText The section of text being considered for markup /// @param tokenGroup contains one or several overlapping Tokens along with their scores and positions. virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/Fragmenter.h000066400000000000000000000027721217574114600234400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FRAGMENTER_H #define FRAGMENTER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Implements the policy for breaking text into multiple fragments for consideration by the /// {@link Highlighter} class. A sophisticated implementation may do this on the basis of /// detecting end of sentences in the text. class LPPCONTRIBAPI Fragmenter { public: virtual ~Fragmenter(); LUCENE_INTERFACE(Fragmenter); public: /// Initializes the Fragmenter. You can grab references to the Attributes you are /// interested in from tokenStream and then access the values in {@link #isNewFragment()}. /// @param originalText the original source text. /// @param tokenStream the {@link TokenStream} to be fragmented. virtual void start(const String& originalText, TokenStreamPtr tokenStream); /// Test to see if this token from the stream should be held in a new TextFragment. /// Every time this is called, the TokenStream passed to start(String, TokenStream) /// will have been incremented. virtual bool isNewFragment(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/FrenchAnalyzer.h000066400000000000000000000065361217574114600242630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FRENCHANALYZER_H #define FRENCHANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for French language. 
/// /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an /// alternative list is specified, but the exclusion list is empty by default. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI FrenchAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. FrenchAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); /// Builds an analyzer with the given stop words and stemming exclusion words. FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); virtual ~FrenchAnalyzer(); LUCENE_CLASS(FrenchAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; /// Contains words that should be indexed but not stemmed. HashSet excltable; LuceneVersion::Version matchVersion; /// List of typical French stopwords. static const wchar_t* _FRENCH_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); void setStemExclusionTable(HashSet exclusions); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link StandardFilter}, {@link StopFilter}, {@link FrenchStemFilter}, and {@link LowerCaseFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from an {@link StandardTokenizer} filtered with /// {@link StandardFilter}, {@link StopFilter}, {@link FrenchStemFilter} and {@link LowerCaseFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI FrenchAnalyzerSavedStreams : public LuceneObject { public: virtual ~FrenchAnalyzerSavedStreams(); LUCENE_CLASS(FrenchAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/FrenchStemFilter.h000066400000000000000000000036421217574114600245470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FRENCHSTEMFILTER_H #define FRENCHSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that stems French words. /// /// It supports a table of words that should not be stemmed at all. The stemmer used can /// be changed at runtime after the filter object is created (as long as it is a /// {@link FrenchStemmer}). /// /// NOTE: This stemmer does not implement the Snowball algorithm correctly, especially /// involving case problems. It is recommended that you consider using the "French" stemmer /// in the snowball package instead. This stemmer will likely be deprecated in a future release. class LPPCONTRIBAPI FrenchStemFilter : public TokenFilter { public: FrenchStemFilter(TokenStreamPtr input); /// Builds a FrenchStemFilter that uses an exclusion table. FrenchStemFilter(TokenStreamPtr input, HashSet exclusiontable); virtual ~FrenchStemFilter(); LUCENE_CLASS(FrenchStemFilter); protected: /// {@link FrenchStemmer} in use by this filter. 
FrenchStemmerPtr stemmer; HashSet exclusions; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Set a alternative/custom {@link FrenchStemmer} for this filter. void setStemmer(FrenchStemmerPtr stemmer); /// Set an alternative exclusion list for this filter. void setExclusionSet(HashSet exclusiontable); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/FrenchStemmer.h000066400000000000000000000206551217574114600241100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FRENCHSTEMMER_H #define FRENCHSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A stemmer for French words. /// /// The algorithm is based on the work of Dr Martin Porter on his snowball project refer to /// http://snowball.sourceforge.net/french/stemmer.html (French stemming algorithm) for details. class LPPCONTRIBAPI FrenchStemmer : public LuceneObject { public: FrenchStemmer(); virtual ~FrenchStemmer(); LUCENE_CLASS(FrenchStemmer); protected: /// Buffer for the terms while stemming them. String stringBuffer; /// A temporary buffer, used to reconstruct R2. String tempBuffer; /// Region R0 is equal to the whole buffer. String R0; /// Region RV /// /// "If the word begins with two vowels, RV is the region after the third letter, otherwise /// the region after the first vowel not at the beginning of the word, or the end of the /// word if these positions cannot be found." 
String RV; /// Region R1 /// /// "R1 is the region after the first non-vowel following a vowel or is the null region at /// the end of the word if there is no such non-vowel" String R1; /// Region R2 /// /// "R2 is the region after the first non-vowel in R1 following a vowel or is the null region /// at the end of the word if there is no such non-vowel" String R2; /// Set to true if we need to perform step 2 bool suite; /// Set to true if the buffer was modified bool modified; public: /// Stems the given term to a unique discriminator. /// /// @param term The term that should be stemmed. /// @return Discriminator for term. String stem(const String& term); protected: /// Sets the search region Strings it needs to be done each time the buffer was modified. void setStrings(); /// First step of the Porter Algorithm. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step1(); /// Second step (A) of the Porter Algorithm. /// Will be performed if nothing changed from the first step or changed were done in the amment, /// emment, ments or ment suffixes. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. /// @return true if something changed in the buffer bool step2a(); /// Second step (B) of the Porter Algorithm. /// Will be performed if step 2 A was performed unsuccessfully. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step2b(); /// Third step of the Porter Algorithm. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step3(); /// Fourth step of the Porter Algorithm. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step4(); /// Fifth step of the Porter Algorithm. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step5(); /// Sixth step of the Porter Algorithm. 
/// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step6(); /// Delete a suffix searched in zone "source" if zone "from" contains prefix + search string. /// @param source String - the primary source zone for search. /// @param search String[] - the strings to search for suppression. /// @param from String - the secondary source zone for search. /// @param prefix String - the prefix to add to the search string to test. /// @return true if modified bool deleteFromIfPrecededIn(const String& source, Collection search, const String& from, const String& prefix); /// Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel. /// @param source String - the primary source zone for search. /// @param search String[] - the strings to search for suppression. /// @param vowel boolean - true if we need a vowel before the search string. /// @param from String - the secondary source zone for search (where vowel could be). /// @return true if modified bool deleteFromIfTestVowelBeforeIn(const String& source, Collection search, bool vowel, const String& from); /// Delete a suffix searched in zone "source" if preceded by the prefix. /// @param source String - the primary source zone for search. /// @param search String[] - the strings to search for suppression. /// @param prefix String - the prefix to add to the search string to test. /// @param without boolean - true if it will be deleted even without prefix found. void deleteButSuffixFrom(const String& source, Collection search, const String& prefix, bool without); /// Delete a suffix searched in zone "source" if preceded by prefix or replace it with the /// replace string if preceded by the prefix in the zone "from" or delete the suffix if specified. /// @param source String - the primary source zone for search. /// @param search String[] - the strings to search for suppression. /// @param prefix String - the prefix to add to the search string to test. 
/// @param without boolean - true if it will be deleted even without prefix found. void deleteButSuffixFromElseReplace(const String& source, Collection search, const String& prefix, bool without, const String& from, const String& replace); /// Replace a search string with another within the source zone. /// @param source String - the source zone for search. /// @param search String[] - the strings to search for replacement. /// @param replace String - the replacement string. bool replaceFrom(const String& source, Collection search, const String& replace); /// Delete a search string within the source zone. /// @param source the source zone for search. /// @param suffix the strings to search for suppression. void deleteFrom(const String& source, Collection suffix); /// Test if a char is a French vowel, including accentuated ones. /// @param ch the char to test. /// @return true if the char is a vowel bool isVowel(wchar_t ch); /// Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string. /// "R is the region after the first non-vowel following a vowel or is the null region at the /// end of the word if there is no such non-vowel". /// @param buffer the in buffer. /// @return the resulting string. String retrieveR(const String& buffer); /// Retrieve the "RV zone" from a buffer an return the corresponding string. /// "If the word begins with two vowels, RV is the region after the third letter, otherwise the /// region after the first vowel not at the beginning of the word, or the end of the word if /// these positions cannot be found." /// @param buffer the in buffer /// @return the resulting string String retrieveRV(const String& buffer); /// Turns u and i preceded AND followed by a vowel to UpperCase<. /// Turns y preceded OR followed by a vowel to UpperCase. /// Turns u preceded by q to UpperCase. /// @param buffer the buffer to treat void treatVowels(String& buffer); /// Checks a term if it can be processed correctly. 
/// @return boolean - true if, and only if, the given term consists in letters. bool isStemmable(const String& term); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/GermanAnalyzer.h000066400000000000000000000065731217574114600242700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GERMANANALYZER_H #define GERMANANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for German language. /// /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an /// alternative list is specified, but the exclusion list is empty by default. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI GermanAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. GermanAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); /// Builds an analyzer with the given stop words and stemming exclusion words. GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); virtual ~GermanAnalyzer(); LUCENE_CLASS(GermanAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stopSet; /// Contains words that should be indexed but not stemmed. HashSet exclusionSet; LuceneVersion::Version matchVersion; /// List of typical German stopwords. 
static const wchar_t* _GERMAN_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); void setStemExclusionTable(HashSet exclusions); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and {@link GermanStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link GermanLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StopFilter}, {@link GermanNormalizationFilter} and /// {@link GermanStemFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI GermanAnalyzerSavedStreams : public LuceneObject { public: virtual ~GermanAnalyzerSavedStreams(); LUCENE_CLASS(GermanAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/GermanStemFilter.h000066400000000000000000000031661217574114600245540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GERMANSTEMFILTER_H #define GERMANSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that stems German words. /// /// It supports a table of words that should not be stemmed at all. 
The stemmer used can /// be changed at runtime after the filter object is created (as long as it is a /// {@link GermanStemmer}). class LPPCONTRIBAPI GermanStemFilter : public TokenFilter { public: GermanStemFilter(TokenStreamPtr input); /// Builds a GermanStemFilter that uses an exclusion table. GermanStemFilter(TokenStreamPtr input, HashSet exclusionSet); virtual ~GermanStemFilter(); LUCENE_CLASS(GermanStemFilter); protected: /// {@link GermanStemmer} in use by this filter. GermanStemmerPtr stemmer; HashSet exclusionSet; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Set a alternative/custom {@link GermanStemmer} for this filter. void setStemmer(GermanStemmerPtr stemmer); /// Set an alternative exclusion list for this filter. void setExclusionSet(HashSet exclusionSet); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/GermanStemmer.h000066400000000000000000000055061217574114600241120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GERMANSTEMMER_H #define GERMANSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A stemmer for German words. /// /// The algorithm is based on the report "A Fast and Simple Stemming Algorithm for German Words" by Jörg /// Caumanns (joerg.caumanns at isst.fhg.de). class LPPCONTRIBAPI GermanStemmer : public LuceneObject { public: GermanStemmer(); virtual ~GermanStemmer(); LUCENE_CLASS(GermanStemmer); protected: /// Buffer for the terms while stemming them. String buffer; /// Amount of characters that are removed with substitute() while stemming. int32_t substCount; public: /// Stems the given term to a unique discriminator. 
/// /// @param term The term that should be stemmed. /// @return Discriminator for term. String stem(const String& term); protected: /// Checks if a term could be stemmed. /// @return true if, and only if, the given term consists in letters. bool isStemmable(); /// Suffix stripping (stemming) on the current term. The stripping is reduced to the seven "base" /// suffixes "e", "s", "n", "t", "em", "er" and * "nd", from which all regular suffixes are build /// of. The simplification causes some overstemming, and way more irregular stems, but still /// provides unique. /// Discriminators in the most of those cases. /// The algorithm is context free, except of the length restrictions. void strip(); /// Does some optimizations on the term. This optimisations are contextual. void optimize(); /// Removes a particle denotion ("ge") from a term. void removeParticleDenotion(); /// Do some substitutions for the term to reduce overstemming: /// /// - Substitute Umlauts with their corresponding vowel: -> aou, "" is substituted by "ss" /// - Substitute a second char of a pair of equal characters with an asterisk: ?? -> ?* /// - Substitute some common character combinations with a token: sch/ch/ei/ie/ig/st -> $//%/&/#/! void substitute(); /// Undoes the changes made by substitute(). That are character pairs and character combinations. /// Umlauts will remain as their corresponding vowel, as "" remains as "ss". void resubstitute(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/GradientFormatter.h000066400000000000000000000034431217574114600247630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef GRADIENTFORMATTER_H #define GRADIENTFORMATTER_H #include "Formatter.h" namespace Lucene { /// Formats text with different color intensity depending on the score of the term. class LPPCONTRIBAPI GradientFormatter : public Formatter, public LuceneObject { public: GradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor); virtual ~GradientFormatter(); LUCENE_CLASS(GradientFormatter); protected: double maxScore; bool highlightForeground; bool highlightBackground; public: int32_t fgRMin; int32_t fgGMin; int32_t fgBMin; int32_t fgRMax; int32_t fgGMax; int32_t fgBMax; int32_t bgRMin; int32_t bgGMin; int32_t bgBMin; int32_t bgRMax; int32_t bgGMax; int32_t bgBMax; public: virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); protected: String getForegroundColorString(double score); String getBackgroundColorString(double score); int32_t getColorVal(int32_t colorMin, int32_t colorMax, double score); static String intToHex(int32_t i); /// Converts a hex string into an int. static int32_t hexToInt(const String& hex); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/GreekAnalyzer.h000066400000000000000000000053541217574114600241100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GREEKANALYZER_H #define GREEKANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Greek language. /// /// Supports an external list of stopwords (words that will not be indexed at all). 
A default set of stopwords /// is used unless an alternative list is specified. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI GreekAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. GreekAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. GreekAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~GreekAnalyzer(); LUCENE_CLASS(GreekAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stopSet; LuceneVersion::Version matchVersion; /// Default Greek stopwords in UTF-8 format. static const uint8_t _GREEK_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link GreekLowerCaseFilter} and {@link StopFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link GreekLetterTokenizer} filtered with /// {@link GreekLowerCaseFilter} and {@link StopFilter}. 
virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI GreekAnalyzerSavedStreams : public LuceneObject { public: virtual ~GreekAnalyzerSavedStreams(); LUCENE_CLASS(GreekAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/GreekLowerCaseFilter.h000066400000000000000000000017761217574114600253610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GREEKLOWERCASEFILTER_H #define GREEKLOWERCASEFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// Normalizes token text to lower case, removes some Greek diacritics, and standardizes /// final sigma to sigma. class LPPCONTRIBAPI GreekLowerCaseFilter : public TokenFilter { public: GreekLowerCaseFilter(TokenStreamPtr input); virtual ~GreekLowerCaseFilter(); LUCENE_CLASS(GreekLowerCaseFilter); protected: TermAttributePtr termAtt; public: virtual bool incrementToken(); protected: wchar_t lowerCase(wchar_t codepoint); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/Highlighter.h000066400000000000000000000153541217574114600236040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef HIGHLIGHTER_H #define HIGHLIGHTER_H #include "LuceneContrib.h" #include "PriorityQueue.h" namespace Lucene { /// Class used to markup highlighted terms found in the best sections of a text, using configurable /// {@link Fragmenter}, {@link Scorer}, {@link Formatter}, {@link Encoder} and tokenizers. class LPPCONTRIBAPI Highlighter : public LuceneObject { public: Highlighter(HighlighterScorerPtr fragmentScorer); Highlighter(FormatterPtr formatter, HighlighterScorerPtr fragmentScorer); Highlighter(FormatterPtr formatter, EncoderPtr encoder, HighlighterScorerPtr fragmentScorer); virtual ~Highlighter(); LUCENE_CLASS(Highlighter); public: static const int32_t DEFAULT_MAX_CHARS_TO_ANALYZE; protected: int32_t maxDocCharsToAnalyze; FormatterPtr formatter; EncoderPtr encoder; FragmenterPtr textFragmenter; HighlighterScorerPtr fragmentScorer; public: /// Highlights chosen terms in a text, extracting the most relevant section. This is a convenience /// method that calls {@link #getBestFragment(TokenStreamPtr, const String&)} /// /// @param analyzer The analyzer that will be used to split text into chunks /// @param text Text to highlight terms in /// @param fieldName Name of field used to influence analyzer's tokenization policy /// @return highlighted text fragment or null if no terms found String getBestFragment(AnalyzerPtr analyzer, const String& fieldName, const String& text); /// Highlights chosen terms in a text, extracting the most relevant section. The document text is /// analyzed in chunks to record hit statistics across the document. After accumulating stats, the /// fragment with the highest score is returned. /// /// @param tokenStream A stream of tokens identified in the text parameter, including offset /// information. This is typically produced by an analyzer re-parsing a document's text. 
Some /// work may be done on retrieving TokenStreams more efficiently by adding support for storing /// original text position data in the Lucene index but this support is not currently available. /// @param text Text to highlight terms in /// @return highlighted text fragment or null if no terms found String getBestFragment(TokenStreamPtr tokenStream, const String& text); /// Highlights chosen terms in a text, extracting the most relevant sections. This is a convenience /// method that calls {@link #getBestFragments(TokenStreamPtr, const String&, int32_t)} /// /// @param analyzer The analyzer that will be used to split text into chunks /// @param fieldName The name of the field being highlighted (used by analyzer) /// @param text Text to highlight terms in /// @param maxNumFragments The maximum number of fragments. /// @return highlighted text fragments (between 0 and maxNumFragments number of fragments) Collection getBestFragments(AnalyzerPtr analyzer, const String& fieldName, const String& text, int32_t maxNumFragments); /// Highlights chosen terms in a text, extracting the most relevant sections. The document text is /// analyzed in chunks to record hit statistics across the document. After accumulating stats, the /// fragments with the highest scores are returned as an array of strings in order of score (contiguous /// fragments are merged into one in their original order to improve readability) /// /// @param text Text to highlight terms in /// @param maxNumFragments The maximum number of fragments. /// @return highlighted Text fragments (between 0 and maxNumFragments number of fragments) Collection getBestFragments(TokenStreamPtr tokenStream, const String& text, int32_t maxNumFragments); /// Low level api to get the most relevant (formatted) sections of the document. /// This method has been made public to allow visibility of score information held in TextFragment objects. 
Collection getBestTextFragments(TokenStreamPtr tokenStream, const String& text, bool merge, int32_t maxNumFragments); /// Improves readability of a score-sorted list of TextFragments by merging any fragments that were /// contiguous in the original text into one larger fragment with the correct order. This will leave /// a "null" in the array entry for the lesser scored fragment. /// /// @param frag An array of document fragments in descending score void mergeContiguousFragments(Collection frag); /// Highlights terms in the text , extracting the most relevant sections and concatenating the chosen /// fragments with a separator (typically "..."). The document text is analyzed in chunks to record /// hit statistics across the document. After accumulating stats, the fragments with the highest scores /// are returned in order as "separator" delimited strings. /// /// @param text Text to highlight terms in /// @param maxNumFragments The maximum number of fragments. /// @param separator The separator used to intersperse the document fragments (typically "...") /// @return highlighted text String getBestFragments(TokenStreamPtr tokenStream, const String& text, int32_t maxNumFragments, const String& separator); int32_t getMaxDocCharsToAnalyze(); void setMaxDocCharsToAnalyze(int32_t maxDocCharsToAnalyze); FragmenterPtr getTextFragmenter(); void setTextFragmenter(FragmenterPtr fragmenter); /// @return Object used to score each text fragment HighlighterScorerPtr getFragmentScorer(); void setFragmentScorer(HighlighterScorerPtr scorer); EncoderPtr getEncoder(); void setEncoder(EncoderPtr encoder); }; class LPPCONTRIBAPI FragmentQueue : public PriorityQueue { public: FragmentQueue(int32_t size); virtual ~FragmentQueue(); LUCENE_CLASS(FragmentQueue); protected: virtual bool lessThan(const TextFragmentPtr& first, const TextFragmentPtr& second); }; } #endif 
LucenePlusPlus-rel_3.0.4/src/contrib/include/HighlighterScorer.h000066400000000000000000000044361217574114600247610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef HIGHLIGHTERSCORER_H #define HIGHLIGHTERSCORER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A HighlighterScorer is responsible for scoring a stream of tokens. These token scores /// can then be used to compute {@link TextFragment} scores. class LPPCONTRIBAPI HighlighterScorer { public: virtual ~HighlighterScorer(); LUCENE_INTERFACE(HighlighterScorer); public: /// Called to init the Scorer with a {@link TokenStream}. You can grab references to the /// attributes you are interested in here and access them from {@link #getTokenScore()}. /// /// @param tokenStream the {@link TokenStream} that will be scored. /// @return either a {@link TokenStream} that the Highlighter should continue using (eg /// if you read the tokenSream in this method) or null to continue using the same {@link /// TokenStream} that was passed in. virtual TokenStreamPtr init(TokenStreamPtr tokenStream); /// Called when a new fragment is started for consideration. /// /// @param newFragment the fragment that will be scored next virtual void startFragment(TextFragmentPtr newFragment); /// Called for each token in the current fragment. The {@link Highlighter} will increment /// the {@link TokenStream} passed to init on every call. 
/// /// @return a score which is passed to the {@link Highlighter} class to influence the /// mark-up of the text (this return value is NOT used to score the fragment) virtual double getTokenScore(); /// Called when the {@link Highlighter} has no more tokens for the current fragment - the /// Scorer returns the weighting it has derived for the most recent fragment, typically /// based on the results of {@link #getTokenScore()}. virtual double getFragmentScore(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/LuceneContrib.h000066400000000000000000000111501217574114600240700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENECONTRIB_H #define LUCENECONTRIB_H #include "Lucene.h" namespace Lucene { // analyzers DECLARE_SHARED_PTR(ArabicAnalyzer) DECLARE_SHARED_PTR(ArabicAnalyzerSavedStreams) DECLARE_SHARED_PTR(ArabicLetterTokenizer) DECLARE_SHARED_PTR(ArabicNormalizationFilter) DECLARE_SHARED_PTR(ArabicNormalizer) DECLARE_SHARED_PTR(ArabicStemFilter) DECLARE_SHARED_PTR(ArabicStemmer) DECLARE_SHARED_PTR(BrazilianAnalyzer) DECLARE_SHARED_PTR(BrazilianAnalyzerSavedStreams) DECLARE_SHARED_PTR(BrazilianStemFilter) DECLARE_SHARED_PTR(BrazilianStemmer) DECLARE_SHARED_PTR(CJKAnalyzer) DECLARE_SHARED_PTR(CJKAnalyzerSavedStreams) DECLARE_SHARED_PTR(CJKTokenizer) DECLARE_SHARED_PTR(ChineseAnalyzer) DECLARE_SHARED_PTR(ChineseAnalyzerSavedStreams) DECLARE_SHARED_PTR(ChineseFilter) DECLARE_SHARED_PTR(ChineseTokenizer) DECLARE_SHARED_PTR(CzechAnalyzer) DECLARE_SHARED_PTR(CzechAnalyzerSavedStreams) DECLARE_SHARED_PTR(DutchAnalyzer) DECLARE_SHARED_PTR(DutchAnalyzerSavedStreams) DECLARE_SHARED_PTR(DutchStemFilter) DECLARE_SHARED_PTR(DutchStemmer) 
DECLARE_SHARED_PTR(ElisionFilter) DECLARE_SHARED_PTR(FrenchAnalyzer) DECLARE_SHARED_PTR(FrenchAnalyzerSavedStreams) DECLARE_SHARED_PTR(FrenchStemFilter) DECLARE_SHARED_PTR(FrenchStemmer) DECLARE_SHARED_PTR(GermanAnalyzer) DECLARE_SHARED_PTR(GermanAnalyzerSavedStreams) DECLARE_SHARED_PTR(GermanStemFilter) DECLARE_SHARED_PTR(GermanStemmer) DECLARE_SHARED_PTR(GreekLowerCaseFilter) DECLARE_SHARED_PTR(GreekAnalyzer) DECLARE_SHARED_PTR(GreekAnalyzerSavedStreams) DECLARE_SHARED_PTR(PersianAnalyzer) DECLARE_SHARED_PTR(PersianAnalyzerSavedStreams) DECLARE_SHARED_PTR(PersianNormalizationFilter) DECLARE_SHARED_PTR(PersianNormalizer) DECLARE_SHARED_PTR(ReverseStringFilter) DECLARE_SHARED_PTR(RussianAnalyzer) DECLARE_SHARED_PTR(RussianAnalyzerSavedStreams) DECLARE_SHARED_PTR(RussianLetterTokenizer) DECLARE_SHARED_PTR(RussianLowerCaseFilter) DECLARE_SHARED_PTR(RussianStemFilter) DECLARE_SHARED_PTR(RussianStemmer) DECLARE_SHARED_PTR(SnowballFilter) DECLARE_SHARED_PTR(SnowballAnalyzer) DECLARE_SHARED_PTR(SnowballAnalyzerSavedStreams) // highlighter DECLARE_SHARED_PTR(DefaultEncoder) DECLARE_SHARED_PTR(Encoder) DECLARE_SHARED_PTR(FakeReader) DECLARE_SHARED_PTR(Formatter) DECLARE_SHARED_PTR(Fragmenter) DECLARE_SHARED_PTR(FragmentQueue) DECLARE_SHARED_PTR(GradientFormatter) DECLARE_SHARED_PTR(Highlighter) DECLARE_SHARED_PTR(HighlighterScorer) DECLARE_SHARED_PTR(MapWeightedSpanTerm) DECLARE_SHARED_PTR(NullFragmenter) DECLARE_SHARED_PTR(PositionCheckingMap) DECLARE_SHARED_PTR(PositionSpan) DECLARE_SHARED_PTR(QueryScorer) DECLARE_SHARED_PTR(QueryTermExtractor) DECLARE_SHARED_PTR(QueryTermScorer) DECLARE_SHARED_PTR(SimpleFragmenter) DECLARE_SHARED_PTR(SimpleHTMLEncoder) DECLARE_SHARED_PTR(SimpleHTMLFormatter) DECLARE_SHARED_PTR(SimpleSpanFragmenter) DECLARE_SHARED_PTR(SpanGradientFormatter) DECLARE_SHARED_PTR(StringBuffer) DECLARE_SHARED_PTR(TextFragment) DECLARE_SHARED_PTR(TokenGroup) DECLARE_SHARED_PTR(TokenSources) DECLARE_SHARED_PTR(WeightedSpanTerm) 
DECLARE_SHARED_PTR(WeightedSpanTermExtractor) DECLARE_SHARED_PTR(WeightedTerm) // memory DECLARE_SHARED_PTR(MemoryIndex) DECLARE_SHARED_PTR(MemoryIndexInfo) DECLARE_SHARED_PTR(MemoryIndexReader) typedef HashMap< String, WeightedSpanTermPtr > MapStringWeightedSpanTerm; typedef HashMap< String, WeightedTermPtr > MapStringWeightedTerm; typedef HashMap< String, SpanQueryPtr > MapStringSpanQuery; typedef HashMap< String, Collection > MapStringIntCollection; typedef HashMap< String, MemoryIndexInfoPtr > MapStringMemoryIndexInfo; typedef std::pair< String, Collection > PairStringIntCollection; typedef Collection< PairStringIntCollection > CollectionStringIntCollection; typedef std::pair< String, MemoryIndexInfoPtr > PairStringMemoryIndexInfo; typedef Collection< PairStringMemoryIndexInfo > CollectionStringMemoryIndexInfo; typedef HashSet< WeightedTermPtr, luceneHash, luceneEquals > SetWeightedTerm; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/MapWeightedSpanTerm.h000066400000000000000000000022001217574114600252000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MAPWEIGHTEDSPANTERM_H #define MAPWEIGHTEDSPANTERM_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Utility class that encapsulates a StringWeightedSpanTerm map that can be overridden. 
class LPPCONTRIBAPI MapWeightedSpanTerm : public LuceneObject { public: MapWeightedSpanTerm(); virtual ~MapWeightedSpanTerm(); LUCENE_CLASS(MapWeightedSpanTerm); protected: MapStringWeightedSpanTerm map; public: virtual MapStringWeightedSpanTerm::iterator begin(); virtual MapStringWeightedSpanTerm::iterator end(); virtual void put(const String& key, WeightedSpanTermPtr val); virtual WeightedSpanTermPtr get(const String& key) const; virtual void clear(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/MemoryIndex.h000066400000000000000000000367711217574114600236140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MEMORYINDEX_H #define MEMORYINDEX_H #include "LuceneContrib.h" #include "IndexReader.h" #include "TermEnum.h" #include "Collector.h" #include "TermPositions.h" #include "TermPositionVector.h" namespace Lucene { /// High-performance single-document main memory Lucene fulltext search index. /// /// Overview /// /// This class is a replacement/substitute for a large subset of {@link RAMDirectory} functionality. /// It is designed to enable maximum efficiency for on-the-fly matchmaking combining structured and /// fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML message /// queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and /// distribution systems, application level routers, firewalls, classifiers, etc. Rather than /// targeting fulltext search of infrequent queries over huge persistent data archives (historic /// search), this class targets fulltext search of huge numbers of queries over comparatively small /// transient realtime data (prospective search). 
/// /// For example as in ///
    /// double score = search(const String& text, QueryPtr query)
    /// 
/// /// Each instance can hold at most one Lucene "document", with a document containing zero or more /// "fields", each field having a name and a fulltext value. The fulltext value is tokenized /// (split and transformed) into zero or more index terms (aka words) on addField(), according to /// the policy implemented by an Analyzer. For example, Lucene analyzers can split on whitespace, /// normalize to lower case for case insensitivity, ignore common terms with little discriminatory /// value such as "he", "in", "and" (stop words), reduce the terms to their natural linguistic root /// form such as "fishing" being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri /// (upon indexing and/or querying), etc. /// /// Note that a Lucene query selects on the field names and associated (indexed) tokenized terms, /// not on the original fulltext(s) - the latter are not stored but rather thrown away immediately /// after tokenization. /// /// For some interesting background information on search technology, see Bob Wyman's Prospective Search, /// Jim Gray's /// A Call to Arms - Custom subscriptions, and Tim Bray's On Search, the Series. /// /// /// Example Usage ///
    /// AnalyzerPtr analyzer = newLucene();
    /// MemoryIndexPtr index = newLucene();
    /// index->addField(L"content", L"Readings about Salmons and other select Alaska fishing Manuals", analyzer);
    /// index->addField(L"author", L"Tales of James", analyzer);
    /// QueryParserPtr parser = newLucene(L"content", analyzer);
    /// double score = index->search(parser->parse(L"+author:james +salmon~ +fish* manual~"));
    /// if (score > 0.0)
    /// {
    ///     // it's a match
    /// }
    /// else
    /// {
    ///     // no match found
    /// }
    /// 
/// /// /// Performance Notes /// /// Internally there's a new data structure geared towards efficient indexing and searching, plus /// the necessary support code to seamlessly plug into the Lucene framework. /// /// This class performs very well for very small texts (eg. 10 chars) as well as for large texts /// (eg. 10 MB) and everything in between. Typically, it is about 10-100 times faster than /// RAMDirectory. Note that RAMDirectory has particularly large efficiency overheads for small to /// medium sized texts, both in time and space. Indexing a field with N tokens takes O(N) in the /// best case, and O(N logN) in the worst case. Memory consumption is probably larger than for /// RAMDirectory. /// class LPPCONTRIBAPI MemoryIndex : public LuceneObject { public: /// Constructs an empty instance that can optionally store the start and end character offset /// of each token term in the text. This can be useful for highlighting of hit locations with /// the Lucene highlighter package. Private until the highlighter package matures, so that /// this can actually be meaningfully integrated. /// @param storeOffsets Whether or not to store the start and end character offset of each /// token term in the text. 
MemoryIndex(bool storeOffsets = false); virtual ~MemoryIndex(); LUCENE_CLASS(MemoryIndex); protected: /// info for each field MapStringMemoryIndexInfo fields; /// fields sorted ascending by fieldName; lazily computed on demand CollectionStringMemoryIndexInfo sortedFields; /// pos: positions[3 * i], startOffset: positions[3 * i + 1], endOffset: positions[3 * i + 2] int32_t stride; static const double docBoost; public: /// Convenience method; Tokenizes the given field text and adds the resulting terms to the /// index; Equivalent to adding an indexed non-keyword Lucene {@link Field} that is {@link /// Field::INDEX_ANALYZED tokenized}, {@link Field::STORE_NO not stored}, {@link /// Field::TERM_VECTOR_WITH_POSITIONS termVectorStored with positions} (or {@link /// Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS termVectorStored with positions and offsets}) /// @param fieldName A name to be associated with the text /// @param text The text to tokenize and index. /// @param analyzer The analyzer to use for tokenization void addField(const String& fieldName, const String& text, AnalyzerPtr analyzer); /// Iterates over the given token stream and adds the resulting terms to the index; /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored, Lucene {@link /// Field}. Finally closes the token stream. Note that untokenized keywords can be added /// with this method via {@link #keywordTokenStream(Collection)}, the Lucene contrib /// KeywordTokenizer or similar utilities. /// @param fieldName A name to be associated with the text. /// @param stream The token stream to retrieve tokens from. /// @param boost The boost factor for hits for this field. /// @see Field#setBoost(double) void addField(const String& fieldName, TokenStreamPtr stream, double boost = 1.0); /// Creates and returns a searcher that can be used to execute arbitrary Lucene queries /// and to collect the resulting query results as hits. 
/// @return a searcher IndexSearcherPtr createSearcher(); /// Convenience method that efficiently returns the relevance score by matching this index /// against the given Lucene query expression. /// @param query An arbitrary Lucene query to run against this index /// @return the relevance score of the matchmaking; A number in the range [0.0 .. 1.0], /// with 0.0 indicating no match. The higher the number the better the match. double search(QueryPtr query); protected: int32_t numPositions(Collection positions); /// sorts into ascending order (on demand), reusing memory along the way void sortFields(); friend class MemoryIndexReader; friend class MemoryIndexInfo; friend class MemoryIndexTermEnum; friend class MemoryIndexTermPositions; friend class MemoryIndexTermPositionVector; }; /// Index data structure for a field; Contains the tokenized term texts and their positions. class LPPCONTRIBAPI MemoryIndexInfo : public LuceneObject { public: MemoryIndexInfo(MapStringIntCollection terms, int32_t numTokens, int32_t numOverlapTokens, double boost); virtual ~MemoryIndexInfo(); LUCENE_CLASS(MemoryIndexInfo); protected: /// Term strings and their positions for this field MapStringIntCollection terms; /// Terms sorted ascending by term text; computed on demand CollectionStringIntCollection sortedTerms; /// Number of added tokens for this field int32_t numTokens; /// Number of overlapping tokens for this field int32_t numOverlapTokens; /// Boost factor for hits for this field double boost; /// Term for this field's fieldName, lazily computed on demand TermPtr _template; public: /// Sorts hashed terms into ascending order, reusing memory along the way. Note that /// sorting is lazily delayed until required (often it's not required at all). 
void sortTerms(); /// Note that the frequency can be calculated as numPosition(getPositions(x)) Collection getPositions(const String& term); /// Note that the frequency can be calculated as numPosition(getPositions(x)) Collection getPositions(int32_t pos); double getBoost(); friend class MemoryIndexReader; friend class MemoryIndexTermEnum; friend class MemoryIndexTermPositions; friend class MemoryIndexTermPositionVector; }; /// Search support for Lucene framework integration; implements all methods required by the /// Lucene IndexReader contracts. class LPPCONTRIBAPI MemoryIndexReader : public IndexReader { public: MemoryIndexReader(MemoryIndexPtr memoryIndex); virtual ~MemoryIndexReader(); LUCENE_CLASS(MemoryIndexReader); public: static TermPtr MATCH_ALL_TERM(); protected: MemoryIndexPtr memoryIndex; SearcherWeakPtr _searcher; // needed to find searcher.getSimilarity() /// cache norms to avoid repeated expensive calculations ByteArray cachedNorms; String cachedFieldName; SimilarityPtr cachedSimilarity; protected: MemoryIndexInfoPtr getInfo(const String& fieldName); MemoryIndexInfoPtr getInfo(int32_t pos); SimilarityPtr getSimilarity(); void setSearcher(SearcherPtr searcher); public: virtual int32_t docFreq(TermPtr t); virtual TermEnumPtr terms(); virtual TermEnumPtr terms(TermPtr t); virtual TermPositionsPtr termPositions(); virtual TermDocsPtr termDocs(); virtual Collection getTermFreqVectors(int32_t docNumber); virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); virtual ByteArray norms(const String& field); virtual void norms(const String& field, ByteArray norms, int32_t offset); virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); virtual int32_t numDocs(); virtual int32_t maxDoc(); virtual DocumentPtr document(int32_t n); virtual 
DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); virtual bool isDeleted(int32_t n); virtual bool hasDeletions(); virtual void doDelete(int32_t docNum); virtual void doUndeleteAll(); virtual void doCommit(MapStringString commitUserData); virtual void doClose(); virtual HashSet getFieldNames(FieldOption fieldOption); friend class MemoryIndex; friend class MemoryIndexTermEnum; friend class MemoryIndexTermPositions; friend class MemoryIndexTermPositionVector; }; class LPPCONTRIBAPI MemoryIndexTermEnum : public TermEnum { public: MemoryIndexTermEnum(MemoryIndexReaderPtr reader, int32_t ix, int32_t jx); virtual ~MemoryIndexTermEnum(); LUCENE_CLASS(MemoryIndexTermEnum); protected: MemoryIndexReaderWeakPtr _reader; int32_t i; int32_t j; public: virtual bool next(); virtual TermPtr term(); virtual int32_t docFreq(); virtual void close(); protected: TermPtr createTerm(MemoryIndexInfoPtr info, int32_t pos, const String& text); }; class LPPCONTRIBAPI MemoryIndexCollector : public Collector { public: MemoryIndexCollector(Collection scores); virtual ~MemoryIndexCollector(); LUCENE_CLASS(MemoryIndexCollector); protected: Collection scores; ScorerPtr scorer; public: virtual void collect(int32_t doc); virtual void setScorer(ScorerPtr scorer); virtual bool acceptsDocsOutOfOrder(); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); }; class LPPCONTRIBAPI MemoryIndexTermPositions : public TermPositions, public LuceneObject { public: MemoryIndexTermPositions(MemoryIndexReaderPtr reader); virtual ~MemoryIndexTermPositions(); LUCENE_CLASS(MemoryIndexTermPositions); protected: MemoryIndexReaderWeakPtr _reader; bool hasNext; int32_t cursor; Collection current; TermPtr term; public: virtual void seek(TermPtr term); virtual void seek(TermEnumPtr termEnum); virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); virtual int32_t read(Collection docs, Collection freqs); virtual bool skipTo(int32_t target); virtual void close(); virtual int32_t 
nextPosition(); virtual int32_t getPayloadLength(); virtual ByteArray getPayload(ByteArray data, int32_t offset); virtual bool isPayloadAvailable(); }; class MemoryIndexTermPositionVector : public TermPositionVector, public LuceneObject { public: MemoryIndexTermPositionVector(MemoryIndexReaderPtr reader, MemoryIndexInfoPtr info, const String& fieldName); virtual ~MemoryIndexTermPositionVector(); LUCENE_CLASS(MemoryIndexTermPositionVector); protected: MemoryIndexReaderWeakPtr _reader; CollectionStringIntCollection sortedTerms; String fieldName; public: virtual String getField(); virtual int32_t size(); virtual Collection getTerms(); virtual Collection getTermFrequencies(); virtual int32_t indexOf(const String& term); virtual Collection indexesOf(Collection terms, int32_t start, int32_t length); virtual Collection getTermPositions(int32_t index); virtual Collection getOffsets(int32_t index); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/NullFragmenter.h000066400000000000000000000016321217574114600242650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NULLFRAGMENTER_H #define NULLFRAGMENTER_H #include "Fragmenter.h" namespace Lucene { /// {@link Fragmenter} implementation which does not fragment the text. This is useful for /// highlighting the entire content of a document or field. 
class LPPCONTRIBAPI NullFragmenter : public Fragmenter, public LuceneObject { public: virtual ~NullFragmenter(); LUCENE_CLASS(NullFragmenter); public: virtual void start(const String& originalText, TokenStreamPtr tokenStream); virtual bool isNewFragment(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/PersianAnalyzer.h000066400000000000000000000060511217574114600244470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PERSIANANALYZER_H #define PERSIANANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Persian. /// /// This Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around /// zero-width non-joiner in addition to whitespace. Some persian-specific variant /// forms (such as farsi yeh and keheh) are standardized. "Stemming" is accomplished /// via stopwords. class LPPCONTRIBAPI PersianAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. PersianAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. PersianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~PersianAnalyzer(); LUCENE_CLASS(PersianAnalyzer); public: /// Default Persian stopwords in UTF-8 format. /// /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html /// The stopword list is BSD-Licensed. static const uint8_t DEFAULT_STOPWORD_FILE[]; protected: /// Contains the stopwords used with the StopFilter. HashSet stoptable; LuceneVersion::Version matchVersion; public: /// Returns an unmodifiable instance of the default stop-words set. 
static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} /// and Persian Stop words. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} /// and Persian Stop words. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI PersianAnalyzerSavedStreams : public LuceneObject { public: virtual ~PersianAnalyzerSavedStreams(); LUCENE_CLASS(PersianAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/PersianNormalizationFilter.h000066400000000000000000000017571217574114600266660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PERSIANNORMALIZATIONFILTER_H #define PERSIANNORMALIZATIONFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the orthography. 
class LPPCONTRIBAPI PersianNormalizationFilter : public TokenFilter { public: PersianNormalizationFilter(TokenStreamPtr input); virtual ~PersianNormalizationFilter(); LUCENE_CLASS(PersianNormalizationFilter); protected: PersianNormalizerPtr normalizer; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/PersianNormalizer.h000066400000000000000000000037211217574114600250050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PERSIANNORMALIZER_H #define PERSIANNORMALIZER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Normalizer for Persian. /// /// Normalization is done in-place for efficiency, operating on a termbuffer. /// /// Normalization is defined as: ///
    ///
  • Normalization of various heh + hamza forms and heh goal to heh. ///
  • Normalization of farsi yeh and yeh barree to arabic yeh. ///
  • Normalization of persian keheh to arabic kaf. ///
class LPPCONTRIBAPI PersianNormalizer : public LuceneObject { public: virtual ~PersianNormalizer(); LUCENE_CLASS(PersianNormalizer); public: static const wchar_t YEH; static const wchar_t FARSI_YEH; static const wchar_t YEH_BARREE; static const wchar_t KEHEH; static const wchar_t KAF; static const wchar_t HAMZA_ABOVE; static const wchar_t HEH_YEH; static const wchar_t HEH_GOAL; static const wchar_t HEH; public: /// Normalize an input buffer of Persian text /// @param s input buffer /// @param len length of input buffer /// @return length of input buffer after normalization int32_t normalize(wchar_t* s, int32_t len); /// Delete a character in-place /// @param s Input Buffer /// @param pos Position of character to delete /// @param len length of input buffer /// @return length of input buffer after deletion int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/QueryScorer.h000066400000000000000000000103501217574114600236200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYSCORER_H #define QUERYSCORER_H #include "LuceneContrib.h" #include "HighlighterScorer.h" namespace Lucene { /// {@link HighlighterScorer} implementation which scores text fragments by the number of unique query terms found. /// This class converts appropriate {@link Query}s to {@link SpanQuery}s and attempts to score only /// those terms that participated in generating the 'hit' on the document. 
class LPPCONTRIBAPI QueryScorer : public HighlighterScorer, public LuceneObject { public: /// @param query Query to use for highlighting QueryScorer(QueryPtr query); /// @param query Query to use for highlighting /// @param field Field to highlight - pass empty string to ignore fields QueryScorer(QueryPtr query, const String& field); /// @param query Query to use for highlighting /// @param reader {@link IndexReader} to use for quasi tf/idf scoring /// @param field Field to highlight - pass empty string to ignore fields QueryScorer(QueryPtr query, IndexReaderPtr reader, const String& field); /// @param query Query to use for highlighting /// @param reader {@link IndexReader} to use for quasi tf/idf scoring /// @param field Field to highlight - pass empty string to ignore fields /// @param defaultField QueryScorer(QueryPtr query, IndexReaderPtr reader, const String& field, const String& defaultField); /// @param query Query to use for highlighting /// @param field Field to highlight - pass empty string to ignore fields /// @param defaultField QueryScorer(QueryPtr query, const String& field, const String& defaultField); /// @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s QueryScorer(Collection weightedTerms); virtual ~QueryScorer(); LUCENE_CLASS(QueryScorer); protected: double totalScore; HashSet foundTerms; MapWeightedSpanTermPtr fieldWeightedSpanTerms; double maxTermWeight; int32_t position; String defaultField; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncAtt; bool expandMultiTermQuery; QueryPtr query; String field; IndexReaderPtr reader; bool skipInitExtractor; bool wrapToCaching; protected: void init(QueryPtr query, const String& field, IndexReaderPtr reader, bool expandMultiTermQuery); TokenStreamPtr initExtractor(TokenStreamPtr tokenStream); public: virtual double getFragmentScore(); /// @return The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale). 
virtual double getMaxTermWeight(); virtual double getTokenScore(); virtual TokenStreamPtr init(TokenStreamPtr tokenStream); virtual WeightedSpanTermPtr getWeightedSpanTerm(const String& token); virtual void startFragment(TextFragmentPtr newFragment); /// @return true if multi-term queries should be expanded virtual bool isExpandMultiTermQuery(); /// Controls whether or not multi-term queries are expanded against a {@link MemoryIndex} {@link IndexReader}. /// @param expandMultiTermQuery true if multi-term queries should be expanded virtual void setExpandMultiTermQuery(bool expandMultiTermQuery); /// By default, {@link TokenStream}s that are not of the type {@link CachingTokenFilter} are wrapped in a {@link /// CachingTokenFilter} to ensure an efficient reset - if you are already using a different caching {@link /// TokenStream} impl and you don't want it to be wrapped, set this to false. virtual void setWrapIfNotCachingTokenFilter(bool wrap); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/QueryTermExtractor.h000066400000000000000000000070341217574114600251730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYTERMEXTRACTOR_H #define QUERYTERMEXTRACTOR_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Utility class used to extract the terms used in a query, plus any weights. This class will not /// find terms for MultiTermQuery, TermRangeQuery and PrefixQuery classes so the caller must pass a /// rewritten query (see Query.rewrite) to obtain a list of expanded terms. 
class LPPCONTRIBAPI QueryTermExtractor : public LuceneObject { public: virtual ~QueryTermExtractor(); LUCENE_CLASS(QueryTermExtractor); public: /// Extracts all terms texts of a given Query into an array of WeightedTerms /// /// @param query Query to extract term texts from. /// @return an array of the terms used in a query, plus their weights. static Collection getTerms(QueryPtr query); /// Extracts all terms texts of a given Query into an array of WeightedTerms /// /// @param query Query to extract term texts from. /// @param reader used to compute IDF which can be used to /// a) score selected fragments better /// b) use graded highlights eg changing intensity of font color /// @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based. /// @return an array of the terms used in a query, plus their weights. static Collection getIdfWeightedTerms(QueryPtr query, IndexReaderPtr reader, const String& fieldName); /// Extracts all terms texts of a given Query into an array of WeightedTerms /// /// @param query Query to extract term texts from. /// @param prohibited true to extract "prohibited" terms, too. /// @param fieldName The fieldName used to filter query terms. /// @return an array of the terms used in a query, plus their weights. static Collection getTerms(QueryPtr query, bool prohibited, const String& fieldName); /// Extracts all terms texts of a given Query into an array of WeightedTerms /// /// @param query Query to extract term texts from. /// @param prohibited true to extract "prohibited" terms, too. /// @return an array of the terms used in a query, plus their weights. 
static Collection getTerms(QueryPtr query, bool prohibited); static void getTerms(QueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName); protected: /// extractTerms is currently the only query-independent means of introspecting queries but it only reveals /// a list of terms for that query - not the boosts each individual term in that query may or may not have. /// "Container" queries such as BooleanQuery should be unwrapped to get at the boost info held in each child /// element. static void getTermsFromBooleanQuery(BooleanQueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName); static void getTermsFromFilteredQuery(FilteredQueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/QueryTermScorer.h000066400000000000000000000055601217574114600244570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYTERMSCORER_H #define QUERYTERMSCORER_H #include "LuceneContrib.h" #include "HighlighterScorer.h" namespace Lucene { /// {@link HighlighterScorer} implementation which scores text fragments by the number of unique query terms found. /// This class uses the {@link QueryTermExtractor} class to process determine the query terms and their /// boosts to be used. 
class LPPCONTRIBAPI QueryTermScorer : public HighlighterScorer, public LuceneObject { public: /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class /// and the searcher) QueryTermScorer(QueryPtr query); /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class /// and the searcher) /// @param fieldName the Field name which is used to match Query terms QueryTermScorer(QueryPtr query, const String& fieldName); /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class /// and the searcher) /// @param reader used to compute IDF which can be used to /// a) score selected fragments better /// b) use graded highlights eg set font color intensity /// @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based QueryTermScorer(QueryPtr query, IndexReaderPtr reader, const String& fieldName); /// @param weightedTerms an array of pre-created {@link WeightedTerm}s QueryTermScorer(Collection weightedTerms); virtual ~QueryTermScorer(); LUCENE_CLASS(QueryTermScorer); public: TextFragmentPtr currentTextFragment; HashSet uniqueTermsInFragment; double totalScore; double maxTermWeight; protected: MapStringWeightedTerm termsToFind; TermAttributePtr termAtt; protected: void ConstructQueryTermScorer(Collection weightedTerms); public: virtual TokenStreamPtr init(TokenStreamPtr tokenStream); virtual void startFragment(TextFragmentPtr newFragment); virtual double getTokenScore(); virtual double getFragmentScore(); virtual void allFragmentsProcessed(); /// @return The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale. 
virtual double getMaxTermWeight(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/ReverseStringFilter.h000066400000000000000000000042371217574114600253140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef REVERSESTRINGFILTER_H #define REVERSESTRINGFILTER_H #include "TokenFilter.h" namespace Lucene { /// Reverse token string, for example "country" => "yrtnuoc". /// /// If marker is supplied, then tokens will be also prepended by that character. For example, with a /// marker of \u0001, "country" => "\u0001yrtnuoc". This is useful when implementing efficient /// leading wildcards search. class LPPCONTRIBAPI ReverseStringFilter : public TokenFilter { public: /// Create a new ReverseStringFilter that reverses all tokens in the supplied {@link TokenStream}. /// /// The reversed tokens will not be marked. ReverseStringFilter(TokenStreamPtr input); /// Create a new ReverseStringFilter that reverses and marks all tokens in the supplied {@link /// TokenStream}. /// /// The reversed tokens will be prepended (marked) by the marker character. 
ReverseStringFilter(TokenStreamPtr input, wchar_t marker); virtual ~ReverseStringFilter(); LUCENE_CLASS(ReverseStringFilter); protected: TermAttributePtr termAtt; wchar_t marker; static const wchar_t NOMARKER; public: /// Example marker character: U+0001 (START OF HEADING) static const wchar_t START_OF_HEADING_MARKER; /// Example marker character: U+001F (INFORMATION SEPARATOR ONE) static const wchar_t INFORMATION_SEPARATOR_MARKER; /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) static const wchar_t PUA_EC00_MARKER; /// Example marker character: U+200F (RIGHT-TO-LEFT MARK) static const wchar_t RTL_DIRECTION_MARKER; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/RussianAnalyzer.h000066400000000000000000000053011217574114600244670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANANALYZER_H #define RUSSIANANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Russian language. /// /// Supports an external list of stopwords (words that will not be indexed at all). /// A default set of stopwords is used unless an alternative list is specified. class LPPCONTRIBAPI RussianAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. RussianAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. RussianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~RussianAnalyzer(); LUCENE_CLASS(RussianAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. 
HashSet stopSet; LuceneVersion::Version matchVersion; /// List of typical Russian stopwords. static const uint8_t DEFAULT_STOPWORD_FILE[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link RussianLetterTokenizer} filtered with /// {@link RussianLowerCaseFilter}, {@link StopFilter} and {@link RussianStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link RussianLetterTokenizer} filtered with /// {@link RussianLowerCaseFilter}, {@link StopFilter} and {@link RussianStemFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI RussianAnalyzerSavedStreams : public LuceneObject { public: virtual ~RussianAnalyzerSavedStreams(); LUCENE_CLASS(RussianAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/RussianLetterTokenizer.h000066400000000000000000000025751217574114600260460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANLETTERTOKENIZER_H #define RUSSIANLETTERTOKENIZER_H #include "CharTokenizer.h" namespace Lucene { /// A RussianLetterTokenizer is a {@link Tokenizer} that extends {@link LetterTokenizer} by also /// allowing the basic Latin digits 0-9. 
class LPPCONTRIBAPI RussianLetterTokenizer : public CharTokenizer { public: /// Construct a new RussianLetterTokenizer. RussianLetterTokenizer(ReaderPtr input); /// Construct a new RussianLetterTokenizer using a given {@link AttributeSource}. RussianLetterTokenizer(AttributeSourcePtr source, ReaderPtr input); /// Construct a new RussianLetterTokenizer using a given {@link AttributeFactory}. RussianLetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input); virtual ~RussianLetterTokenizer(); LUCENE_CLASS(RussianLetterTokenizer); public: /// Collects only characters which satisfy UnicodeUtil::isAlpha(c). virtual bool isTokenChar(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/RussianLowerCaseFilter.h000066400000000000000000000016011217574114600257330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANLOWERCASEFILTER_H #define RUSSIANLOWERCASEFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// Normalizes token text to lower case. class LPPCONTRIBAPI RussianLowerCaseFilter : public TokenFilter { public: RussianLowerCaseFilter(TokenStreamPtr input); virtual ~RussianLowerCaseFilter(); LUCENE_CLASS(RussianLowerCaseFilter); protected: TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/RussianStemFilter.h000066400000000000000000000025341217574114600247650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANSTEMFILTER_H #define RUSSIANSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that stems Russian words. /// /// The implementation was inspired by GermanStemFilter. /// /// The input should be filtered by {@link LowerCaseFilter} before passing it to RussianStemFilter, /// because RussianStemFilter only works with lowercase characters. class LPPCONTRIBAPI RussianStemFilter : public TokenFilter { public: RussianStemFilter(TokenStreamPtr input); virtual ~RussianStemFilter(); LUCENE_CLASS(RussianStemFilter); protected: /// {@link RussianStemmer} in use by this filter. RussianStemmerPtr stemmer; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Set a alternative/custom {@link RussianStemmer} for this filter. void setStemmer(RussianStemmerPtr stemmer); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/RussianStemmer.h000066400000000000000000000106631217574114600243250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANSTEMMER_H #define RUSSIANSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Russian stemming algorithm implementation (see http://snowball.sourceforge.net for /// detailed description). 
class LPPCONTRIBAPI RussianStemmer : public LuceneObject { public: RussianStemmer(); virtual ~RussianStemmer(); LUCENE_CLASS(RussianStemmer); protected: /// positions of RV, R1 and R2 respectively int32_t RV; int32_t R1; int32_t R2; static const wchar_t A; static const wchar_t V; static const wchar_t G; static const wchar_t E; static const wchar_t I; static const wchar_t I_; static const wchar_t L; static const wchar_t M; static const wchar_t N; static const wchar_t O; static const wchar_t S; static const wchar_t T; static const wchar_t U; static const wchar_t X; static const wchar_t SH; static const wchar_t SHCH; static const wchar_t Y; static const wchar_t SOFT; static const wchar_t AE; static const wchar_t IU; static const wchar_t IA; /// stem definitions static const wchar_t vowels[]; Collection perfectiveGerundEndings1(); Collection perfectiveGerund1Predessors(); Collection perfectiveGerundEndings2(); Collection adjectiveEndings(); Collection participleEndings1(); Collection participleEndings2(); Collection participle1Predessors(); Collection reflexiveEndings(); Collection verbEndings1(); Collection verbEndings2(); Collection verb1Predessors(); Collection nounEndings(); Collection superlativeEndings(); Collection derivationalEndings(); Collection doubleN(); public: /// Finds the stem for given Russian word. String stem(const String& input); /// Static method for stemming. static String stemWord(const String& word); protected: /// Adjectival ending is an adjective ending, optionally preceded by participle ending. bool adjectival(String& stemmingZone); /// Derivational endings bool derivational(String& stemmingZone); /// Finds ending among given ending class and returns the length of ending found(0, if not found). int32_t findEnding(String& stemmingZone, int32_t startIndex, Collection theEndingClass); int32_t findEnding(String& stemmingZone, Collection theEndingClass); /// Finds the ending among the given class of endings and removes it from stemming zone. 
bool findAndRemoveEnding(String& stemmingZone, Collection theEndingClass); /// Finds the ending among the given class of endings, then checks if this ending was /// preceded by any of given predecessors, and if so, removes it from stemming zone. bool findAndRemoveEnding(String& stemmingZone, Collection theEndingClass, Collection thePredessors); /// Marks positions of RV, R1 and R2 in a given word. void markPositions(const String& word); /// Checks if character is a vowel. bool isVowel(wchar_t letter); /// Noun endings. bool noun(String& stemmingZone); /// Perfective gerund endings. bool perfectiveGerund(String& stemmingZone); /// Reflexive endings. bool reflexive(String& stemmingZone); bool removeI(String& stemmingZone); bool removeSoft(String& stemmingZone); /// Superlative endings. bool superlative(String& stemmingZone); /// Undoubles N. bool undoubleN(String& stemmingZone); /// Verb endings. bool verb(String& stemmingZone); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/SimpleFragmenter.h000066400000000000000000000026021217574114600246020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEFRAGMENTER_H #define SIMPLEFRAGMENTER_H #include "Fragmenter.h" namespace Lucene { /// {@link Fragmenter} implementation which breaks text up into same-size fragments with /// no concerns over spotting sentence boundaries. 
class LPPCONTRIBAPI SimpleFragmenter : public Fragmenter, public LuceneObject { public: SimpleFragmenter(); SimpleFragmenter(int32_t fragmentSize); virtual ~SimpleFragmenter(); LUCENE_CLASS(SimpleFragmenter); protected: static const int32_t DEFAULT_FRAGMENT_SIZE; int32_t currentNumFrags; int32_t fragmentSize; OffsetAttributePtr offsetAtt; public: virtual void start(const String& originalText, TokenStreamPtr tokenStream); virtual bool isNewFragment(); /// @return size in number of characters of each fragment int32_t getFragmentSize(); /// @param size size in characters of each fragment void setFragmentSize(int32_t size); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/SimpleHTMLEncoder.h000066400000000000000000000015751217574114600245640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEHTMLENCODER_H #define SIMPLEHTMLENCODER_H #include "Encoder.h" namespace Lucene { /// Simple {@link Encoder} implementation to escape text for HTML output. class LPPCONTRIBAPI SimpleHTMLEncoder : public Encoder, public LuceneObject { public: virtual ~SimpleHTMLEncoder(); LUCENE_CLASS(SimpleHTMLEncoder); public: virtual String encodeText(const String& originalText); /// Encode string into HTML static String htmlEncode(const String& plainText); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/SimpleHTMLFormatter.h000066400000000000000000000023141217574114600251400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEHTMLFORMATTER_H #define SIMPLEHTMLFORMATTER_H #include "Formatter.h" namespace Lucene { /// Simple {@link Formatter} implementation to highlight terms with a pre and post tag. class LPPCONTRIBAPI SimpleHTMLFormatter : public Formatter, public LuceneObject { public: /// Default constructor uses HTML: <B> tags to markup terms. SimpleHTMLFormatter(); SimpleHTMLFormatter(const String& preTag, const String& postTag); virtual ~SimpleHTMLFormatter(); LUCENE_CLASS(SimpleHTMLFormatter); protected: static const String DEFAULT_PRE_TAG; static const String DEFAULT_POST_TAG; String preTag; String postTag; public: virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/SimpleSpanFragmenter.h000066400000000000000000000032541217574114600254300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLESPANFRAGMENTER_H #define SIMPLESPANFRAGMENTER_H #include "Fragmenter.h" namespace Lucene { /// {@link Fragmenter} implementation which breaks text up into same-size fragments but /// does not split up {@link Spans}. This is a simple sample class. 
class LPPCONTRIBAPI SimpleSpanFragmenter : public Fragmenter, public LuceneObject { public: /// @param queryScorer QueryScorer that was used to score hits SimpleSpanFragmenter(QueryScorerPtr queryScorer); /// @param queryScorer QueryScorer that was used to score hits /// @param fragmentSize size in bytes of each fragment SimpleSpanFragmenter(QueryScorerPtr queryScorer, int32_t fragmentSize); virtual ~SimpleSpanFragmenter(); LUCENE_CLASS(SimpleSpanFragmenter); protected: static const int32_t DEFAULT_FRAGMENT_SIZE; int32_t fragmentSize; int32_t currentNumFrags; int32_t position; QueryScorerPtr queryScorer; int32_t waitForPos; int32_t textSize; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncAtt; OffsetAttributePtr offsetAtt; public: virtual bool isNewFragment(); virtual void start(const String& originalText, TokenStreamPtr tokenStream); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/SnowballAnalyzer.h000066400000000000000000000044131217574114600246270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SNOWBALLANALYZER_H #define SNOWBALLANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} /// and {@link SnowballFilter}. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI SnowballAnalyzer : public Analyzer { public: /// Builds the named analyzer with no stop words. SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name); /// Builds an analyzer with the given stop words. 
SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name, HashSet stopwords); virtual ~SnowballAnalyzer(); LUCENE_CLASS(SnowballAnalyzer); protected: /// Contains the stopwords used with the StopFilter. HashSet stopSet; String name; LuceneVersion::Version matchVersion; public: /// Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, /// a {@link StopFilter} and a {@link SnowballFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); /// Returns a (possibly reused) {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link /// LowerCaseFilter}, a {@link StopFilter} and a {@link SnowballFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); }; class LPPCONTRIBAPI SnowballAnalyzerSavedStreams : public LuceneObject { public: virtual ~SnowballAnalyzerSavedStreams(); LUCENE_CLASS(SnowballAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/SnowballFilter.h000066400000000000000000000017171217574114600242730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SNOWBALLFILTER_H #define SNOWBALLFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" struct sb_stemmer; namespace Lucene { /// A filter that stems words using a Snowball-generated stemmer. 
class LPPCONTRIBAPI SnowballFilter : public TokenFilter { public: SnowballFilter(TokenStreamPtr input, const String& name); virtual ~SnowballFilter(); LUCENE_CLASS(SnowballFilter); protected: struct sb_stemmer* stemmer; UTF8ResultPtr utf8Result; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/SpanGradientFormatter.h000066400000000000000000000022771217574114600256110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANGRADIENTFORMATTER_H #define SPANGRADIENTFORMATTER_H #include "GradientFormatter.h" namespace Lucene { /// Formats text with different color intensity depending on the score of the term using the /// span tag. GradientFormatter uses a bgcolor argument to the font tag which doesn't work /// in Mozilla, thus this class. /// @see GradientFormatter class LPPCONTRIBAPI SpanGradientFormatter : public GradientFormatter { public: SpanGradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor); virtual ~SpanGradientFormatter(); LUCENE_CLASS(SpanGradientFormatter); public: virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/TextFragment.h000066400000000000000000000036151217574114600237530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TEXTFRAGMENT_H #define TEXTFRAGMENT_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Low-level class used to record information about a section of a document with a score. class LPPCONTRIBAPI TextFragment : public LuceneObject { public: TextFragment(StringBufferPtr markedUpText, int32_t textStartPos, int32_t fragNum); virtual ~TextFragment(); LUCENE_CLASS(TextFragment); public: StringBufferPtr markedUpText; int32_t fragNum; int32_t textStartPos; int32_t textEndPos; double score; public: void setScore(double score); double getScore(); /// @param frag2 Fragment to be merged into this one void merge(TextFragmentPtr frag2); /// @return true if this fragment follows the one passed bool follows(TextFragmentPtr fragment); /// @return the fragment sequence number int32_t getFragNum(); /// Returns the marked-up text for this text fragment virtual String toString(); }; /// Utility class to store a string buffer that contains text fragment class LPPCONTRIBAPI StringBuffer : public LuceneObject { public: virtual ~StringBuffer(); LUCENE_CLASS(StringBuffer); protected: StringStream buffer; public: virtual String toString(); virtual int32_t length(); virtual void append(const String& str); virtual void clear(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/TokenGroup.h000066400000000000000000000036661217574114600234460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TOKENGROUP_H #define TOKENGROUP_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// One, or several overlapping tokens, along with the score(s) and the scope of the original text class LPPCONTRIBAPI TokenGroup : public LuceneObject { public: TokenGroup(TokenStreamPtr tokenStream); virtual ~TokenGroup(); LUCENE_CLASS(TokenGroup); protected: static const int32_t MAX_NUM_TOKENS_PER_GROUP; OffsetAttributePtr offsetAtt; TermAttributePtr termAtt; public: Collection tokens; Collection scores; int32_t numTokens; int32_t startOffset; int32_t endOffset; double tot; int32_t matchStartOffset; int32_t matchEndOffset; public: void addToken(double score); bool isDistinct(); void clear(); /// @param index a value between 0 and numTokens -1 /// @return the "n"th token TokenPtr getToken(int32_t index); /// @param index a value between 0 and numTokens -1 /// @return the "n"th score double getScore(int32_t index); /// @return the end position in the original text int32_t getEndOffset(); /// @return the number of tokens in this group int32_t getNumTokens(); /// @return the start position in the original text int32_t getStartOffset(); /// @return all tokens' scores summed up double getTotalScore(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/TokenSources.h000066400000000000000000000115631217574114600237700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TOKENSOURCES_H #define TOKENSOURCES_H #include "LuceneContrib.h" #include "TokenStream.h" namespace Lucene { /// Hides implementation issues associated with obtaining a TokenStream for use with the highlighter - can obtain /// from TermFreqVectors with offsets and (optionally) positions or from Analyzer class re-parsing the stored content. class LPPCONTRIBAPI TokenSources : public LuceneObject { public: virtual ~TokenSources(); LUCENE_CLASS(TokenSources); public: /// A convenience method that tries to first get a TermPositionVector for the specified docId, then, falls back to /// using the passed in {@link Document} to retrieve the TokenStream. This is useful when you already have the /// document, but would prefer to use the vector first. /// @param reader The {@link IndexReader} to use to try and get the vector from. /// @param docId The docId to retrieve. /// @param field The field to retrieve on the document. /// @param doc The document to fall back on. /// @param analyzer The analyzer to use for creating the TokenStream if the vector doesn't exist. /// @return The {@link TokenStream} for the {@link Fieldable} on the {@link Document} static TokenStreamPtr getAnyTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, DocumentPtr doc, AnalyzerPtr analyzer); /// A convenience method that tries a number of approaches to getting a token stream. The cost of finding there /// are no termVectors in the index is minimal (1000 invocations still registers 0 ms). So this "lazy" (flexible?) /// approach to coding is probably acceptable static TokenStreamPtr getAnyTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, AnalyzerPtr analyzer); static TokenStreamPtr getTokenStream(TermPositionVectorPtr tpv); /// Low level api. /// Returns a token stream or null if no offset info available in index. 
This can be used to feed the highlighter /// with a pre-parsed token stream. /// /// In my tests the speeds to recreate 1000 token streams using this method are: /// - with TermVector offset only data stored - 420 milliseconds /// - with TermVector offset AND position data stored - 271 milliseconds /// (nb timings for TermVector with position data are based on a tokenizer with contiguous positions - no overlaps /// or gaps) The cost of not using TermPositionVector to store pre-parsed content and using an analyzer to re-parse /// the original content: /// - reanalyzing the original content - 980 milliseconds /// /// The re-analyze timings will typically vary depending on - /// 1) The complexity of the analyzer code (timings above were using a stemmer/lowercaser/stopword combo) /// 2) The number of other fields (Lucene reads ALL fields off the disk when accessing just one document field - /// can cost dear!) /// 3) Use of compression on field storage - could be faster due to compression (less disk IO) or slower (more CPU /// burn) depending on the content. /// /// @param tpv /// @param tokenPositionsGuaranteedContiguous true if the token position numbers have no overlaps or gaps. If looking /// to eek out the last drops of performance, set to true. If in doubt, set to false. 
static TokenStreamPtr getTokenStream(TermPositionVectorPtr tpv, bool tokenPositionsGuaranteedContiguous); static TokenStreamPtr getTokenStream(IndexReaderPtr reader, int32_t docId, const String& field); static TokenStreamPtr getTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, AnalyzerPtr analyzer); static TokenStreamPtr getTokenStream(DocumentPtr doc, const String& field, AnalyzerPtr analyzer); static TokenStreamPtr getTokenStream(const String& field, const String& contents, AnalyzerPtr analyzer); }; /// an object used to iterate across an array of tokens class LPPCONTRIBAPI StoredTokenStream : public TokenStream { public: StoredTokenStream(Collection tokens); virtual ~StoredTokenStream(); LUCENE_CLASS(StoredTokenStream); public: Collection tokens; int32_t currentToken; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/WeightedSpanTerm.h000066400000000000000000000033111217574114600245460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WEIGHTEDSPANTERM_H #define WEIGHTEDSPANTERM_H #include "WeightedTerm.h" namespace Lucene { /// Lightweight class to hold term, weight, and positions used for scoring this term. class LPPCONTRIBAPI WeightedSpanTerm : public WeightedTerm { public: WeightedSpanTerm(double weight, const String& term, bool positionSensitive = false); virtual ~WeightedSpanTerm(); LUCENE_CLASS(WeightedSpanTerm); public: bool positionSensitive; protected: Collection positionSpans; public: /// Checks to see if this term is valid at position. /// @param position To check against valid term positions. 
/// @return true if this term is a hit at this position. bool checkPosition(int32_t position); void addPositionSpans(Collection positionSpans); bool isPositionSensitive(); void setPositionSensitive(bool positionSensitive); Collection getPositionSpans(); }; /// Utility class to store a Span class LPPCONTRIBAPI PositionSpan : public LuceneObject { public: PositionSpan(int32_t start, int32_t end); virtual ~PositionSpan(); LUCENE_CLASS(PositionSpan); public: int32_t start; int32_t end; }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/WeightedSpanTermExtractor.h000066400000000000000000000126311217574114600264470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WEIGHTEDSPANTERMEXTRACTOR_H #define WEIGHTEDSPANTERMEXTRACTOR_H #include "LuceneContrib.h" #include "FilterIndexReader.h" #include "MapWeightedSpanTerm.h" namespace Lucene { /// Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether {@link Term}s /// from the {@link Query} are contained in a supplied {@link TokenStream}. class LPPCONTRIBAPI WeightedSpanTermExtractor : public LuceneObject { public: WeightedSpanTermExtractor(const String& defaultField = L""); virtual ~WeightedSpanTermExtractor(); LUCENE_CLASS(WeightedSpanTermExtractor); protected: String fieldName; TokenStreamPtr tokenStream; MapStringIndexReader readers; String defaultField; bool expandMultiTermQuery; bool cachedTokenStream; bool wrapToCaching; protected: void closeReaders(); /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied Query. 
/// /// @param query Query to extract Terms from /// @param terms Map to place created WeightedSpanTerms in void extract(QueryPtr query, MapWeightedSpanTermPtr terms); /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied SpanQuery. /// /// @param terms Map to place created WeightedSpanTerms in. /// @param spanQuery SpanQuery to extract Terms from void extractWeightedSpanTerms(MapWeightedSpanTermPtr terms, SpanQueryPtr spanQuery); /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied Query. /// @param terms Map to place created WeightedSpanTerms in /// @param query Query to extract Terms from void extractWeightedTerms(MapWeightedSpanTermPtr terms, QueryPtr query); /// Necessary to implement matches for queries against defaultField bool fieldNameComparator(const String& fieldNameToCheck); IndexReaderPtr getReaderForField(const String& field); void collectSpanQueryFields(SpanQueryPtr spanQuery, HashSet fieldNames); bool mustRewriteQuery(SpanQueryPtr spanQuery); public: /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. /// /// @param query That caused hit /// @param tokenStream Of text to be highlighted /// @return Map containing WeightedSpanTerms MapWeightedSpanTermPtr getWeightedSpanTerms(QueryPtr query, TokenStreamPtr tokenStream); /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. /// /// @param query That caused hit /// @param tokenStream Of text to be highlighted /// @param fieldName Restricts Term's used based on field name /// @return Map containing WeightedSpanTerms MapWeightedSpanTermPtr getWeightedSpanTerms(QueryPtr query, TokenStreamPtr tokenStream, const String& fieldName); /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. Uses a supplied /// IndexReader to properly weight terms (for gradient highlighting). 
/// /// @param query That caused hit /// @param tokenStream Of text to be highlighted /// @param fieldName Restricts Term's used based on field name /// @param reader To use for scoring /// @return Map containing WeightedSpanTerms MapWeightedSpanTermPtr getWeightedSpanTermsWithScores(QueryPtr query, TokenStreamPtr tokenStream, const String& fieldName, IndexReaderPtr reader); bool getExpandMultiTermQuery(); void setExpandMultiTermQuery(bool expandMultiTermQuery); bool isCachedTokenStream(); TokenStreamPtr getTokenStream(); /// By default, {@link TokenStream}s that are not of the type {@link CachingTokenFilter} /// are wrapped in a {@link CachingTokenFilter} to ensure an efficient reset - if you /// are already using a different caching {@link TokenStream} impl and you don't want /// it to be wrapped, set this to false. void setWrapIfNotCachingTokenFilter(bool wrap); }; /// This class makes sure that if both position sensitive and insensitive versions of the same /// term are added, the position insensitive one wins. class LPPCONTRIBAPI PositionCheckingMap : public MapWeightedSpanTerm { public: virtual ~PositionCheckingMap(); LUCENE_CLASS(PositionCheckingMap); public: virtual void put(const String& key, WeightedSpanTermPtr val); }; /// A fake IndexReader class to extract the field from a MultiTermQuery class LPPCONTRIBAPI FakeReader : public FilterIndexReader { public: FakeReader(); virtual ~FakeReader(); LUCENE_CLASS(FakeReader); public: String field; protected: static IndexReaderPtr EMPTY_MEMORY_INDEX_READER(); public: virtual TermEnumPtr terms(TermPtr t); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/include/WeightedTerm.h000066400000000000000000000023511217574114600237270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef WEIGHTEDTERM_H #define WEIGHTEDTERM_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Lightweight class to hold term and a weight value used for scoring this term class LPPCONTRIBAPI WeightedTerm : public LuceneObject { public: WeightedTerm(double weight, const String& term); virtual ~WeightedTerm(); LUCENE_CLASS(WeightedTerm); public: double weight; // multiplier String term; // stemmed form public: /// @return the term value (stemmed) String getTerm(); /// @return the weight associated with this term double getWeight(); /// @param term the term value (stemmed) void setTerm(const String& term); /// @param weight the weight associated with this term void setWeight(double weight); }; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/memory/000077500000000000000000000000001217574114600210525ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/memory/MemoryIndex.cpp000066400000000000000000000606511217574114600240260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "MemoryIndex.h" #include "TokenStream.h" #include "Analyzer.h" #include "StringReader.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "IndexSearcher.h" #include "Term.h" #include "Scorer.h" #include "TermFreqVector.h" #include "TermVectorOffsetInfo.h" #include "TermVectorMapper.h" #include "Similarity.h" #include "FieldInvertState.h" #include "Document.h" #include "MiscUtils.h" namespace Lucene { const double MemoryIndex::docBoost = 1.0; MemoryIndex::MemoryIndex(bool storeOffsets) { stride = storeOffsets ? 3 : 1; fields = MapStringMemoryIndexInfo::newInstance(); } MemoryIndex::~MemoryIndex() { } void MemoryIndex::addField(const String& fieldName, const String& text, AnalyzerPtr analyzer) { if (fieldName.empty()) boost::throw_exception(IllegalArgumentException(L"fieldName must not be empty")); if (text.empty()) boost::throw_exception(IllegalArgumentException(L"text must not be empty")); if (!analyzer) boost::throw_exception(IllegalArgumentException(L"analyzer must not be null")); TokenStreamPtr stream(analyzer->tokenStream(fieldName, newLucene(text))); addField(fieldName, stream); } void MemoryIndex::addField(const String& fieldName, TokenStreamPtr stream, double boost) { LuceneException finally; try { if (fieldName.empty()) boost::throw_exception(IllegalArgumentException(L"fieldName must not be empty")); if (!stream) boost::throw_exception(IllegalArgumentException(L"token stream must not be null")); if (boost <= 0.0) boost::throw_exception(IllegalArgumentException(L"boost factor must be greater than 0.0")); if (fields.contains(fieldName)) boost::throw_exception(IllegalArgumentException(L"field must not be added more than once")); MapStringIntCollection terms(MapStringIntCollection::newInstance()); int32_t numTokens = 0; int32_t numOverlapTokens = 0; int32_t pos = -1; TermAttributePtr 
termAtt(stream->addAttribute()); PositionIncrementAttributePtr posIncrAttribute(stream->addAttribute()); OffsetAttributePtr offsetAtt(stream->addAttribute()); stream->reset(); while (stream->incrementToken()) { String term(termAtt->term()); if (term.empty()) continue; // nothing to do ++numTokens; int32_t posIncr = posIncrAttribute->getPositionIncrement(); if (posIncr == 0) ++numOverlapTokens; pos += posIncr; Collection positions(terms.get(term)); if (!positions) { // term not seen before positions = Collection::newInstance(); terms.put(term, positions); } positions.add(pos); if (stride != 1) { positions.add(offsetAtt->startOffset()); positions.add(offsetAtt->endOffset()); } } stream->end(); // ensure infos.numTokens > 0 invariant; needed for correct operation of terms() if (numTokens > 0) { boost = boost * docBoost; // see DocumentWriter.addDocument(...) fields.put(fieldName, newLucene(terms, numTokens, numOverlapTokens, boost)); sortedFields.reset(); // invalidate sorted view, if any } } catch (IOException& e) { // can never happen boost::throw_exception(RuntimeException(e.getError())); } catch (LuceneException& e) { finally = e; } try { if (stream) stream->close(); } catch (IOException& e) { boost::throw_exception(RuntimeException(e.getError())); } finally.throwException(); } IndexSearcherPtr MemoryIndex::createSearcher() { MemoryIndexReaderPtr reader(newLucene(shared_from_this())); IndexSearcherPtr searcher(newLucene(reader)); // ensures no auto-close reader->setSearcher(searcher); // to later get hold of searcher.getSimilarity() return searcher; } double MemoryIndex::search(QueryPtr query) { if (!query) boost::throw_exception(IllegalArgumentException(L"query must not be null")); SearcherPtr searcher(createSearcher()); LuceneException finally; try { Collection scores = Collection::newInstance(1); scores[0] = 0.0; // inits to 0.0 (no match) searcher->search(query, newLucene(scores)); return scores[0]; } catch (IOException& e) { // can never happen 
boost::throw_exception(RuntimeException(e.getError())); } catch (LuceneException& e) { finally = e; } finally.throwException(); return 0; // silence static analyzers } int32_t MemoryIndex::numPositions(Collection positions) { return (positions.size() / stride); } struct lessField { inline bool operator()(const PairStringMemoryIndexInfo& first, const PairStringMemoryIndexInfo& second) const { return (first.first < second.first); } }; void MemoryIndex::sortFields() { if (!sortedFields) { sortedFields = CollectionStringMemoryIndexInfo::newInstance(fields.begin(), fields.end()); std::sort(sortedFields.begin(), sortedFields.end(), lessField()); } } MemoryIndexInfo::MemoryIndexInfo(MapStringIntCollection terms, int32_t numTokens, int32_t numOverlapTokens, double boost) { this->terms = terms; this->numTokens = numTokens; this->numOverlapTokens = numOverlapTokens; this->boost = boost; } MemoryIndexInfo::~MemoryIndexInfo() { } struct lessTerm { inline bool operator()(const PairStringIntCollection& first, const PairStringIntCollection& second) const { return (first.first < second.first); } }; void MemoryIndexInfo::sortTerms() { if (!sortedTerms) { sortedTerms = CollectionStringIntCollection::newInstance(terms.begin(), terms.end()); std::sort(sortedTerms.begin(), sortedTerms.end(), lessTerm()); } } Collection MemoryIndexInfo::getPositions(const String& term) { return terms.get(term); } Collection MemoryIndexInfo::getPositions(int32_t pos) { return sortedTerms[pos].second; } double MemoryIndexInfo::getBoost() { return boost; } MemoryIndexReader::MemoryIndexReader(MemoryIndexPtr memoryIndex) { this->memoryIndex = memoryIndex; } MemoryIndexReader::~MemoryIndexReader() { } TermPtr MemoryIndexReader::MATCH_ALL_TERM() { static TermPtr _MATCH_ALL_TERM; if (!_MATCH_ALL_TERM) { _MATCH_ALL_TERM = newLucene(L""); CycleCheck::addStatic(_MATCH_ALL_TERM); } return _MATCH_ALL_TERM; } MemoryIndexInfoPtr MemoryIndexReader::getInfo(const String& fieldName) { return 
memoryIndex->fields.get(fieldName); } MemoryIndexInfoPtr MemoryIndexReader::getInfo(int32_t pos) { return memoryIndex->sortedFields[pos].second; } int32_t MemoryIndexReader::docFreq(TermPtr t) { MemoryIndexInfoPtr info(getInfo(t->field())); int32_t freq = 0; if (info) freq = info->getPositions(t->text()) ? 1 : 0; return freq; } TermEnumPtr MemoryIndexReader::terms() { return terms(MATCH_ALL_TERM()); } TermEnumPtr MemoryIndexReader::terms(TermPtr t) { int32_t i = 0; // index into info.sortedTerms int32_t j = 0; // index into sortedFields memoryIndex->sortFields(); if (memoryIndex->sortedFields.size() == 1 && memoryIndex->sortedFields[0].first == t->field()) j = 0; // fast path else { CollectionStringMemoryIndexInfo::iterator search = std::lower_bound(memoryIndex->sortedFields.begin(), memoryIndex->sortedFields.end(), std::make_pair(t->field(), MemoryIndexInfoPtr()), lessField()); int32_t keyPos = std::distance(memoryIndex->sortedFields.begin(), search); j = (search == memoryIndex->sortedFields.end() || t->field() < search->first) ? -(keyPos + 1) : keyPos; } if (j < 0) // not found; choose successor { j = -j - 1; i = 0; if (j < memoryIndex->sortedFields.size()) getInfo(j)->sortTerms(); } else // found { MemoryIndexInfoPtr info(getInfo(j)); info->sortTerms(); CollectionStringIntCollection::iterator search = std::lower_bound(info->sortedTerms.begin(), info->sortedTerms.end(), std::make_pair(t->text(), Collection()), lessTerm()); int32_t keyPos = std::distance(info->sortedTerms.begin(), search); i = (search == info->sortedTerms.end() || t->text() < search->first) ? 
-(keyPos + 1) : keyPos; if (i < 0) // not found; choose successor { i = -i - 1; if (i >= info->sortedTerms.size()) // move to next successor { ++j; i = 0; if (j < memoryIndex->sortedFields.size()) getInfo(j)->sortTerms(); } } } return newLucene(shared_from_this(), i, j); } TermPositionsPtr MemoryIndexReader::termPositions() { return newLucene(shared_from_this()); } TermDocsPtr MemoryIndexReader::termDocs() { return termPositions(); } Collection MemoryIndexReader::getTermFreqVectors(int32_t docNumber) { Collection vectors(Collection::newInstance()); for (MapStringMemoryIndexInfo::iterator fieldName = memoryIndex->fields.begin(); fieldName != memoryIndex->fields.end(); ++fieldName) vectors.add(getTermFreqVector(docNumber, fieldName->first)); return vectors; } void MemoryIndexReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) { for (MapStringMemoryIndexInfo::iterator fieldName = memoryIndex->fields.begin(); fieldName != memoryIndex->fields.end(); ++fieldName) getTermFreqVector(docNumber, fieldName->first, mapper); } void MemoryIndexReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) { MemoryIndexInfoPtr info(getInfo(field)); if (!info) return; info->sortTerms(); mapper->setExpectations(field, info->sortedTerms.size(), memoryIndex->stride != 1, true); for (int32_t i = info->sortedTerms.size(); --i >=0;) { Collection positions(info->sortedTerms[i].second); int32_t size = positions.size(); Collection offsets(Collection::newInstance(size / memoryIndex->stride)); for (int32_t k = 0, j = 1; j < size; ++k, j += memoryIndex->stride) { int32_t start = positions[j]; int32_t end = positions[j + 1]; offsets[k] = newLucene(start, end); } mapper->map(info->sortedTerms[i].first, memoryIndex->numPositions(info->sortedTerms[i].second), offsets, info->sortedTerms[i].second); } } TermFreqVectorPtr MemoryIndexReader::getTermFreqVector(int32_t docNumber, const String& field) { MemoryIndexInfoPtr info(getInfo(field)); if 
(!info) return TermFreqVectorPtr(); info->sortTerms(); return newLucene(shared_from_this(), info, field); } SimilarityPtr MemoryIndexReader::getSimilarity() { SearcherPtr searcher(_searcher.lock()); if (searcher) return searcher->getSimilarity(); return Similarity::getDefault(); } void MemoryIndexReader::setSearcher(SearcherPtr searcher) { _searcher = searcher; } ByteArray MemoryIndexReader::norms(const String& field) { ByteArray norms(cachedNorms); SimilarityPtr sim(getSimilarity()); if (field != cachedFieldName || sim != cachedSimilarity) // not cached? { MemoryIndexInfoPtr info(getInfo(field)); int32_t numTokens = info ? info->numTokens : 0; int32_t numOverlapTokens = info ? info->numOverlapTokens : 0; double boost = info ? info->getBoost() : 1.0; FieldInvertStatePtr invertState(newLucene(0, numTokens, numOverlapTokens, 0, boost)); double n = sim->computeNorm(field, invertState); uint8_t norm = Similarity::encodeNorm(n); norms = ByteArray::newInstance(1); norms[0] = norm; // cache it for future reuse cachedNorms = norms; cachedFieldName = field; cachedSimilarity = sim; } return norms; } void MemoryIndexReader::norms(const String& field, ByteArray norms, int32_t offset) { ByteArray _norms(this->norms(field)); MiscUtils::arrayCopy(_norms.get(), 0, norms.get(), offset, _norms.size()); } void MemoryIndexReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { boost::throw_exception(UnsupportedOperationException()); } int32_t MemoryIndexReader::numDocs() { return memoryIndex->fields.empty() ? 
0 : 1; } int32_t MemoryIndexReader::maxDoc() { return 1; } DocumentPtr MemoryIndexReader::document(int32_t n) { return newLucene(); // there are no stored fields } DocumentPtr MemoryIndexReader::document(int32_t n, FieldSelectorPtr fieldSelector) { return newLucene(); // there are no stored fields } bool MemoryIndexReader::isDeleted(int32_t n) { return false; } bool MemoryIndexReader::hasDeletions() { return false; } void MemoryIndexReader::doDelete(int32_t docNum) { boost::throw_exception(UnsupportedOperationException()); } void MemoryIndexReader::doUndeleteAll() { boost::throw_exception(UnsupportedOperationException()); } void MemoryIndexReader::doCommit(MapStringString commitUserData) { } void MemoryIndexReader::doClose() { } HashSet MemoryIndexReader::getFieldNames(FieldOption fieldOption) { static HashSet emptySet; if (!emptySet) emptySet = HashSet::newInstance(); if (fieldOption == FIELD_OPTION_UNINDEXED) return emptySet; if (fieldOption == FIELD_OPTION_INDEXED_NO_TERMVECTOR) return emptySet; if (fieldOption == FIELD_OPTION_TERMVECTOR_WITH_OFFSET && memoryIndex->stride == 1) return emptySet; if (fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET && memoryIndex->stride == 1) return emptySet; HashSet fieldSet(HashSet::newInstance()); for (MapStringMemoryIndexInfo::iterator field = memoryIndex->fields.begin(); field != memoryIndex->fields.end(); ++field) fieldSet.add(field->first); return fieldSet; } MemoryIndexTermEnum::MemoryIndexTermEnum(MemoryIndexReaderPtr reader, int32_t ix, int32_t jx) { _reader = reader; i = ix; j = jx; } MemoryIndexTermEnum::~MemoryIndexTermEnum() { } bool MemoryIndexTermEnum::next() { MemoryIndexReaderPtr reader(_reader); if (j >= reader->memoryIndex->sortedFields.size()) return false; MemoryIndexInfoPtr info(reader->getInfo(j)); if (++i < info->sortedTerms.size()) return true; // move to successor ++j; i = 0; if (j >= reader->memoryIndex->sortedFields.size()) return false; reader->getInfo(j)->sortTerms(); return true; } 
TermPtr MemoryIndexTermEnum::term() { MemoryIndexReaderPtr reader(_reader); if (j >= reader->memoryIndex->sortedFields.size()) return TermPtr(); MemoryIndexInfoPtr info(reader->getInfo(j)); if (i >= info->sortedTerms.size()) return TermPtr(); return createTerm(info, j, info->sortedTerms[i].first); } int32_t MemoryIndexTermEnum::docFreq() { MemoryIndexReaderPtr reader(_reader); if (j >= reader->memoryIndex->sortedFields.size()) return 0; MemoryIndexInfoPtr info(reader->getInfo(j)); if (i >= info->sortedTerms.size()) return 0; return reader->memoryIndex->numPositions(info->getPositions(i)); } void MemoryIndexTermEnum::close() { } TermPtr MemoryIndexTermEnum::createTerm(MemoryIndexInfoPtr info, int32_t pos, const String& text) { TermPtr _template(info->_template); if (!_template) // not yet cached? { MemoryIndexReaderPtr reader(_reader); String fieldName(reader->memoryIndex->sortedFields[pos].first); _template = newLucene(fieldName); info->_template = _template; } return _template->createTerm(text); } MemoryIndexCollector::MemoryIndexCollector(Collection scores) { this->scores = scores; } MemoryIndexCollector::~MemoryIndexCollector() { } void MemoryIndexCollector::collect(int32_t doc) { scores[0] = scorer->score(); } void MemoryIndexCollector::setScorer(ScorerPtr scorer) { this->scorer = scorer; } bool MemoryIndexCollector::acceptsDocsOutOfOrder() { return true; } void MemoryIndexCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) { } MemoryIndexTermPositions::MemoryIndexTermPositions(MemoryIndexReaderPtr reader) { _reader = reader; hasNext = false; cursor = 0; } MemoryIndexTermPositions::~MemoryIndexTermPositions() { } void MemoryIndexTermPositions::seek(TermPtr term) { this->term = term; if (!term) hasNext = true; // term == null means match all docs else { MemoryIndexReaderPtr reader(_reader); MemoryIndexInfoPtr info(reader->getInfo(term->field())); current = info ? 
info->getPositions(term->text()) : Collection(); hasNext = current; cursor = 0; } } void MemoryIndexTermPositions::seek(TermEnumPtr termEnum) { seek(termEnum->term()); } int32_t MemoryIndexTermPositions::doc() { return 0; } int32_t MemoryIndexTermPositions::freq() { MemoryIndexReaderPtr reader(_reader); int32_t freq = current ? reader->memoryIndex->numPositions(current) : (term ? 0 : 1); return freq; } bool MemoryIndexTermPositions::next() { bool _next = hasNext; hasNext = false; return _next; } int32_t MemoryIndexTermPositions::read(Collection docs, Collection freqs) { if (!hasNext) return 0; hasNext = false; docs[0] = 0; freqs[0] = freq(); return 1; } bool MemoryIndexTermPositions::skipTo(int32_t target) { return next(); } void MemoryIndexTermPositions::close() { } int32_t MemoryIndexTermPositions::nextPosition() { // implements TermPositions MemoryIndexReaderPtr reader(_reader); int32_t pos = current[cursor]; cursor += reader->memoryIndex->stride; return pos; } int32_t MemoryIndexTermPositions::getPayloadLength() { boost::throw_exception(UnsupportedOperationException()); } ByteArray MemoryIndexTermPositions::getPayload(ByteArray data, int32_t offset) { boost::throw_exception(UnsupportedOperationException()); return ByteArray(); } bool MemoryIndexTermPositions::isPayloadAvailable() { return false; // unsupported } MemoryIndexTermPositionVector::MemoryIndexTermPositionVector(MemoryIndexReaderPtr reader, MemoryIndexInfoPtr info, const String& fieldName) { this->_reader = reader; this->sortedTerms = info->sortedTerms; this->fieldName = fieldName; } MemoryIndexTermPositionVector::~MemoryIndexTermPositionVector() { } String MemoryIndexTermPositionVector::getField() { return fieldName; } int32_t MemoryIndexTermPositionVector::size() { return sortedTerms.size(); } Collection MemoryIndexTermPositionVector::getTerms() { Collection terms(Collection::newInstance(sortedTerms.size())); for (int32_t i = sortedTerms.size(); --i >= 0;) terms[i] = sortedTerms[i].first; return 
terms; } Collection MemoryIndexTermPositionVector::getTermFrequencies() { MemoryIndexReaderPtr reader(_reader); Collection freqs(Collection::newInstance(sortedTerms.size())); for (int32_t i = sortedTerms.size(); --i >= 0;) freqs[i] = reader->memoryIndex->numPositions(sortedTerms[i].second); return freqs; } int32_t MemoryIndexTermPositionVector::indexOf(const String& term) { CollectionStringIntCollection::iterator search = std::lower_bound(sortedTerms.begin(), sortedTerms.end(), std::make_pair(term, Collection()), lessTerm()); return (search == sortedTerms.end() || term < search->first) ? -1 : std::distance(sortedTerms.begin(), search); } Collection MemoryIndexTermPositionVector::indexesOf(Collection terms, int32_t start, int32_t length) { Collection indexes(Collection::newInstance(length)); for (int32_t i = 0; i < length; ++i) indexes[i] = indexOf(terms[start++]); return indexes; } Collection MemoryIndexTermPositionVector::getTermPositions(int32_t index) { return sortedTerms[index].second; } Collection MemoryIndexTermPositionVector::getOffsets(int32_t index) { MemoryIndexReaderPtr reader(_reader); if (reader->memoryIndex->stride == 1) return Collection(); // no offsets stored Collection positions(sortedTerms[index].second); int32_t size = positions.size(); Collection offsets(Collection::newInstance(size / reader->memoryIndex->stride)); for (int32_t i = 0, j = 1; j < size; ++i, j += reader->memoryIndex->stride) { int32_t start = positions[j]; int32_t end = positions[j + 1]; offsets[i] = newLucene(start, end); } return offsets; } } LucenePlusPlus-rel_3.0.4/src/contrib/msvc/000077500000000000000000000000001217574114600205125ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/msvc/ContribInc.cpp000066400000000000000000000005531217574114600232530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" LucenePlusPlus-rel_3.0.4/src/contrib/msvc/dllmain.cpp000066400000000000000000000013131217574114600226340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #if defined(_WIN32) && defined(LPP_HAVE_DLL) BOOL APIENTRY DllMain(HMODULE module, DWORD ul_reason_for_call, LPVOID lpReserved) { switch (ul_reason_for_call) { case DLL_PROCESS_ATTACH: case DLL_THREAD_ATTACH: case DLL_THREAD_DETACH: case DLL_PROCESS_DETACH: break; } return TRUE; } #endif LucenePlusPlus-rel_3.0.4/src/contrib/msvc/lucene_contrib.vcproj000066400000000000000000001242651217574114600247440ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/contrib/snowball/000077500000000000000000000000001217574114600213635ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/snowball/SnowballAnalyzer.cpp000066400000000000000000000051061217574114600253600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SnowballAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "SnowballFilter.h" namespace Lucene { SnowballAnalyzer::SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name) { this->matchVersion = matchVersion; this->name = name; } SnowballAnalyzer::SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name, HashSet stopwords) { this->stopSet = stopwords; this->matchVersion = matchVersion; this->name = name; } SnowballAnalyzer::~SnowballAnalyzer() { } TokenStreamPtr SnowballAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(result); if (stopSet) result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); result = newLucene(result, name); return result; } TokenStreamPtr SnowballAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { SnowballAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); if (stopSet) streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); streams->result = newLucene(streams->result, name); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } SnowballAnalyzerSavedStreams::~SnowballAnalyzerSavedStreams() { } } 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/SnowballFilter.cpp000066400000000000000000000032671217574114600250260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SnowballFilter.h" #include "TermAttribute.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" #include "libstemmer_c/include/libstemmer.h" namespace Lucene { SnowballFilter::SnowballFilter(TokenStreamPtr input, const String& name) : TokenFilter(input) { stemmer = sb_stemmer_new(StringUtils::toUTF8(name).c_str(), "UTF_8"); if (stemmer == NULL) boost::throw_exception(IllegalArgumentException(L"language not available for stemming:" + name)); termAtt = addAttribute(); utf8Result = newLucene(); } SnowballFilter::~SnowballFilter() { } bool SnowballFilter::incrementToken() { if (input->incrementToken()) { StringUtils::toUTF8(termAtt->termBuffer().get(), termAtt->termLength(), utf8Result); const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8Result->result.get(), utf8Result->length); if (stemmed == NULL) boost::throw_exception(RuntimeException(L"exception stemming word:" + termAtt->term())); int32_t newlen = StringUtils::toUnicode(stemmed, sb_stemmer_length(stemmer), termAtt->termBuffer()); termAtt->setTermLength(newlen); return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/000077500000000000000000000000001217574114600240305ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/MANIFEST000066400000000000000000000037411217574114600251660ustar00rootroot00000000000000README src_c/stem_ISO_8859_1_danish.c src_c/stem_ISO_8859_1_danish.h src_c/stem_ISO_8859_1_dutch.c 
src_c/stem_ISO_8859_1_dutch.h src_c/stem_ISO_8859_1_english.c src_c/stem_ISO_8859_1_english.h src_c/stem_ISO_8859_1_finnish.c src_c/stem_ISO_8859_1_finnish.h src_c/stem_ISO_8859_1_french.c src_c/stem_ISO_8859_1_french.h src_c/stem_ISO_8859_1_german.c src_c/stem_ISO_8859_1_german.h src_c/stem_ISO_8859_1_hungarian.c src_c/stem_ISO_8859_1_hungarian.h src_c/stem_ISO_8859_1_italian.c src_c/stem_ISO_8859_1_italian.h src_c/stem_ISO_8859_1_norwegian.c src_c/stem_ISO_8859_1_norwegian.h src_c/stem_ISO_8859_1_porter.c src_c/stem_ISO_8859_1_porter.h src_c/stem_ISO_8859_1_portuguese.c src_c/stem_ISO_8859_1_portuguese.h src_c/stem_ISO_8859_1_spanish.c src_c/stem_ISO_8859_1_spanish.h src_c/stem_ISO_8859_1_swedish.c src_c/stem_ISO_8859_1_swedish.h src_c/stem_ISO_8859_2_romanian.c src_c/stem_ISO_8859_2_romanian.h src_c/stem_KOI8_R_russian.c src_c/stem_KOI8_R_russian.h src_c/stem_UTF_8_danish.c src_c/stem_UTF_8_danish.h src_c/stem_UTF_8_dutch.c src_c/stem_UTF_8_dutch.h src_c/stem_UTF_8_english.c src_c/stem_UTF_8_english.h src_c/stem_UTF_8_finnish.c src_c/stem_UTF_8_finnish.h src_c/stem_UTF_8_french.c src_c/stem_UTF_8_french.h src_c/stem_UTF_8_german.c src_c/stem_UTF_8_german.h src_c/stem_UTF_8_hungarian.c src_c/stem_UTF_8_hungarian.h src_c/stem_UTF_8_italian.c src_c/stem_UTF_8_italian.h src_c/stem_UTF_8_norwegian.c src_c/stem_UTF_8_norwegian.h src_c/stem_UTF_8_porter.c src_c/stem_UTF_8_porter.h src_c/stem_UTF_8_portuguese.c src_c/stem_UTF_8_portuguese.h src_c/stem_UTF_8_romanian.c src_c/stem_UTF_8_romanian.h src_c/stem_UTF_8_russian.c src_c/stem_UTF_8_russian.h src_c/stem_UTF_8_spanish.c src_c/stem_UTF_8_spanish.h src_c/stem_UTF_8_swedish.c src_c/stem_UTF_8_swedish.h src_c/stem_UTF_8_turkish.c src_c/stem_UTF_8_turkish.h runtime/api.c runtime/api.h runtime/header.h runtime/utilities.c libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c libstemmer/modules.h libstemmer/modules_utf8.h include/libstemmer.h 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/Makefile000066400000000000000000000003671217574114600254760ustar00rootroot00000000000000include mkinc.mak CFLAGS=-Iinclude all: libstemmer.o stemwords libstemmer.o: $(snowball_sources:.c=.o) $(AR) -cru $@ $^ stemwords: examples/stemwords.o libstemmer.o $(CC) -o $@ $^ clean: rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/README000066400000000000000000000117751217574114600247230ustar00rootroot00000000000000libstemmer_c ============ This document pertains to the C version of the libstemmer distribution, available for download from: http://snowball.tartarus.org/dist/libstemmer_c.tgz Compiling the library ===================== A simple makefile is provided for Unix style systems. On such systems, it should be possible simply to run "make", and the file "libstemmer.o" and the example program "stemwords" will be generated. If this doesn't work on your system, you need to write your own build system (or call the compiler directly). The files to compile are all contained in the "libstemmer", "runtime" and "src_c" directories, and the public header file is contained in the "include" directory. The library comes in two flavours; UTF-8 only, and UTF-8 plus other character sets. To use the utf-8 only flavour, compile "libstemmer_utf8.c" instead of "libstemmer.c". For convenience "mkinc.mak" is a makefile fragment listing the source files and header files used to compile the standard version of the library. "mkinc_utf8.mak" is a comparable makefile fragment listing just the source files for the UTF-8 only version of the library. Using the library ================= The library provides a simple C API. Essentially, a new stemmer can be obtained by using "sb_stemmer_new". "sb_stemmer_stem" is then used to stem a word, "sb_stemmer_length" returns the stemmed length of the last word processed, and "sb_stemmer_delete" is used to delete a stemmer. 
Creating a stemmer is a relatively expensive operation - the expected usage pattern is that a new stemmer is created when needed, used to stem many words, and deleted after some time. Stemmers are re-entrant, but not threadsafe. In other words, if you wish to access the same stemmer object from multiple threads, you must ensure that all access is protected by a mutex or similar device. libstemmer does not currently incorporate any mechanism for caching the results of stemming operations. Such caching can greatly increase the performance of a stemmer under certain situations, so suitable patches will be considered for inclusion. The standard libstemmer sources contain an algorithm for each of the supported languages. The algorithm may be selected using the english name of the language, or using the 2 or 3 letter ISO 639 language codes. In addition, the traditional "Porter" stemming algorithm for english is included for backwards compatibility purposes, but we recommend use of the "English" stemmer in preference for new projects. (Some minor algorithms which are included only as curiosities in the snowball website, such as the Lovins stemmer and the Kraaij Pohlmann stemmer, are not included in the standard libstemmer sources. These are not really supported by the snowball project, but it would be possible to compile a modified libstemmer library containing these if desired.) The stemwords example ===================== The stemwords example program allows you to run any of the stemmers compiled into the libstemmer library on a sample vocabulary. For details on how to use it, run it with the "-h" command line option. Using the library in a larger system ==================================== If you are incorporating the library into the build system of a larger program, I recommend copying the unpacked tarball without modification into a subdirectory of the sources of your program. 
Future versions of the library are intended to keep the same structure, so this will keep the work required to move to a new version of the library to a minimum. As an additional convenience, the list of source and header files used in the library is detailed in mkinc.mak - a file which is in a suitable format for inclusion by a Makefile. By including this file in your build system, you can link the snowball system into your program with a few extra rules. Using the library in a system using GNU autotools ================================================= The libstemmer_c library can be integrated into a larger system which uses the GNU autotool framework (and in particular, automake and autoconf) as follows: 1) Unpack libstemmer_c.tgz in the top level project directory so that there is a libstemmer_c subdirectory of the top level directory of the project. 2) Add a file "Makefile.am" to the unpacked libstemmer_c folder, containing: noinst_LTLIBRARIES = libstemmer.la include $(srcdir)/mkinc.mak noinst_HEADERS = $(snowball_headers) libstemmer_la_SOURCES = $(snowball_sources) (You may also need to add other lines to this, for example, if you are using compiler options which are not compatible with compiling the libstemmer library.) 3) Add libstemmer_c to the AC_CONFIG_FILES declaration in the project's configure.ac file. 4) Add to the top level makefile the following lines (or modify existing assignments to these variables appropriately): AUTOMAKE_OPTIONS = subdir-objects AM_CPPFLAGS = -I$(top_srcdir)/libstemmer_c/include SUBDIRS=libstemmer_c _LIBADD = libstemmer_c/libstemmer.la (Where is the name of the library or executable which links against libstemmer.) 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/examples/000077500000000000000000000000001217574114600256465ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/examples/stemwords.c000066400000000000000000000123421217574114600300430ustar00rootroot00000000000000/* This is a simple program which uses libstemmer to provide a command * line interface for stemming using any of the algorithms provided. */ #include #include /* for malloc, free */ #include /* for memmove */ #include /* for isupper, tolower */ #include "libstemmer.h" const char * progname; static int pretty = 1; static void stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out) { #define INC 10 int lim = INC; sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol)); while(1) { int ch = getc(f_in); if (ch == EOF) { free(b); return; } { int i = 0; int inlen = 0; while(1) { if (ch == '\n' || ch == EOF) break; if (i == lim) { sb_symbol * newb; newb = (sb_symbol *) realloc(b, (lim + INC) * sizeof(sb_symbol)); if (newb == 0) goto error; b = newb; lim = lim + INC; } /* Update count of utf-8 characters. */ if (ch < 0x80 || ch > 0xBF) inlen += 1; /* force lower case: */ if (isupper(ch)) ch = tolower(ch); b[i] = ch; i++; ch = getc(f_in); } { const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i); if (stemmed == NULL) { fprintf(stderr, "Out of memory"); exit(1); } else { if (pretty == 1) { fwrite(b, i, 1, f_out); fputs(" -> ", f_out); } else if (pretty == 2) { fwrite(b, i, 1, f_out); if (sb_stemmer_length(stemmer) > 0) { int j; if (inlen < 30) { for (j = 30 - inlen; j > 0; j--) fputs(" ", f_out); } else { fputs("\n", f_out); for (j = 30; j > 0; j--) fputs(" ", f_out); } } } fputs((char *)stemmed, f_out); putc('\n', f_out); } } } } error: if (b != 0) free(b); return; } /** Display the command line syntax, and then exit. * @param n The value to exit with. 
*/ static void usage(int n) { printf("usage: %s [-l ] [-i ] [-o ] [-c ] [-p[2]] [-h]\n" "\n" "The input file consists of a list of words to be stemmed, one per\n" "line. Words should be in lower case, but (for English) A-Z letters\n" "are mapped to their a-z equivalents anyway. If omitted, stdin is\n" "used.\n" "\n" "If -c is given, the argument is the character encoding of the input\n" "and output files. If it is omitted, the UTF-8 encoding is used.\n" "\n" "If -p is given the output file consists of each word of the input\n" "file followed by \"->\" followed by its stemmed equivalent.\n" "If -p2 is given the output file is a two column layout containing\n" "the input words in the first column and the stemmed eqivalents in\n" "the second column.\n" "Otherwise, the output file consists of the stemmed words, one per\n" "line.\n" "\n" "-h displays this help\n", progname); exit(n); } int main(int argc, char * argv[]) { char * in = 0; char * out = 0; FILE * f_in; FILE * f_out; struct sb_stemmer * stemmer; char * language = "english"; char * charenc = NULL; char * s; int i = 1; pretty = 0; progname = argv[0]; while(i < argc) { s = argv[i++]; if (s[0] == '-') { if (strcmp(s, "-o") == 0) { if (i >= argc) { fprintf(stderr, "%s requires an argument\n", s); exit(1); } out = argv[i++]; } else if (strcmp(s, "-i") == 0) { if (i >= argc) { fprintf(stderr, "%s requires an argument\n", s); exit(1); } in = argv[i++]; } else if (strcmp(s, "-l") == 0) { if (i >= argc) { fprintf(stderr, "%s requires an argument\n", s); exit(1); } language = argv[i++]; } else if (strcmp(s, "-c") == 0) { if (i >= argc) { fprintf(stderr, "%s requires an argument\n", s); exit(1); } charenc = argv[i++]; } else if (strcmp(s, "-p2") == 0) { pretty = 2; } else if (strcmp(s, "-p") == 0) { pretty = 1; } else if (strcmp(s, "-h") == 0) { usage(0); } else { fprintf(stderr, "option %s unknown\n", s); usage(1); } } else { fprintf(stderr, "unexpected parameter %s\n", s); usage(1); } } /* prepare the files */ f_in = 
(in == 0) ? stdin : fopen(in, "r"); if (f_in == 0) { fprintf(stderr, "file %s not found\n", in); exit(1); } f_out = (out == 0) ? stdout : fopen(out, "w"); if (f_out == 0) { fprintf(stderr, "file %s cannot be opened\n", out); exit(1); } /* do the stemming process: */ stemmer = sb_stemmer_new(language, charenc); if (stemmer == 0) { if (charenc == NULL) { fprintf(stderr, "language `%s' not available for stemming\n", language); exit(1); } else { fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc); exit(1); } } stem_file(stemmer, f_in, f_out); sb_stemmer_delete(stemmer); if (in != 0) (void) fclose(f_in); if (out != 0) (void) fclose(f_out); return 0; } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/include/000077500000000000000000000000001217574114600254535ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/include/libstemmer.h000066400000000000000000000055441217574114600277770ustar00rootroot00000000000000 /* Make header file work when included from C++ */ #ifdef __cplusplus extern "C" { #endif struct sb_stemmer; typedef unsigned char sb_symbol; /* FIXME - should be able to get a version number for each stemming * algorithm (which will be incremented each time the output changes). */ /** Returns an array of the names of the available stemming algorithms. * Note that these are the canonical names - aliases (ie, other names for * the same algorithm) will not be included in the list. * The list is terminated with a null pointer. * * The list must not be modified in any way. */ const char ** sb_stemmer_list(void); /** Create a new stemmer object, using the specified algorithm, for the * specified character encoding. * * All algorithms will usually be available in UTF-8, but may also be * available in other character encodings. * * @param algorithm The algorithm name. This is either the english * name of the algorithm, or the 2 or 3 letter ISO 639 codes for the * language. 
Note that case is significant in this parameter - the * value should be supplied in lower case. * * @param charenc The character encoding. NULL may be passed as * this value, in which case UTF-8 encoding will be assumed. Otherwise, * the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1), * "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian). Note that * case is significant in this parameter. * * @return NULL if the specified algorithm is not recognised, or the * algorithm is not available for the requested encoding. Otherwise, * returns a pointer to a newly created stemmer for the requested algorithm. * The returned pointer must be deleted by calling sb_stemmer_delete(). * * @note NULL will also be returned if an out of memory error occurs. */ struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc); /** Delete a stemmer object. * * This frees all resources allocated for the stemmer. After calling * this function, the supplied stemmer may no longer be used in any way. * * It is safe to pass a null pointer to this function - this will have * no effect. */ void sb_stemmer_delete(struct sb_stemmer * stemmer); /** Stem a word. * * The return value is owned by the stemmer - it must not be freed or * modified, and it will become invalid when the stemmer is called again, * or if the stemmer is freed. * * The length of the return value can be obtained using sb_stemmer_length(). * * If an out-of-memory error occurs, this will return NULL. */ const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size); /** Get the length of the result of the last stemmed word. * This should not be called before sb_stemmer_stem() has been called. 
*/ int sb_stemmer_length(struct sb_stemmer * stemmer); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/libstemmer/000077500000000000000000000000001217574114600261735ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/libstemmer/libstemmer.c000066400000000000000000000042531217574114600305060ustar00rootroot00000000000000 #include #include #include "../include/libstemmer.h" #include "../runtime/api.h" #include "modules.h" struct sb_stemmer { struct SN_env * (*create)(void); void (*close)(struct SN_env *); int (*stem)(struct SN_env *); struct SN_env * env; }; extern const char ** sb_stemmer_list(void) { return algorithm_names; } static stemmer_encoding_t sb_getenc(const char * charenc) { struct stemmer_encoding * encoding; if (charenc == NULL) return ENC_UTF_8; for (encoding = encodings; encoding->name != 0; encoding++) { if (strcmp(encoding->name, charenc) == 0) break; } if (encoding->name == NULL) return ENC_UNKNOWN; return encoding->enc; } extern struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc) { stemmer_encoding_t enc; struct stemmer_modules * module; struct sb_stemmer * stemmer; enc = sb_getenc(charenc); if (enc == ENC_UNKNOWN) return NULL; for (module = modules; module->name != 0; module++) { if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; } if (module->name == NULL) return NULL; stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); if (stemmer == NULL) return NULL; stemmer->create = module->create; stemmer->close = module->close; stemmer->stem = module->stem; stemmer->env = stemmer->create(); if (stemmer->env == NULL) { sb_stemmer_delete(stemmer); return NULL; } return stemmer; } void sb_stemmer_delete(struct sb_stemmer * stemmer) { if (stemmer == 0) return; if (stemmer->close == 0) return; stemmer->close(stemmer->env); stemmer->close = 0; free(stemmer); } const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, 
const sb_symbol * word, int size) { int ret; if (SN_set_current(stemmer->env, size, (const symbol *)(word))) { stemmer->env->l = 0; return NULL; } ret = stemmer->stem(stemmer->env); if (ret < 0) return NULL; stemmer->env->p[stemmer->env->l] = 0; return (const sb_symbol *)(stemmer->env->p); } int sb_stemmer_length(struct sb_stemmer * stemmer) { return stemmer->env->l; } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/libstemmer/libstemmer_c.in000066400000000000000000000042551217574114600311760ustar00rootroot00000000000000 #include #include #include "../include/libstemmer.h" #include "../runtime/api.h" #include "@MODULES_H@" struct sb_stemmer { struct SN_env * (*create)(void); void (*close)(struct SN_env *); int (*stem)(struct SN_env *); struct SN_env * env; }; extern const char ** sb_stemmer_list(void) { return algorithm_names; } static stemmer_encoding_t sb_getenc(const char * charenc) { struct stemmer_encoding * encoding; if (charenc == NULL) return ENC_UTF_8; for (encoding = encodings; encoding->name != 0; encoding++) { if (strcmp(encoding->name, charenc) == 0) break; } if (encoding->name == NULL) return ENC_UNKNOWN; return encoding->enc; } extern struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc) { stemmer_encoding_t enc; struct stemmer_modules * module; struct sb_stemmer * stemmer; enc = sb_getenc(charenc); if (enc == ENC_UNKNOWN) return NULL; for (module = modules; module->name != 0; module++) { if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; } if (module->name == NULL) return NULL; stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); if (stemmer == NULL) return NULL; stemmer->create = module->create; stemmer->close = module->close; stemmer->stem = module->stem; stemmer->env = stemmer->create(); if (stemmer->env == NULL) { sb_stemmer_delete(stemmer); return NULL; } return stemmer; } void sb_stemmer_delete(struct sb_stemmer * stemmer) { if (stemmer == 0) return; if (stemmer->close == 
0) return; stemmer->close(stemmer->env); stemmer->close = 0; free(stemmer); } const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) { int ret; if (SN_set_current(stemmer->env, size, (const symbol *)(word))) { stemmer->env->l = 0; return NULL; } ret = stemmer->stem(stemmer->env); if (ret < 0) return NULL; stemmer->env->p[stemmer->env->l] = 0; return (const sb_symbol *)(stemmer->env->p); } int sb_stemmer_length(struct sb_stemmer * stemmer) { return stemmer->env->l; } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/libstemmer/libstemmer_utf8.c000066400000000000000000000042601217574114600314520ustar00rootroot00000000000000 #include #include #include "../include/libstemmer.h" #include "../runtime/api.h" #include "modules_utf8.h" struct sb_stemmer { struct SN_env * (*create)(void); void (*close)(struct SN_env *); int (*stem)(struct SN_env *); struct SN_env * env; }; extern const char ** sb_stemmer_list(void) { return algorithm_names; } static stemmer_encoding_t sb_getenc(const char * charenc) { struct stemmer_encoding * encoding; if (charenc == NULL) return ENC_UTF_8; for (encoding = encodings; encoding->name != 0; encoding++) { if (strcmp(encoding->name, charenc) == 0) break; } if (encoding->name == NULL) return ENC_UNKNOWN; return encoding->enc; } extern struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc) { stemmer_encoding_t enc; struct stemmer_modules * module; struct sb_stemmer * stemmer; enc = sb_getenc(charenc); if (enc == ENC_UNKNOWN) return NULL; for (module = modules; module->name != 0; module++) { if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; } if (module->name == NULL) return NULL; stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); if (stemmer == NULL) return NULL; stemmer->create = module->create; stemmer->close = module->close; stemmer->stem = module->stem; stemmer->env = stemmer->create(); if (stemmer->env == NULL) { 
sb_stemmer_delete(stemmer); return NULL; } return stemmer; } void sb_stemmer_delete(struct sb_stemmer * stemmer) { if (stemmer == 0) return; if (stemmer->close == 0) return; stemmer->close(stemmer->env); stemmer->close = 0; free(stemmer); } const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) { int ret; if (SN_set_current(stemmer->env, size, (const symbol *)(word))) { stemmer->env->l = 0; return NULL; } ret = stemmer->stem(stemmer->env); if (ret < 0) return NULL; stemmer->env->p[stemmer->env->l] = 0; return (const sb_symbol *)(stemmer->env->p); } int sb_stemmer_length(struct sb_stemmer * stemmer) { return stemmer->env->l; } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/libstemmer/modules.h000066400000000000000000000310551217574114600300200ustar00rootroot00000000000000/* libstemmer/modules.h: List of stemming modules. * * This file is generated by mkmodules.pl from a list of module names. * Do not edit manually. * * Modules included by this file are: danish, dutch, english, finnish, french, * german, hungarian, italian, norwegian, porter, portuguese, romanian, * russian, spanish, swedish, turkish */ #include "../src_c/stem_ISO_8859_1_danish.h" #include "../src_c/stem_UTF_8_danish.h" #include "../src_c/stem_ISO_8859_1_dutch.h" #include "../src_c/stem_UTF_8_dutch.h" #include "../src_c/stem_ISO_8859_1_english.h" #include "../src_c/stem_UTF_8_english.h" #include "../src_c/stem_ISO_8859_1_finnish.h" #include "../src_c/stem_UTF_8_finnish.h" #include "../src_c/stem_ISO_8859_1_french.h" #include "../src_c/stem_UTF_8_french.h" #include "../src_c/stem_ISO_8859_1_german.h" #include "../src_c/stem_UTF_8_german.h" #include "../src_c/stem_ISO_8859_1_hungarian.h" #include "../src_c/stem_UTF_8_hungarian.h" #include "../src_c/stem_ISO_8859_1_italian.h" #include "../src_c/stem_UTF_8_italian.h" #include "../src_c/stem_ISO_8859_1_norwegian.h" #include "../src_c/stem_UTF_8_norwegian.h" #include "../src_c/stem_ISO_8859_1_porter.h" 
#include "../src_c/stem_UTF_8_porter.h" #include "../src_c/stem_ISO_8859_1_portuguese.h" #include "../src_c/stem_UTF_8_portuguese.h" #include "../src_c/stem_ISO_8859_2_romanian.h" #include "../src_c/stem_UTF_8_romanian.h" #include "../src_c/stem_KOI8_R_russian.h" #include "../src_c/stem_UTF_8_russian.h" #include "../src_c/stem_ISO_8859_1_spanish.h" #include "../src_c/stem_UTF_8_spanish.h" #include "../src_c/stem_ISO_8859_1_swedish.h" #include "../src_c/stem_UTF_8_swedish.h" #include "../src_c/stem_UTF_8_turkish.h" typedef enum { ENC_UNKNOWN=0, ENC_ISO_8859_1, ENC_ISO_8859_2, ENC_KOI8_R, ENC_UTF_8 } stemmer_encoding_t; struct stemmer_encoding { const char * name; stemmer_encoding_t enc; }; static struct stemmer_encoding encodings[] = { {"ISO_8859_1", ENC_ISO_8859_1}, {"ISO_8859_2", ENC_ISO_8859_2}, {"KOI8_R", ENC_KOI8_R}, {"UTF_8", ENC_UTF_8}, {0,ENC_UNKNOWN} }; struct stemmer_modules { const char * name; stemmer_encoding_t enc; struct SN_env * (*create)(void); void (*close)(struct SN_env *); int (*stem)(struct SN_env *); }; static struct stemmer_modules modules[] = { {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, {"deu", ENC_UTF_8, 
german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, {"finnish", 
ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, 
italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem}, {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, 
porter_UTF_8_stem}, {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"spa", 
ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {0,ENC_UNKNOWN,0,0,0} }; static const char * algorithm_names[] = { "danish", "dutch", "english", "finnish", "french", "german", "hungarian", "italian", "norwegian", "porter", "portuguese", "romanian", "russian", "spanish", "swedish", "turkish", 0 }; LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/libstemmer/modules.txt000066400000000000000000000047331217574114600304130ustar00rootroot00000000000000# This file contains a list of stemmers to include in the distribution. # The format is a set of space separated lines - on each line: # First item is name of stemmer. # Second item is comma separated list of character sets. # Third item is comma separated list of names to refer to the stemmer by. # # Lines starting with a #, or blank lines, are ignored. 
# List all the main algorithms for each language, in UTF-8, and also with # the most commonly used encoding. danish UTF_8,ISO_8859_1 danish,da,dan dutch UTF_8,ISO_8859_1 dutch,nl,dut,nld english UTF_8,ISO_8859_1 english,en,eng finnish UTF_8,ISO_8859_1 finnish,fi,fin french UTF_8,ISO_8859_1 french,fr,fre,fra german UTF_8,ISO_8859_1 german,de,ger,deu hungarian UTF_8,ISO_8859_1 hungarian,hu,hun italian UTF_8,ISO_8859_1 italian,it,ita norwegian UTF_8,ISO_8859_1 norwegian,no,nor portuguese UTF_8,ISO_8859_1 portuguese,pt,por romanian UTF_8,ISO_8859_2 romanian,ro,rum,ron russian UTF_8,KOI8_R russian,ru,rus spanish UTF_8,ISO_8859_1 spanish,es,esl,spa swedish UTF_8,ISO_8859_1 swedish,sv,swe turkish UTF_8 turkish,tr,tur # Also include the traditional porter algorithm for english. # The porter algorithm is included in the libstemmer distribution to assist # with backwards compatibility, but for new systems the english algorithm # should be used in preference. porter UTF_8,ISO_8859_1 porter # Some other stemmers in the snowball project are not included in the standard # distribution. To compile a libstemmer with them in, add them to this list, # and regenerate the distribution. (You will need a full source checkout for # this.) They are included in the snowball website as curiosities, but are not # intended for general use, and use of them is is not fully supported. These # algorithms are: # # german2 - This is a slight modification of the german stemmer. #german2 UTF_8,ISO_8859_1 german2 # # kraaij_pohlmann - This is a different dutch stemmer. #kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann # # lovins - This is an english stemmer, but fairly outdated, and # only really applicable to a restricted type of input text # (keywords in academic publications). 
#lovins UTF_8,ISO_8859_1 lovins LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/libstemmer/modules_utf8.h000066400000000000000000000146631217574114600307740ustar00rootroot00000000000000/* libstemmer/modules_utf8.h: List of stemming modules. * * This file is generated by mkmodules.pl from a list of module names. * Do not edit manually. * * Modules included by this file are: danish, dutch, english, finnish, french, * german, hungarian, italian, norwegian, porter, portuguese, romanian, * russian, spanish, swedish, turkish */ #include "../src_c/stem_UTF_8_danish.h" #include "../src_c/stem_UTF_8_dutch.h" #include "../src_c/stem_UTF_8_english.h" #include "../src_c/stem_UTF_8_finnish.h" #include "../src_c/stem_UTF_8_french.h" #include "../src_c/stem_UTF_8_german.h" #include "../src_c/stem_UTF_8_hungarian.h" #include "../src_c/stem_UTF_8_italian.h" #include "../src_c/stem_UTF_8_norwegian.h" #include "../src_c/stem_UTF_8_porter.h" #include "../src_c/stem_UTF_8_portuguese.h" #include "../src_c/stem_UTF_8_romanian.h" #include "../src_c/stem_UTF_8_russian.h" #include "../src_c/stem_UTF_8_spanish.h" #include "../src_c/stem_UTF_8_swedish.h" #include "../src_c/stem_UTF_8_turkish.h" typedef enum { ENC_UNKNOWN=0, ENC_UTF_8 } stemmer_encoding_t; struct stemmer_encoding { const char * name; stemmer_encoding_t enc; }; static struct stemmer_encoding encodings[] = { {"UTF_8", ENC_UTF_8}, {0,ENC_UNKNOWN} }; struct stemmer_modules { const char * name; stemmer_encoding_t enc; struct SN_env * (*create)(void); void (*close)(struct SN_env *); int (*stem)(struct SN_env *); }; static struct stemmer_modules modules[] = { {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"deu", 
ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 
{"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"swedish", 
ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {0,ENC_UNKNOWN,0,0,0} }; static const char * algorithm_names[] = { "danish", "dutch", "english", "finnish", "french", "german", "hungarian", "italian", "norwegian", "porter", "portuguese", "romanian", "russian", "spanish", "swedish", "turkish", 0 }; LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/libstemmer/modules_utf8.txt000066400000000000000000000046521217574114600313610ustar00rootroot00000000000000# This file contains a list of stemmers to include in the distribution. # The format is a set of space separated lines - on each line: # First item is name of stemmer. # Second item is comma separated list of character sets. # Third item is comma separated list of names to refer to the stemmer by. # # Lines starting with a #, or blank lines, are ignored. # List all the main algorithms for each language, in UTF-8. danish UTF_8 danish,da,dan dutch UTF_8 dutch,nl,dut,nld english UTF_8 english,en,eng finnish UTF_8 finnish,fi,fin french UTF_8 french,fr,fre,fra german UTF_8 german,de,ger,deu hungarian UTF_8 hungarian,hu,hun italian UTF_8 italian,it,ita norwegian UTF_8 norwegian,no,nor portuguese UTF_8 portuguese,pt,por romanian UTF_8 romanian,ro,rum,ron russian UTF_8 russian,ru,rus spanish UTF_8 spanish,es,esl,spa swedish UTF_8 swedish,sv,swe turkish UTF_8 turkish,tr,tur # Also include the traditional porter algorithm for english. # The porter algorithm is included in the libstemmer distribution to assist # with backwards compatibility, but for new systems the english algorithm # should be used in preference. 
porter UTF_8 porter # Some other stemmers in the snowball project are not included in the standard # distribution. To compile a libstemmer with them in, add them to this list, # and regenerate the distribution. (You will need a full source checkout for # this.) They are included in the snowball website as curiosities, but are not # intended for general use, and use of them is is not fully supported. These # algorithms are: # # german2 - This is a slight modification of the german stemmer. #german2 UTF_8 german2 # # kraaij_pohlmann - This is a different dutch stemmer. #kraaij_pohlmann UTF_8 kraaij_pohlmann # # lovins - This is an english stemmer, but fairly outdated, and # only really applicable to a restricted type of input text # (keywords in academic publications). #lovins UTF_8 lovins LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/mkinc.mak000066400000000000000000000050671217574114600256330ustar00rootroot00000000000000# libstemmer/mkinc.mak: List of stemming module source files # # This file is generated by mkmodules.pl from a list of module names. # Do not edit manually. 
# # Modules included by this file are: danish, dutch, english, finnish, french, # german, hungarian, italian, norwegian, porter, portuguese, romanian, # russian, spanish, swedish, turkish snowball_sources= \ src_c/stem_ISO_8859_1_danish.c \ src_c/stem_UTF_8_danish.c \ src_c/stem_ISO_8859_1_dutch.c \ src_c/stem_UTF_8_dutch.c \ src_c/stem_ISO_8859_1_english.c \ src_c/stem_UTF_8_english.c \ src_c/stem_ISO_8859_1_finnish.c \ src_c/stem_UTF_8_finnish.c \ src_c/stem_ISO_8859_1_french.c \ src_c/stem_UTF_8_french.c \ src_c/stem_ISO_8859_1_german.c \ src_c/stem_UTF_8_german.c \ src_c/stem_ISO_8859_1_hungarian.c \ src_c/stem_UTF_8_hungarian.c \ src_c/stem_ISO_8859_1_italian.c \ src_c/stem_UTF_8_italian.c \ src_c/stem_ISO_8859_1_norwegian.c \ src_c/stem_UTF_8_norwegian.c \ src_c/stem_ISO_8859_1_porter.c \ src_c/stem_UTF_8_porter.c \ src_c/stem_ISO_8859_1_portuguese.c \ src_c/stem_UTF_8_portuguese.c \ src_c/stem_ISO_8859_2_romanian.c \ src_c/stem_UTF_8_romanian.c \ src_c/stem_KOI8_R_russian.c \ src_c/stem_UTF_8_russian.c \ src_c/stem_ISO_8859_1_spanish.c \ src_c/stem_UTF_8_spanish.c \ src_c/stem_ISO_8859_1_swedish.c \ src_c/stem_UTF_8_swedish.c \ src_c/stem_UTF_8_turkish.c \ runtime/api.c \ runtime/utilities.c \ libstemmer/libstemmer.c snowball_headers= \ src_c/stem_ISO_8859_1_danish.h \ src_c/stem_UTF_8_danish.h \ src_c/stem_ISO_8859_1_dutch.h \ src_c/stem_UTF_8_dutch.h \ src_c/stem_ISO_8859_1_english.h \ src_c/stem_UTF_8_english.h \ src_c/stem_ISO_8859_1_finnish.h \ src_c/stem_UTF_8_finnish.h \ src_c/stem_ISO_8859_1_french.h \ src_c/stem_UTF_8_french.h \ src_c/stem_ISO_8859_1_german.h \ src_c/stem_UTF_8_german.h \ src_c/stem_ISO_8859_1_hungarian.h \ src_c/stem_UTF_8_hungarian.h \ src_c/stem_ISO_8859_1_italian.h \ src_c/stem_UTF_8_italian.h \ src_c/stem_ISO_8859_1_norwegian.h \ src_c/stem_UTF_8_norwegian.h \ src_c/stem_ISO_8859_1_porter.h \ src_c/stem_UTF_8_porter.h \ src_c/stem_ISO_8859_1_portuguese.h \ src_c/stem_UTF_8_portuguese.h \ src_c/stem_ISO_8859_2_romanian.h \ 
src_c/stem_UTF_8_romanian.h \ src_c/stem_KOI8_R_russian.h \ src_c/stem_UTF_8_russian.h \ src_c/stem_ISO_8859_1_spanish.h \ src_c/stem_UTF_8_spanish.h \ src_c/stem_ISO_8859_1_swedish.h \ src_c/stem_UTF_8_swedish.h \ src_c/stem_UTF_8_turkish.h \ include/libstemmer.h \ libstemmer/modules.h \ runtime/api.h \ runtime/header.h LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/mkinc_utf8.mak000066400000000000000000000030221217574114600265660ustar00rootroot00000000000000# libstemmer/mkinc_utf8.mak: List of stemming module source files # # This file is generated by mkmodules.pl from a list of module names. # Do not edit manually. # # Modules included by this file are: danish, dutch, english, finnish, french, # german, hungarian, italian, norwegian, porter, portuguese, romanian, # russian, spanish, swedish, turkish snowball_sources= \ src_c/stem_UTF_8_danish.c \ src_c/stem_UTF_8_dutch.c \ src_c/stem_UTF_8_english.c \ src_c/stem_UTF_8_finnish.c \ src_c/stem_UTF_8_french.c \ src_c/stem_UTF_8_german.c \ src_c/stem_UTF_8_hungarian.c \ src_c/stem_UTF_8_italian.c \ src_c/stem_UTF_8_norwegian.c \ src_c/stem_UTF_8_porter.c \ src_c/stem_UTF_8_portuguese.c \ src_c/stem_UTF_8_romanian.c \ src_c/stem_UTF_8_russian.c \ src_c/stem_UTF_8_spanish.c \ src_c/stem_UTF_8_swedish.c \ src_c/stem_UTF_8_turkish.c \ runtime/api.c \ runtime/utilities.c \ libstemmer/libstemmer_utf8.c snowball_headers= \ src_c/stem_UTF_8_danish.h \ src_c/stem_UTF_8_dutch.h \ src_c/stem_UTF_8_english.h \ src_c/stem_UTF_8_finnish.h \ src_c/stem_UTF_8_french.h \ src_c/stem_UTF_8_german.h \ src_c/stem_UTF_8_hungarian.h \ src_c/stem_UTF_8_italian.h \ src_c/stem_UTF_8_norwegian.h \ src_c/stem_UTF_8_porter.h \ src_c/stem_UTF_8_portuguese.h \ src_c/stem_UTF_8_romanian.h \ src_c/stem_UTF_8_russian.h \ src_c/stem_UTF_8_spanish.h \ src_c/stem_UTF_8_swedish.h \ src_c/stem_UTF_8_turkish.h \ include/libstemmer.h \ libstemmer/modules_utf8.h \ runtime/api.h \ runtime/header.h 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/runtime/000077500000000000000000000000001217574114600255135ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/runtime/api.c000066400000000000000000000025501217574114600264320ustar00rootroot00000000000000 #include /* for calloc, free */ #include "header.h" extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size) { struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); if (z == NULL) return NULL; z->p = create_s(); if (z->p == NULL) goto error; if (S_size) { int i; z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); if (z->S == NULL) goto error; for (i = 0; i < S_size; i++) { z->S[i] = create_s(); if (z->S[i] == NULL) goto error; } } if (I_size) { z->I = (int *) calloc(I_size, sizeof(int)); if (z->I == NULL) goto error; } if (B_size) { z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char)); if (z->B == NULL) goto error; } return z; error: SN_close_env(z, S_size); return NULL; } extern void SN_close_env(struct SN_env * z, int S_size) { if (z == NULL) return; if (S_size) { int i; for (i = 0; i < S_size; i++) { lose_s(z->S[i]); } free(z->S); } free(z->I); free(z->B); if (z->p) lose_s(z->p); free(z); } extern int SN_set_current(struct SN_env * z, int size, const symbol * s) { int err = replace_s(z, 0, z->l, size, s, NULL); z->c = 0; return err; } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/runtime/api.h000066400000000000000000000013731217574114600264410ustar00rootroot00000000000000 typedef unsigned char symbol; /* Or replace 'char' above with 'short' for 16 bit characters. More precisely, replace 'char' with whatever type guarantees the character width you need. Note however that sizeof(symbol) should divide HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise there is an alignment problem. In the unlikely event of a problem here, consult Martin Porter. 
*/ struct SN_env { symbol * p; int c; int l; int lb; int bra; int ket; symbol * * S; int * I; unsigned char * B; }; extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size); extern void SN_close_env(struct SN_env * z, int S_size); extern int SN_set_current(struct SN_env * z, int size, const symbol * s); LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/runtime/header.h000066400000000000000000000046671217574114600271310ustar00rootroot00000000000000 #include #include "api.h" #define MAXINT INT_MAX #define MININT INT_MIN #define HEAD 2*sizeof(int) #define SIZE(p) ((int *)(p))[-1] #define SET_SIZE(p, n) ((int *)(p))[-1] = n #define CAPACITY(p) ((int *)(p))[-2] struct among { int s_size; /* number of chars in string */ const symbol * s; /* search string */ int substring_i;/* index to longest matching substring */ int result; /* result of the lookup */ int (* function)(struct SN_env *); }; extern symbol * create_s(void); extern void lose_s(symbol * p); extern int skip_utf8(const symbol * p, int c, int lb, int l, int n); extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); extern int eq_s(struct SN_env * z, int s_size, const symbol * s); extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s); extern 
int eq_v(struct SN_env * z, const symbol * p); extern int eq_v_b(struct SN_env * z, const symbol * p); extern int find_among(struct SN_env * z, const struct among * v, int v_size); extern int find_among_b(struct SN_env * z, const struct among * v, int v_size); extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment); extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s); extern int slice_from_v(struct SN_env * z, const symbol * p); extern int slice_del(struct SN_env * z); extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s); extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p); extern symbol * slice_to(struct SN_env * z, symbol * p); extern symbol * assign_to(struct SN_env * z, symbol * p); extern void debug(struct SN_env * z, int number, int line_count); LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/runtime/utilities.c000066400000000000000000000313431217574114600276760ustar00rootroot00000000000000 #include #include #include #include "header.h" #define unless(C) if(!(C)) #define CREATE_SIZE 1 extern symbol * create_s(void) { symbol * p; void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)); if (mem == NULL) return NULL; p = (symbol *) (HEAD + (char *) mem); CAPACITY(p) = CREATE_SIZE; SET_SIZE(p, CREATE_SIZE); return p; } extern void lose_s(symbol * p) { if (p == NULL) return; free((char *) p - HEAD); } /* new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new position, or 0 on failure. -- used to implement hop and next in the utf8 case. 
*/ extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) { int b; if (n >= 0) { for (; n > 0; n--) { if (c >= l) return -1; b = p[c++]; if (b >= 0xC0) { /* 1100 0000 */ while (c < l) { b = p[c]; if (b >= 0xC0 || b < 0x80) break; /* break unless b is 10------ */ c++; } } } } else { for (; n < 0; n++) { if (c <= lb) return -1; b = p[--c]; if (b >= 0x80) { /* 1000 0000 */ while (c > lb) { b = p[c]; if (b >= 0xC0) break; /* 1100 0000 */ c--; } } } } return c; } /* Code for character groupings: utf8 cases */ static int get_utf8(const symbol * p, int c, int l, int * slot) { int b0, b1; if (c >= l) return 0; b0 = p[c++]; if (b0 < 0xC0 || c == l) { /* 1100 0000 */ * slot = b0; return 1; } b1 = p[c++]; if (b0 < 0xE0 || c == l) { /* 1110 0000 */ * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2; } * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3; } static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { int b0, b1; if (c <= lb) return 0; b0 = p[--c]; if (b0 < 0x80 || c == lb) { /* 1000 0000 */ * slot = b0; return 1; } b1 = p[--c]; if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */ * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2; } * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; } extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = get_utf8(z->p, z->c, z->l, & ch); unless (w) return -1; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c += w; } while (repeat); return 0; } extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = get_b_utf8(z->p, z->c, z->lb, & ch); unless (w) return -1; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c -= w; } while (repeat); return 0; } extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = 
get_utf8(z->p, z->c, z->l, & ch); unless (w) return -1; unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c += w; } while (repeat); return 0; } extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = get_b_utf8(z->p, z->c, z->lb, & ch); unless (w) return -1; unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c -= w; } while (repeat); return 0; } /* Code for character groupings: non-utf8 cases */ extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; if (z->c >= z->l) return -1; ch = z->p[z->c]; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1; z->c++; } while (repeat); return 0; } extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; if (z->c <= z->lb) return -1; ch = z->p[z->c - 1]; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1; z->c--; } while (repeat); return 0; } extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; if (z->c >= z->l) return -1; ch = z->p[z->c]; unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1; z->c++; } while (repeat); return 0; } extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; if (z->c <= z->lb) return -1; ch = z->p[z->c - 1]; unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1; z->c--; } while (repeat); return 0; } extern int eq_s(struct SN_env * z, int s_size, const symbol * s) { if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0; z->c += s_size; return 1; } extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) { if (z->c - z->lb < s_size || 
memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0; z->c -= s_size; return 1; } extern int eq_v(struct SN_env * z, const symbol * p) { return eq_s(z, SIZE(p), p); } extern int eq_v_b(struct SN_env * z, const symbol * p) { return eq_s_b(z, SIZE(p), p); } extern int find_among(struct SN_env * z, const struct among * v, int v_size) { int i = 0; int j = v_size; int c = z->c; int l = z->l; symbol * q = z->p + c; const struct among * w; int common_i = 0; int common_j = 0; int first_key_inspected = 0; while(1) { int k = i + ((j - i) >> 1); int diff = 0; int common = common_i < common_j ? common_i : common_j; /* smaller */ w = v + k; { int i2; for (i2 = common; i2 < w->s_size; i2++) { if (c + common == l) { diff = -1; break; } diff = q[common] - w->s[i2]; if (diff != 0) break; common++; } } if (diff < 0) { j = k; common_j = common; } else { i = k; common_i = common; } if (j - i <= 1) { if (i > 0) break; /* v->s has been inspected */ if (j == i) break; /* only one item in v */ /* - but now we need to go round once more to get v->s inspected. This looks messy, but is actually the optimal approach. */ if (first_key_inspected) break; first_key_inspected = 1; } } while(1) { w = v + i; if (common_i >= w->s_size) { z->c = c + w->s_size; if (w->function == 0) return w->result; { int res = w->function(z); z->c = c + w->s_size; if (res) return w->result; } } i = w->substring_i; if (i < 0) return 0; } } /* find_among_b is for backwards processing. Same comments apply */ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) { int i = 0; int j = v_size; int c = z->c; int lb = z->lb; symbol * q = z->p + c - 1; const struct among * w; int common_i = 0; int common_j = 0; int first_key_inspected = 0; while(1) { int k = i + ((j - i) >> 1); int diff = 0; int common = common_i < common_j ? 
common_i : common_j; w = v + k; { int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) { if (c - common == lb) { diff = -1; break; } diff = q[- common] - w->s[i2]; if (diff != 0) break; common++; } } if (diff < 0) { j = k; common_j = common; } else { i = k; common_i = common; } if (j - i <= 1) { if (i > 0) break; if (j == i) break; if (first_key_inspected) break; first_key_inspected = 1; } } while(1) { w = v + i; if (common_i >= w->s_size) { z->c = c - w->s_size; if (w->function == 0) return w->result; { int res = w->function(z); z->c = c - w->s_size; if (res) return w->result; } } i = w->substring_i; if (i < 0) return 0; } } /* Increase the size of the buffer pointed to by p to at least n symbols. * If insufficient memory, returns NULL and frees the old buffer. */ static symbol * increase_size(symbol * p, int n) { symbol * q; int new_size = n + 20; void * mem = realloc((char *) p - HEAD, HEAD + (new_size + 1) * sizeof(symbol)); if (mem == NULL) { lose_s(p); return NULL; } q = (symbol *) (HEAD + (char *)mem); CAPACITY(q) = new_size; return q; } /* to replace symbols between c_bra and c_ket in z->p by the s_size symbols at s. Returns 0 on success, -1 on error. Also, frees z->p (and sets it to NULL) on error. 
*/ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr) { int adjustment; int len; if (z->p == NULL) { z->p = create_s(); if (z->p == NULL) return -1; } adjustment = s_size - (c_ket - c_bra); len = SIZE(z->p); if (adjustment != 0) { if (adjustment + len > CAPACITY(z->p)) { z->p = increase_size(z->p, adjustment + len); if (z->p == NULL) return -1; } memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol)); SET_SIZE(z->p, adjustment + len); z->l += adjustment; if (z->c >= c_ket) z->c += adjustment; else if (z->c > c_bra) z->c = c_bra; } unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); if (adjptr != NULL) *adjptr = adjustment; return 0; } static int slice_check(struct SN_env * z) { if (z->bra < 0 || z->bra > z->ket || z->ket > z->l || z->p == NULL || z->l > SIZE(z->p)) /* this line could be removed */ { #if 0 fprintf(stderr, "faulty slice operation:\n"); debug(z, -1, 0); #endif return -1; } return 0; } extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) { if (slice_check(z)) return -1; return replace_s(z, z->bra, z->ket, s_size, s, NULL); } extern int slice_from_v(struct SN_env * z, const symbol * p) { return slice_from_s(z, SIZE(p), p); } extern int slice_del(struct SN_env * z) { return slice_from_s(z, 0, 0); } extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) { int adjustment; if (replace_s(z, bra, ket, s_size, s, &adjustment)) return -1; if (bra <= z->bra) z->bra += adjustment; if (bra <= z->ket) z->ket += adjustment; return 0; } extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) { int adjustment; if (replace_s(z, bra, ket, SIZE(p), p, &adjustment)) return -1; if (bra <= z->bra) z->bra += adjustment; if (bra <= z->ket) z->ket += adjustment; return 0; } extern symbol * slice_to(struct SN_env * z, symbol * p) { if (slice_check(z)) { lose_s(p); return NULL; } { int len = z->ket - z->bra; 
if (CAPACITY(p) < len) { p = increase_size(p, len); if (p == NULL) return NULL; } memmove(p, z->p + z->bra, len * sizeof(symbol)); SET_SIZE(p, len); } return p; } extern symbol * assign_to(struct SN_env * z, symbol * p) { int len = z->l; if (CAPACITY(p) < len) { p = increase_size(p, len); if (p == NULL) return NULL; } memmove(p, z->p, len * sizeof(symbol)); SET_SIZE(p, len); return p; } #if 0 extern void debug(struct SN_env * z, int number, int line_count) { int i; int limit = SIZE(z->p); /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); for (i = 0; i <= limit; i++) { if (z->lb == i) printf("{"); if (z->bra == i) printf("["); if (z->c == i) printf("|"); if (z->ket == i) printf("]"); if (z->l == i) printf("}"); if (i < limit) { int ch = z->p[i]; if (ch == 0) ch = '#'; printf("%c", ch); } } printf("'\n"); } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/000077500000000000000000000000001217574114600251215ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_danish.c000066400000000000000000000264001217574114600312140ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int danish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_undouble(struct SN_env * z); static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * danish_ISO_8859_1_create_env(void); extern void danish_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 'h', 'e', 'd' }; static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; static 
const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; static const symbol s_0_3[1] = { 'e' }; static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; static const symbol s_0_7[3] = { 'e', 'n', 'e' }; static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; static const symbol s_0_9[3] = { 'e', 'r', 'e' }; static const symbol s_0_10[2] = { 'e', 'n' }; static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; static const symbol s_0_13[2] = { 'e', 'r' }; static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; static const symbol s_0_16[1] = { 's' }; static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; static const symbol s_0_18[2] = { 'e', 's' }; static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; static const symbol s_0_24[3] = { 'e', 'n', 's' }; static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; static const symbol s_0_27[3] = { 'e', 'r', 's' }; static const symbol s_0_28[3] = { 'e', 't', 's' }; static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; static const symbol s_0_30[2] = { 'e', 't' }; static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; static const struct among a_0[32] = { /* 0 */ { 3, s_0_0, -1, 1, 0}, /* 1 */ { 5, s_0_1, 0, 1, 0}, /* 2 */ { 4, s_0_2, -1, 1, 0}, /* 3 */ { 1, s_0_3, -1, 1, 0}, /* 4 */ { 5, s_0_4, 3, 1, 0}, /* 5 */ { 4, s_0_5, 3, 1, 0}, /* 6 */ { 6, s_0_6, 5, 1, 0}, /* 7 */ { 3, s_0_7, 3, 1, 0}, /* 8 */ { 4, s_0_8, 3, 1, 0}, /* 9 */ { 3, s_0_9, 3, 1, 0}, /* 10 */ { 2, 
s_0_10, -1, 1, 0}, /* 11 */ { 5, s_0_11, 10, 1, 0}, /* 12 */ { 4, s_0_12, 10, 1, 0}, /* 13 */ { 2, s_0_13, -1, 1, 0}, /* 14 */ { 5, s_0_14, 13, 1, 0}, /* 15 */ { 4, s_0_15, 13, 1, 0}, /* 16 */ { 1, s_0_16, -1, 2, 0}, /* 17 */ { 4, s_0_17, 16, 1, 0}, /* 18 */ { 2, s_0_18, 16, 1, 0}, /* 19 */ { 5, s_0_19, 18, 1, 0}, /* 20 */ { 7, s_0_20, 19, 1, 0}, /* 21 */ { 4, s_0_21, 18, 1, 0}, /* 22 */ { 5, s_0_22, 18, 1, 0}, /* 23 */ { 4, s_0_23, 18, 1, 0}, /* 24 */ { 3, s_0_24, 16, 1, 0}, /* 25 */ { 6, s_0_25, 24, 1, 0}, /* 26 */ { 5, s_0_26, 24, 1, 0}, /* 27 */ { 3, s_0_27, 16, 1, 0}, /* 28 */ { 3, s_0_28, 16, 1, 0}, /* 29 */ { 5, s_0_29, 28, 1, 0}, /* 30 */ { 2, s_0_30, -1, 1, 0}, /* 31 */ { 4, s_0_31, 30, 1, 0} }; static const symbol s_1_0[2] = { 'g', 'd' }; static const symbol s_1_1[2] = { 'd', 't' }; static const symbol s_1_2[2] = { 'g', 't' }; static const symbol s_1_3[2] = { 'k', 't' }; static const struct among a_1[4] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0} }; static const symbol s_2_0[2] = { 'i', 'g' }; static const symbol s_2_1[3] = { 'l', 'i', 'g' }; static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; static const symbol s_2_3[3] = { 'e', 'l', 's' }; static const symbol s_2_4[4] = { 'l', 0xF8, 's', 't' }; static const struct among a_2[5] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 4, s_2_2, 1, 1, 0}, /* 3 */ { 3, s_2_3, -1, 1, 0}, /* 4 */ { 4, s_2_4, -1, 2, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; static const symbol s_0[] = { 's', 't' }; static const symbol s_1[] = { 'i', 'g' }; static const symbol s_2[] = { 'l', 0xF8, 's' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 33 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) 
return 0; z->c = ret; /* hop, line 33 */ } z->I[1] = z->c; /* setmark x, line 33 */ z->c = c_test; } if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ { /* gopast */ /* non v, line 34 */ int ret = in_grouping(z, g_v, 97, 248, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 34 */ /* try, line 35 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 41 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 41 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 41 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 41 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 48 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; { int ret = slice_del(z); /* delete, line 50 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 55 */ { int mlimit; /* setlimit, line 56 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 56 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 56 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ z->bra = z->c; /* ], line 56 */ z->lb = mlimit; } z->c = z->l - m_test; } if (z->c <= z->lb) return 0; z->c--; /* next, line 62 */ z->bra = z->c; /* ], line 62 */ { int ret = slice_del(z); /* 
delete, line 62 */ if (ret < 0) return ret; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ z->ket = z->c; /* [, line 66 */ if (!(eq_s_b(z, 2, s_0))) goto lab0; z->bra = z->c; /* ], line 66 */ if (!(eq_s_b(z, 2, s_1))) goto lab0; { int ret = slice_del(z); /* delete, line 66 */ if (ret < 0) return ret; } lab0: z->c = z->l - m1; } { int mlimit; /* setlimit, line 67 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 67 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; z->ket = z->c; /* [, line 67 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 67 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 70 */ if (ret < 0) return ret; } { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ if (ret < 0) return ret; } lab1: z->c = z->l - m3; } break; case 2: { int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */ if (ret < 0) return ret; } break; } return 1; } static int r_undouble(struct SN_env * z) { { int mlimit; /* setlimit, line 76 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 76 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 76 */ if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 76 */ z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */ if (z->S[0] == 0) return -1; /* -> ch, line 76 */ z->lb = mlimit; } if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } return 1; } extern int 
danish_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 84 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 84 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 85 */ { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 86 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 88 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ { int ret = r_undouble(z); if (ret == 0) goto lab4; /* call undouble, line 89 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } z->c = z->lb; return 1; } extern struct SN_env * danish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 0); } extern void danish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_danish.h000066400000000000000000000005051217574114600312170ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * danish_ISO_8859_1_create_env(void); extern void danish_ISO_8859_1_close_env(struct SN_env * z); extern int danish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c000066400000000000000000000505011217574114600310540ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include 
"../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int dutch_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_standard_suffix(struct SN_env * z); static int r_undouble(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_en_ending(struct SN_env * z); static int r_e_ending(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * dutch_ISO_8859_1_create_env(void); extern void dutch_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 0xE1 }; static const symbol s_0_2[1] = { 0xE4 }; static const symbol s_0_3[1] = { 0xE9 }; static const symbol s_0_4[1] = { 0xEB }; static const symbol s_0_5[1] = { 0xED }; static const symbol s_0_6[1] = { 0xEF }; static const symbol s_0_7[1] = { 0xF3 }; static const symbol s_0_8[1] = { 0xF6 }; static const symbol s_0_9[1] = { 0xFA }; static const symbol s_0_10[1] = { 0xFC }; static const struct among a_0[11] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 1, 0}, /* 3 */ { 1, s_0_3, 0, 2, 0}, /* 4 */ { 1, s_0_4, 0, 2, 0}, /* 5 */ { 1, s_0_5, 0, 3, 0}, /* 6 */ { 1, s_0_6, 0, 3, 0}, /* 7 */ { 1, s_0_7, 0, 4, 0}, /* 8 */ { 1, s_0_8, 0, 4, 0}, /* 9 */ { 1, s_0_9, 0, 5, 0}, /* 10 */ { 1, s_0_10, 0, 5, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'Y' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_1_1, 0, 2, 0}, /* 2 */ { 1, s_1_2, 0, 1, 0} }; static const symbol s_2_0[2] = { 'd', 'd' }; static const symbol s_2_1[2] = { 'k', 'k' }; static const symbol s_2_2[2] = { 't', 't' }; static const struct among a_2[3] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0} }; static const symbol s_3_0[3] = { 'e', 
'n', 'e' }; static const symbol s_3_1[2] = { 's', 'e' }; static const symbol s_3_2[2] = { 'e', 'n' }; static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' }; static const symbol s_3_4[1] = { 's' }; static const struct among a_3[5] = { /* 0 */ { 3, s_3_0, -1, 2, 0}, /* 1 */ { 2, s_3_1, -1, 3, 0}, /* 2 */ { 2, s_3_2, -1, 2, 0}, /* 3 */ { 5, s_3_3, 2, 1, 0}, /* 4 */ { 1, s_3_4, -1, 3, 0} }; static const symbol s_4_0[3] = { 'e', 'n', 'd' }; static const symbol s_4_1[2] = { 'i', 'g' }; static const symbol s_4_2[3] = { 'i', 'n', 'g' }; static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' }; static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' }; static const symbol s_4_5[3] = { 'b', 'a', 'r' }; static const struct among a_4[6] = { /* 0 */ { 3, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 2, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 3, 0}, /* 4 */ { 4, s_4_4, -1, 4, 0}, /* 5 */ { 3, s_4_5, -1, 5, 0} }; static const symbol s_5_0[2] = { 'a', 'a' }; static const symbol s_5_1[2] = { 'e', 'e' }; static const symbol s_5_2[2] = { 'o', 'o' }; static const symbol s_5_3[2] = { 'u', 'u' }; static const struct among a_5[4] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0}, /* 2 */ { 2, s_5_2, -1, -1, 0}, /* 3 */ { 2, s_5_3, -1, -1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'o' }; static const symbol s_4[] = { 'u' }; static const symbol s_5[] = { 'y' }; static const symbol s_6[] = { 'Y' }; static const symbol s_7[] = { 'i' }; static const symbol s_8[] = { 'I' }; static const symbol s_9[] = { 'y' }; static const symbol s_10[] = { 'Y' }; static const symbol 
s_11[] = { 'y' }; static const symbol s_12[] = { 'i' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'g', 'e', 'm' }; static const symbol s_15[] = { 'h', 'e', 'i', 'd' }; static const symbol s_16[] = { 'h', 'e', 'i', 'd' }; static const symbol s_17[] = { 'c' }; static const symbol s_18[] = { 'e', 'n' }; static const symbol s_19[] = { 'i', 'g' }; static const symbol s_20[] = { 'e' }; static const symbol s_21[] = { 'e' }; static int r_prelude(struct SN_env * z) { int among_var; { int c_test = z->c; /* test, line 42 */ while(1) { /* repeat, line 42 */ int c1 = z->c; z->bra = z->c; /* [, line 43 */ if (z->c >= z->l || z->p[z->c + 0] >> 5 != 7 || !((340306450 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 6; else among_var = find_among(z, a_0, 11); /* substring, line 43 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 43 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */ if (ret < 0) return ret; } break; case 6: if (z->c >= z->l) goto lab0; z->c++; /* next, line 54 */ break; } continue; lab0: z->c = c1; break; } z->c = c_test; } { int c_keep = z->c; /* try, line 57 */ z->bra = z->c; /* [, line 57 */ if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; } z->ket = z->c; /* ], line 57 */ { int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */ if (ret < 0) return ret; } lab1: ; } while(1) { /* repeat, line 58 */ int c2 = z->c; while(1) { /* goto, line 58 */ int c3 = z->c; if (in_grouping(z, g_v, 97, 232, 0)) goto lab3; z->bra = z->c; /* [, line 59 */ { int c4 = z->c; /* or, line 59 */ if (!(eq_s(z, 1, s_7))) 
goto lab5; z->ket = z->c; /* ], line 59 */ if (in_grouping(z, g_v, 97, 232, 0)) goto lab5; { int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = c4; if (!(eq_s(z, 1, s_9))) goto lab3; z->ket = z->c; /* ], line 60 */ { int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */ if (ret < 0) return ret; } } lab4: z->c = c3; break; lab3: z->c = c3; if (z->c >= z->l) goto lab2; z->c++; /* goto, line 58 */ } continue; lab2: z->c = c2; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { /* gopast */ /* grouping v, line 69 */ int ret = out_grouping(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 69 */ int ret = in_grouping(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 69 */ /* try, line 70 */ if (!(z->I[0] < 3)) goto lab0; z->I[0] = 3; lab0: { /* gopast */ /* grouping v, line 71 */ int ret = out_grouping(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 71 */ int ret = in_grouping(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 71 */ return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 75 */ int c1 = z->c; z->bra = z->c; /* [, line 77 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 77 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 77 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 80 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; 
return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_undouble(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 91 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */ z->c = z->l - m_test; } z->ket = z->c; /* [, line 91 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 91 */ z->bra = z->c; /* ], line 91 */ { int ret = slice_del(z); /* delete, line 91 */ if (ret < 0) return ret; } return 1; } static int r_e_ending(struct SN_env * z) { z->B[0] = 0; /* unset e_found, line 95 */ z->ket = z->c; /* [, line 96 */ if (!(eq_s_b(z, 1, s_13))) return 0; z->bra = z->c; /* ], line 96 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 96 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 96 */ if (out_grouping_b(z, g_v, 97, 232, 0)) return 0; z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } z->B[0] = 1; /* set e_found, line 97 */ { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 98 */ if (ret < 0) return ret; } return 1; } static int r_en_ending(struct SN_env * z) { { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 102 */ if (ret < 0) return ret; } { int m1 = z->l - z->c; (void)m1; /* and, line 102 */ if (out_grouping_b(z, g_v, 97, 232, 0)) return 0; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ if (!(eq_s_b(z, 3, s_14))) goto lab0; return 0; lab0: z->c = z->l - m2; } } { int ret = slice_del(z); /* delete, line 102 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 103 */ if (ret < 0) return ret; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 107 */ z->ket = z->c; /* [, line 108 */ if (z->c <= z->lb || z->p[z->c - 
1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; among_var = find_among_b(z, a_3, 5); /* substring, line 108 */ if (!(among_var)) goto lab0; z->bra = z->c; /* ], line 108 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 110 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */ if (ret < 0) return ret; } break; case 2: { int ret = r_en_ending(z); if (ret == 0) goto lab0; /* call en_ending, line 113 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 116 */ if (ret < 0) return ret; } if (out_grouping_b(z, g_v_j, 97, 232, 0)) goto lab0; { int ret = slice_del(z); /* delete, line 116 */ if (ret < 0) return ret; } break; } lab0: z->c = z->l - m1; } { int m2 = z->l - z->c; (void)m2; /* do, line 120 */ { int ret = r_e_ending(z); if (ret == 0) goto lab1; /* call e_ending, line 120 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 122 */ z->ket = z->c; /* [, line 122 */ if (!(eq_s_b(z, 4, s_16))) goto lab2; z->bra = z->c; /* ], line 122 */ { int ret = r_R2(z); if (ret == 0) goto lab2; /* call R2, line 122 */ if (ret < 0) return ret; } { int m4 = z->l - z->c; (void)m4; /* not, line 122 */ if (!(eq_s_b(z, 1, s_17))) goto lab3; goto lab2; lab3: z->c = z->l - m4; } { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 123 */ if (!(eq_s_b(z, 2, s_18))) goto lab2; z->bra = z->c; /* ], line 123 */ { int ret = r_en_ending(z); if (ret == 0) goto lab2; /* call en_ending, line 123 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m5 = z->l - z->c; (void)m5; /* do, line 126 */ z->ket = z->c; /* [, line 127 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4; among_var = find_among_b(z, a_4, 6); /* substring, line 127 */ if (!(among_var)) goto lab4; 
z->bra = z->c; /* ], line 127 */ switch(among_var) { case 0: goto lab4; case 1: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 129 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 129 */ if (ret < 0) return ret; } { int m6 = z->l - z->c; (void)m6; /* or, line 130 */ z->ket = z->c; /* [, line 130 */ if (!(eq_s_b(z, 2, s_19))) goto lab6; z->bra = z->c; /* ], line 130 */ { int ret = r_R2(z); if (ret == 0) goto lab6; /* call R2, line 130 */ if (ret < 0) return ret; } { int m7 = z->l - z->c; (void)m7; /* not, line 130 */ if (!(eq_s_b(z, 1, s_20))) goto lab7; goto lab6; lab7: z->c = z->l - m7; } { int ret = slice_del(z); /* delete, line 130 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_undouble(z); if (ret == 0) goto lab4; /* call undouble, line 130 */ if (ret < 0) return ret; } } lab5: break; case 2: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 133 */ if (ret < 0) return ret; } { int m8 = z->l - z->c; (void)m8; /* not, line 133 */ if (!(eq_s_b(z, 1, s_21))) goto lab8; goto lab4; lab8: z->c = z->l - m8; } { int ret = slice_del(z); /* delete, line 133 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 136 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 136 */ if (ret < 0) return ret; } { int ret = r_e_ending(z); if (ret == 0) goto lab4; /* call e_ending, line 136 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 139 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 139 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 142 */ if (ret < 0) return ret; } if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */ { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } break; } lab4: z->c = z->l - m5; } { int m9 = z->l - z->c; (void)m9; /* do, line 
146 */ if (out_grouping_b(z, g_v_I, 73, 232, 0)) goto lab9; { int m_test = z->l - z->c; /* test, line 148 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9; if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */ if (out_grouping_b(z, g_v, 97, 232, 0)) goto lab9; z->c = z->l - m_test; } z->ket = z->c; /* [, line 152 */ if (z->c <= z->lb) goto lab9; z->c--; /* next, line 152 */ z->bra = z->c; /* ], line 152 */ { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } lab9: z->c = z->l - m9; } return 1; } extern int dutch_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 159 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 159 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 160 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 160 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 161 */ { int m3 = z->l - z->c; (void)m3; /* do, line 162 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab2; /* call standard_suffix, line 162 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; { int c4 = z->c; /* do, line 163 */ { int ret = r_postlude(z); if (ret == 0) goto lab3; /* call postlude, line 163 */ if (ret < 0) return ret; } lab3: z->c = c4; } return 1; } extern struct SN_env * dutch_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } extern void dutch_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h000066400000000000000000000005021217574114600310550ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * dutch_ISO_8859_1_create_env(void); extern void dutch_ISO_8859_1_close_env(struct 
SN_env * z); extern int dutch_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_english.c000066400000000000000000001134661217574114600314100ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int english_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_exception2(struct SN_env * z); static int r_exception1(struct SN_env * z); static int r_Step_5(struct SN_env * z); static int r_Step_4(struct SN_env * z); static int r_Step_3(struct SN_env * z); static int r_Step_2(struct SN_env * z); static int r_Step_1c(struct SN_env * z); static int r_Step_1b(struct SN_env * z); static int r_Step_1a(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_shortv(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * english_ISO_8859_1_create_env(void); extern void english_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[5] = { 'a', 'r', 's', 'e', 'n' }; static const symbol s_0_1[6] = { 'c', 'o', 'm', 'm', 'u', 'n' }; static const symbol s_0_2[5] = { 'g', 'e', 'n', 'e', 'r' }; static const struct among a_0[3] = { /* 0 */ { 5, s_0_0, -1, -1, 0}, /* 1 */ { 6, s_0_1, -1, -1, 0}, /* 2 */ { 5, s_0_2, -1, -1, 0} }; static const symbol s_1_0[1] = { '\'' }; static const symbol s_1_1[3] = { '\'', 's', '\'' }; static const symbol s_1_2[2] = { '\'', 's' }; static const struct among a_1[3] = { /* 0 */ { 1, s_1_0, -1, 1, 0}, /* 1 */ { 3, s_1_1, 0, 1, 0}, /* 2 */ { 2, s_1_2, -1, 1, 0} }; static const symbol s_2_0[3] = { 'i', 'e', 'd' }; static const symbol s_2_1[1] = { 's' }; static const symbol 
s_2_2[3] = { 'i', 'e', 's' }; static const symbol s_2_3[4] = { 's', 's', 'e', 's' }; static const symbol s_2_4[2] = { 's', 's' }; static const symbol s_2_5[2] = { 'u', 's' }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, 2, 0}, /* 1 */ { 1, s_2_1, -1, 3, 0}, /* 2 */ { 3, s_2_2, 1, 2, 0}, /* 3 */ { 4, s_2_3, 1, 1, 0}, /* 4 */ { 2, s_2_4, 1, -1, 0}, /* 5 */ { 2, s_2_5, 1, -1, 0} }; static const symbol s_3_1[2] = { 'b', 'b' }; static const symbol s_3_2[2] = { 'd', 'd' }; static const symbol s_3_3[2] = { 'f', 'f' }; static const symbol s_3_4[2] = { 'g', 'g' }; static const symbol s_3_5[2] = { 'b', 'l' }; static const symbol s_3_6[2] = { 'm', 'm' }; static const symbol s_3_7[2] = { 'n', 'n' }; static const symbol s_3_8[2] = { 'p', 'p' }; static const symbol s_3_9[2] = { 'r', 'r' }; static const symbol s_3_10[2] = { 'a', 't' }; static const symbol s_3_11[2] = { 't', 't' }; static const symbol s_3_12[2] = { 'i', 'z' }; static const struct among a_3[13] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_3_1, 0, 2, 0}, /* 2 */ { 2, s_3_2, 0, 2, 0}, /* 3 */ { 2, s_3_3, 0, 2, 0}, /* 4 */ { 2, s_3_4, 0, 2, 0}, /* 5 */ { 2, s_3_5, 0, 1, 0}, /* 6 */ { 2, s_3_6, 0, 2, 0}, /* 7 */ { 2, s_3_7, 0, 2, 0}, /* 8 */ { 2, s_3_8, 0, 2, 0}, /* 9 */ { 2, s_3_9, 0, 2, 0}, /* 10 */ { 2, s_3_10, 0, 1, 0}, /* 11 */ { 2, s_3_11, 0, 2, 0}, /* 12 */ { 2, s_3_12, 0, 1, 0} }; static const symbol s_4_0[2] = { 'e', 'd' }; static const symbol s_4_1[3] = { 'e', 'e', 'd' }; static const symbol s_4_2[3] = { 'i', 'n', 'g' }; static const symbol s_4_3[4] = { 'e', 'd', 'l', 'y' }; static const symbol s_4_4[5] = { 'e', 'e', 'd', 'l', 'y' }; static const symbol s_4_5[5] = { 'i', 'n', 'g', 'l', 'y' }; static const struct among a_4[6] = { /* 0 */ { 2, s_4_0, -1, 2, 0}, /* 1 */ { 3, s_4_1, 0, 1, 0}, /* 2 */ { 3, s_4_2, -1, 2, 0}, /* 3 */ { 4, s_4_3, -1, 2, 0}, /* 4 */ { 5, s_4_4, 3, 1, 0}, /* 5 */ { 5, s_4_5, -1, 2, 0} }; static const symbol s_5_0[4] = { 'a', 'n', 'c', 'i' }; static const symbol 
s_5_1[4] = { 'e', 'n', 'c', 'i' }; static const symbol s_5_2[3] = { 'o', 'g', 'i' }; static const symbol s_5_3[2] = { 'l', 'i' }; static const symbol s_5_4[3] = { 'b', 'l', 'i' }; static const symbol s_5_5[4] = { 'a', 'b', 'l', 'i' }; static const symbol s_5_6[4] = { 'a', 'l', 'l', 'i' }; static const symbol s_5_7[5] = { 'f', 'u', 'l', 'l', 'i' }; static const symbol s_5_8[6] = { 'l', 'e', 's', 's', 'l', 'i' }; static const symbol s_5_9[5] = { 'o', 'u', 's', 'l', 'i' }; static const symbol s_5_10[5] = { 'e', 'n', 't', 'l', 'i' }; static const symbol s_5_11[5] = { 'a', 'l', 'i', 't', 'i' }; static const symbol s_5_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; static const symbol s_5_13[5] = { 'i', 'v', 'i', 't', 'i' }; static const symbol s_5_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_5_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_5_16[5] = { 'a', 'l', 'i', 's', 'm' }; static const symbol s_5_17[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_5_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; static const symbol s_5_19[4] = { 'i', 'z', 'e', 'r' }; static const symbol s_5_20[4] = { 'a', 't', 'o', 'r' }; static const symbol s_5_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; static const symbol s_5_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; static const symbol s_5_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; static const struct among a_5[24] = { /* 0 */ { 4, s_5_0, -1, 3, 0}, /* 1 */ { 4, s_5_1, -1, 2, 0}, /* 2 */ { 3, s_5_2, -1, 13, 0}, /* 3 */ { 2, s_5_3, -1, 16, 0}, /* 4 */ { 3, s_5_4, 3, 12, 0}, /* 5 */ { 4, s_5_5, 4, 4, 0}, /* 6 */ { 4, s_5_6, 3, 8, 0}, /* 7 */ { 5, s_5_7, 3, 14, 0}, /* 8 */ { 6, s_5_8, 3, 15, 0}, /* 9 */ { 5, s_5_9, 3, 10, 0}, /* 10 */ { 5, s_5_10, 3, 5, 0}, /* 11 */ { 5, s_5_11, -1, 8, 0}, /* 12 */ { 6, s_5_12, -1, 12, 0}, /* 13 */ { 5, s_5_13, -1, 11, 0}, /* 14 */ { 6, s_5_14, -1, 1, 0}, /* 15 */ { 7, s_5_15, 14, 7, 0}, /* 16 */ { 5, s_5_16, -1, 8, 0}, /* 17 */ { 5, s_5_17, -1, 7, 0}, /* 18 
*/ { 7, s_5_18, 17, 6, 0}, /* 19 */ { 4, s_5_19, -1, 6, 0}, /* 20 */ { 4, s_5_20, -1, 7, 0}, /* 21 */ { 7, s_5_21, -1, 11, 0}, /* 22 */ { 7, s_5_22, -1, 9, 0}, /* 23 */ { 7, s_5_23, -1, 10, 0} }; static const symbol s_6_0[5] = { 'i', 'c', 'a', 't', 'e' }; static const symbol s_6_1[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_6_2[5] = { 'a', 'l', 'i', 'z', 'e' }; static const symbol s_6_3[5] = { 'i', 'c', 'i', 't', 'i' }; static const symbol s_6_4[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_6_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_6_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_6_7[3] = { 'f', 'u', 'l' }; static const symbol s_6_8[4] = { 'n', 'e', 's', 's' }; static const struct among a_6[9] = { /* 0 */ { 5, s_6_0, -1, 4, 0}, /* 1 */ { 5, s_6_1, -1, 6, 0}, /* 2 */ { 5, s_6_2, -1, 3, 0}, /* 3 */ { 5, s_6_3, -1, 4, 0}, /* 4 */ { 4, s_6_4, -1, 4, 0}, /* 5 */ { 6, s_6_5, -1, 1, 0}, /* 6 */ { 7, s_6_6, 5, 2, 0}, /* 7 */ { 3, s_6_7, -1, 5, 0}, /* 8 */ { 4, s_6_8, -1, 5, 0} }; static const symbol s_7_0[2] = { 'i', 'c' }; static const symbol s_7_1[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_7_2[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_7_3[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_7_4[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_7_5[3] = { 'a', 't', 'e' }; static const symbol s_7_6[3] = { 'i', 'v', 'e' }; static const symbol s_7_7[3] = { 'i', 'z', 'e' }; static const symbol s_7_8[3] = { 'i', 't', 'i' }; static const symbol s_7_9[2] = { 'a', 'l' }; static const symbol s_7_10[3] = { 'i', 's', 'm' }; static const symbol s_7_11[3] = { 'i', 'o', 'n' }; static const symbol s_7_12[2] = { 'e', 'r' }; static const symbol s_7_13[3] = { 'o', 'u', 's' }; static const symbol s_7_14[3] = { 'a', 'n', 't' }; static const symbol s_7_15[3] = { 'e', 'n', 't' }; static const symbol s_7_16[4] = { 'm', 'e', 'n', 't' }; static const symbol s_7_17[5] = { 'e', 'm', 'e', 'n', 't' }; static const struct 
among a_7[18] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 4, s_7_1, -1, 1, 0}, /* 2 */ { 4, s_7_2, -1, 1, 0}, /* 3 */ { 4, s_7_3, -1, 1, 0}, /* 4 */ { 4, s_7_4, -1, 1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 3, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 3, s_7_8, -1, 1, 0}, /* 9 */ { 2, s_7_9, -1, 1, 0}, /* 10 */ { 3, s_7_10, -1, 1, 0}, /* 11 */ { 3, s_7_11, -1, 2, 0}, /* 12 */ { 2, s_7_12, -1, 1, 0}, /* 13 */ { 3, s_7_13, -1, 1, 0}, /* 14 */ { 3, s_7_14, -1, 1, 0}, /* 15 */ { 3, s_7_15, -1, 1, 0}, /* 16 */ { 4, s_7_16, 15, 1, 0}, /* 17 */ { 5, s_7_17, 16, 1, 0} }; static const symbol s_8_0[1] = { 'e' }; static const symbol s_8_1[1] = { 'l' }; static const struct among a_8[2] = { /* 0 */ { 1, s_8_0, -1, 1, 0}, /* 1 */ { 1, s_8_1, -1, 2, 0} }; static const symbol s_9_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; static const symbol s_9_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' }; static const symbol s_9_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' }; static const symbol s_9_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' }; static const symbol s_9_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' }; static const symbol s_9_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' }; static const symbol s_9_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' }; static const symbol s_9_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' }; static const struct among a_9[8] = { /* 0 */ { 7, s_9_0, -1, -1, 0}, /* 1 */ { 7, s_9_1, -1, -1, 0}, /* 2 */ { 6, s_9_2, -1, -1, 0}, /* 3 */ { 7, s_9_3, -1, -1, 0}, /* 4 */ { 6, s_9_4, -1, -1, 0}, /* 5 */ { 7, s_9_5, -1, -1, 0}, /* 6 */ { 7, s_9_6, -1, -1, 0}, /* 7 */ { 6, s_9_7, -1, -1, 0} }; static const symbol s_10_0[5] = { 'a', 'n', 'd', 'e', 's' }; static const symbol s_10_1[5] = { 'a', 't', 'l', 'a', 's' }; static const symbol s_10_2[4] = { 'b', 'i', 'a', 's' }; static const symbol s_10_3[6] = { 'c', 'o', 's', 'm', 'o', 's' }; static const symbol s_10_4[5] = { 'd', 'y', 'i', 'n', 'g' }; static const symbol s_10_5[5] = { 'e', 'a', 'r', 'l', 'y' }; static const 
symbol s_10_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' }; static const symbol s_10_7[4] = { 'h', 'o', 'w', 'e' }; static const symbol s_10_8[4] = { 'i', 'd', 'l', 'y' }; static const symbol s_10_9[5] = { 'l', 'y', 'i', 'n', 'g' }; static const symbol s_10_10[4] = { 'n', 'e', 'w', 's' }; static const symbol s_10_11[4] = { 'o', 'n', 'l', 'y' }; static const symbol s_10_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' }; static const symbol s_10_13[5] = { 's', 'k', 'i', 'e', 's' }; static const symbol s_10_14[4] = { 's', 'k', 'i', 's' }; static const symbol s_10_15[3] = { 's', 'k', 'y' }; static const symbol s_10_16[5] = { 't', 'y', 'i', 'n', 'g' }; static const symbol s_10_17[4] = { 'u', 'g', 'l', 'y' }; static const struct among a_10[18] = { /* 0 */ { 5, s_10_0, -1, -1, 0}, /* 1 */ { 5, s_10_1, -1, -1, 0}, /* 2 */ { 4, s_10_2, -1, -1, 0}, /* 3 */ { 6, s_10_3, -1, -1, 0}, /* 4 */ { 5, s_10_4, -1, 3, 0}, /* 5 */ { 5, s_10_5, -1, 9, 0}, /* 6 */ { 6, s_10_6, -1, 7, 0}, /* 7 */ { 4, s_10_7, -1, -1, 0}, /* 8 */ { 4, s_10_8, -1, 6, 0}, /* 9 */ { 5, s_10_9, -1, 4, 0}, /* 10 */ { 4, s_10_10, -1, -1, 0}, /* 11 */ { 4, s_10_11, -1, 10, 0}, /* 12 */ { 6, s_10_12, -1, 11, 0}, /* 13 */ { 5, s_10_13, -1, 2, 0}, /* 14 */ { 4, s_10_14, -1, 1, 0}, /* 15 */ { 3, s_10_15, -1, -1, 0}, /* 16 */ { 5, s_10_16, -1, 5, 0}, /* 17 */ { 4, s_10_17, -1, 8, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1 }; static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; static const unsigned char g_valid_LI[] = { 55, 141, 2 }; static const symbol s_0[] = { '\'' }; static const symbol s_1[] = { 'y' }; static const symbol s_2[] = { 'Y' }; static const symbol s_3[] = { 'y' }; static const symbol s_4[] = { 'Y' }; static const symbol s_5[] = { 's', 's' }; static const symbol s_6[] = { 'i' }; static const symbol s_7[] = { 'i', 'e' }; static const symbol s_8[] = { 'e', 'e' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'e' }; static const symbol s_11[] = { 'y' }; static const symbol 
s_12[] = { 'Y' }; static const symbol s_13[] = { 'i' }; static const symbol s_14[] = { 't', 'i', 'o', 'n' }; static const symbol s_15[] = { 'e', 'n', 'c', 'e' }; static const symbol s_16[] = { 'a', 'n', 'c', 'e' }; static const symbol s_17[] = { 'a', 'b', 'l', 'e' }; static const symbol s_18[] = { 'e', 'n', 't' }; static const symbol s_19[] = { 'i', 'z', 'e' }; static const symbol s_20[] = { 'a', 't', 'e' }; static const symbol s_21[] = { 'a', 'l' }; static const symbol s_22[] = { 'f', 'u', 'l' }; static const symbol s_23[] = { 'o', 'u', 's' }; static const symbol s_24[] = { 'i', 'v', 'e' }; static const symbol s_25[] = { 'b', 'l', 'e' }; static const symbol s_26[] = { 'l' }; static const symbol s_27[] = { 'o', 'g' }; static const symbol s_28[] = { 'f', 'u', 'l' }; static const symbol s_29[] = { 'l', 'e', 's', 's' }; static const symbol s_30[] = { 't', 'i', 'o', 'n' }; static const symbol s_31[] = { 'a', 't', 'e' }; static const symbol s_32[] = { 'a', 'l' }; static const symbol s_33[] = { 'i', 'c' }; static const symbol s_34[] = { 's' }; static const symbol s_35[] = { 't' }; static const symbol s_36[] = { 'l' }; static const symbol s_37[] = { 's', 'k', 'i' }; static const symbol s_38[] = { 's', 'k', 'y' }; static const symbol s_39[] = { 'd', 'i', 'e' }; static const symbol s_40[] = { 'l', 'i', 'e' }; static const symbol s_41[] = { 't', 'i', 'e' }; static const symbol s_42[] = { 'i', 'd', 'l' }; static const symbol s_43[] = { 'g', 'e', 'n', 't', 'l' }; static const symbol s_44[] = { 'u', 'g', 'l', 'i' }; static const symbol s_45[] = { 'e', 'a', 'r', 'l', 'i' }; static const symbol s_46[] = { 'o', 'n', 'l', 'i' }; static const symbol s_47[] = { 's', 'i', 'n', 'g', 'l' }; static const symbol s_48[] = { 'Y' }; static const symbol s_49[] = { 'y' }; static int r_prelude(struct SN_env * z) { z->B[0] = 0; /* unset Y_found, line 26 */ { int c1 = z->c; /* do, line 27 */ z->bra = z->c; /* [, line 27 */ if (!(eq_s(z, 1, s_0))) goto lab0; z->ket = z->c; /* ], line 27 */ { int 
ret = slice_del(z); /* delete, line 27 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 28 */ z->bra = z->c; /* [, line 28 */ if (!(eq_s(z, 1, s_1))) goto lab1; z->ket = z->c; /* ], line 28 */ { int ret = slice_from_s(z, 1, s_2); /* <-, line 28 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 28 */ lab1: z->c = c2; } { int c3 = z->c; /* do, line 29 */ while(1) { /* repeat, line 29 */ int c4 = z->c; while(1) { /* goto, line 29 */ int c5 = z->c; if (in_grouping(z, g_v, 97, 121, 0)) goto lab4; z->bra = z->c; /* [, line 29 */ if (!(eq_s(z, 1, s_3))) goto lab4; z->ket = z->c; /* ], line 29 */ z->c = c5; break; lab4: z->c = c5; if (z->c >= z->l) goto lab3; z->c++; /* goto, line 29 */ } { int ret = slice_from_s(z, 1, s_4); /* <-, line 29 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 29 */ continue; lab3: z->c = c4; break; } z->c = c3; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c1 = z->c; /* do, line 35 */ { int c2 = z->c; /* or, line 41 */ if (z->c + 4 >= z->l || z->p[z->c + 4] >> 5 != 3 || !((2375680 >> (z->p[z->c + 4] & 0x1f)) & 1)) goto lab2; if (!(find_among(z, a_0, 3))) goto lab2; /* among, line 36 */ goto lab1; lab2: z->c = c2; { /* gopast */ /* grouping v, line 41 */ int ret = out_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 41 */ int ret = in_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } } lab1: z->I[0] = z->c; /* setmark p1, line 42 */ { /* gopast */ /* grouping v, line 43 */ int ret = out_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 43 */ int ret = in_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 43 */ lab0: z->c = c1; } return 1; } static int r_shortv(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 51 */ if (out_grouping_b(z, g_v_WXY, 89, 
121, 0)) goto lab1; if (in_grouping_b(z, g_v, 97, 121, 0)) goto lab1; if (out_grouping_b(z, g_v, 97, 121, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; if (in_grouping_b(z, g_v, 97, 121, 0)) return 0; if (z->c > z->lb) return 0; /* atlimit, line 52 */ } lab0: return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_Step_1a(struct SN_env * z) { int among_var; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 59 */ z->ket = z->c; /* [, line 60 */ if (z->c <= z->lb || (z->p[z->c - 1] != 39 && z->p[z->c - 1] != 115)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_1, 3); /* substring, line 60 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 60 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: { int ret = slice_del(z); /* delete, line 62 */ if (ret < 0) return ret; } break; } lab0: ; } z->ket = z->c; /* [, line 65 */ if (z->c <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 115)) return 0; among_var = find_among_b(z, a_2, 6); /* substring, line 65 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 65 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_5); /* <-, line 66 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 68 */ { int ret = z->c - 2; if (z->lb > ret || ret > z->l) goto lab2; z->c = ret; /* hop, line 68 */ } { int ret = slice_from_s(z, 1, s_6); /* <-, line 68 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m1; { int ret = slice_from_s(z, 2, s_7); /* <-, line 68 */ if (ret < 0) return ret; } } lab1: break; case 3: if (z->c <= z->lb) return 0; z->c--; /* next, line 69 */ { /* gopast */ /* grouping v, line 69 */ int ret = out_grouping_b(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; 
} { int ret = slice_del(z); /* delete, line 69 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_1b(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 75 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33554576 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 6); /* substring, line 75 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 75 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 77 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_8); /* <-, line 77 */ if (ret < 0) return ret; } break; case 2: { int m_test = z->l - z->c; /* test, line 80 */ { /* gopast */ /* grouping v, line 80 */ int ret = out_grouping_b(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 80 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 81 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else among_var = find_among_b(z, a_3, 13); /* substring, line 81 */ if (!(among_var)) return 0; z->c = z->l - m_test; } switch(among_var) { case 0: return 0; case 1: { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_9); /* <+, line 83 */ z->c = c_keep; if (ret < 0) return ret; } break; case 2: z->ket = z->c; /* [, line 86 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 86 */ z->bra = z->c; /* ], line 86 */ { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } break; case 3: if (z->c != z->I[0]) return 0; /* atmark, line 87 */ { int m_test = z->l - z->c; /* test, line 87 */ { int ret = r_shortv(z); if (ret == 0) return 0; /* call shortv, line 87 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_10); /* <+, line 87 */ z->c = c_keep; if (ret < 0) return ret; } break; } break; } return 1; } static int 
r_Step_1c(struct SN_env * z) { z->ket = z->c; /* [, line 94 */ { int m1 = z->l - z->c; (void)m1; /* or, line 94 */ if (!(eq_s_b(z, 1, s_11))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_12))) return 0; } lab0: z->bra = z->c; /* ], line 94 */ if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; { int m2 = z->l - z->c; (void)m2; /* not, line 95 */ if (z->c > z->lb) goto lab2; /* atlimit, line 95 */ return 0; lab2: z->c = z->l - m2; } { int ret = slice_from_s(z, 1, s_13); /* <-, line 96 */ if (ret < 0) return ret; } return 1; } static int r_Step_2(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 100 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 24); /* substring, line 100 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 100 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 100 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_14); /* <-, line 101 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_15); /* <-, line 102 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_16); /* <-, line 103 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 4, s_17); /* <-, line 104 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_18); /* <-, line 105 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 3, s_19); /* <-, line 107 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_20); /* <-, line 109 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 2, s_21); /* <-, line 111 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 3, s_22); /* <-, line 112 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 3, s_23); /* <-, line 114 */ if (ret < 0) return ret; } break; 
case 11: { int ret = slice_from_s(z, 3, s_24); /* <-, line 116 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 3, s_25); /* <-, line 118 */ if (ret < 0) return ret; } break; case 13: if (!(eq_s_b(z, 1, s_26))) return 0; { int ret = slice_from_s(z, 2, s_27); /* <-, line 119 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 3, s_28); /* <-, line 120 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_from_s(z, 4, s_29); /* <-, line 121 */ if (ret < 0) return ret; } break; case 16: if (in_grouping_b(z, g_valid_LI, 99, 116, 0)) return 0; { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_3(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 127 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_6, 9); /* substring, line 127 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 127 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 127 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_30); /* <-, line 128 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_31); /* <-, line 129 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_32); /* <-, line 130 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_33); /* <-, line 132 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } break; case 6: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 136 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 136 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_4(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 141 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1864232 >> 
(z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_7, 18); /* substring, line 141 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 141 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 141 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 144 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ if (!(eq_s_b(z, 1, s_34))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_35))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_5(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 150 */ if (z->c <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) return 0; among_var = find_among_b(z, a_8, 2); /* substring, line 150 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 150 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 151 */ { int ret = r_R2(z); if (ret == 0) goto lab1; /* call R2, line 151 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 151 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* not, line 151 */ { int ret = r_shortv(z); if (ret == 0) goto lab2; /* call shortv, line 151 */ if (ret < 0) return ret; } return 0; lab2: z->c = z->l - m2; } } lab0: { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 152 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_36))) return 0; { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } break; } return 1; } static int r_exception2(struct SN_env * z) { z->ket = z->c; /* [, line 158 */ if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; if (!(find_among_b(z, 
a_9, 8))) return 0; /* substring, line 158 */ z->bra = z->c; /* ], line 158 */ if (z->c > z->lb) return 0; /* atlimit, line 158 */ return 1; } static int r_exception1(struct SN_env * z) { int among_var; z->bra = z->c; /* [, line 170 */ if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((42750482 >> (z->p[z->c + 2] & 0x1f)) & 1)) return 0; among_var = find_among(z, a_10, 18); /* substring, line 170 */ if (!(among_var)) return 0; z->ket = z->c; /* ], line 170 */ if (z->c < z->l) return 0; /* atlimit, line 170 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 3, s_37); /* <-, line 174 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_38); /* <-, line 175 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 3, s_39); /* <-, line 176 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 3, s_40); /* <-, line 177 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_41); /* <-, line 178 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 3, s_42); /* <-, line 182 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 5, s_43); /* <-, line 183 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 4, s_44); /* <-, line 184 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 5, s_45); /* <-, line 185 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 4, s_46); /* <-, line 186 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 5, s_47); /* <-, line 187 */ if (ret < 0) return ret; } break; } return 1; } static int r_postlude(struct SN_env * z) { if (!(z->B[0])) return 0; /* Boolean test Y_found, line 203 */ while(1) { /* repeat, line 203 */ int c1 = z->c; while(1) { /* goto, line 203 */ int c2 = z->c; z->bra = z->c; /* [, line 203 */ if (!(eq_s(z, 1, s_48))) goto lab1; z->ket = z->c; /* ], line 203 */ z->c = c2; break; lab1: z->c = 
c2; if (z->c >= z->l) goto lab0; z->c++; /* goto, line 203 */ } { int ret = slice_from_s(z, 1, s_49); /* <-, line 203 */ if (ret < 0) return ret; } continue; lab0: z->c = c1; break; } return 1; } extern int english_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* or, line 207 */ { int ret = r_exception1(z); if (ret == 0) goto lab1; /* call exception1, line 207 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = c1; { int c2 = z->c; /* not, line 208 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) goto lab3; z->c = ret; /* hop, line 208 */ } goto lab2; lab3: z->c = c2; } goto lab0; lab2: z->c = c1; { int c3 = z->c; /* do, line 209 */ { int ret = r_prelude(z); if (ret == 0) goto lab4; /* call prelude, line 209 */ if (ret < 0) return ret; } lab4: z->c = c3; } { int c4 = z->c; /* do, line 210 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab5; /* call mark_regions, line 210 */ if (ret < 0) return ret; } lab5: z->c = c4; } z->lb = z->c; z->c = z->l; /* backwards, line 211 */ { int m5 = z->l - z->c; (void)m5; /* do, line 213 */ { int ret = r_Step_1a(z); if (ret == 0) goto lab6; /* call Step_1a, line 213 */ if (ret < 0) return ret; } lab6: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* or, line 215 */ { int ret = r_exception2(z); if (ret == 0) goto lab8; /* call exception2, line 215 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m6; { int m7 = z->l - z->c; (void)m7; /* do, line 217 */ { int ret = r_Step_1b(z); if (ret == 0) goto lab9; /* call Step_1b, line 217 */ if (ret < 0) return ret; } lab9: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 218 */ { int ret = r_Step_1c(z); if (ret == 0) goto lab10; /* call Step_1c, line 218 */ if (ret < 0) return ret; } lab10: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 220 */ { int ret = r_Step_2(z); if (ret == 0) goto lab11; /* call Step_2, line 220 */ if (ret < 0) return ret; } lab11: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 
221 */ { int ret = r_Step_3(z); if (ret == 0) goto lab12; /* call Step_3, line 221 */ if (ret < 0) return ret; } lab12: z->c = z->l - m10; } { int m11 = z->l - z->c; (void)m11; /* do, line 222 */ { int ret = r_Step_4(z); if (ret == 0) goto lab13; /* call Step_4, line 222 */ if (ret < 0) return ret; } lab13: z->c = z->l - m11; } { int m12 = z->l - z->c; (void)m12; /* do, line 224 */ { int ret = r_Step_5(z); if (ret == 0) goto lab14; /* call Step_5, line 224 */ if (ret < 0) return ret; } lab14: z->c = z->l - m12; } } lab7: z->c = z->lb; { int c13 = z->c; /* do, line 227 */ { int ret = r_postlude(z); if (ret == 0) goto lab15; /* call postlude, line 227 */ if (ret < 0) return ret; } lab15: z->c = c13; } } lab0: return 1; } extern struct SN_env * english_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } extern void english_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_english.h000066400000000000000000000005101217574114600313760ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * english_ISO_8859_1_create_env(void); extern void english_ISO_8859_1_close_env(struct SN_env * z); extern int english_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c000066400000000000000000000623641217574114600314150ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int finnish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_tidy(struct SN_env * z); static int r_other_endings(struct SN_env * z); static int r_t_plural(struct SN_env * z); static int r_i_plural(struct SN_env * z); static int 
r_case_ending(struct SN_env * z); static int r_VI(struct SN_env * z); static int r_LONG(struct SN_env * z); static int r_possessive(struct SN_env * z); static int r_particle_etc(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * finnish_ISO_8859_1_create_env(void); extern void finnish_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[2] = { 'p', 'a' }; static const symbol s_0_1[3] = { 's', 't', 'i' }; static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' }; static const symbol s_0_3[3] = { 'h', 'a', 'n' }; static const symbol s_0_4[3] = { 'k', 'i', 'n' }; static const symbol s_0_5[3] = { 'h', 0xE4, 'n' }; static const symbol s_0_6[4] = { 'k', 0xE4, 0xE4, 'n' }; static const symbol s_0_7[2] = { 'k', 'o' }; static const symbol s_0_8[2] = { 'p', 0xE4 }; static const symbol s_0_9[2] = { 'k', 0xF6 }; static const struct among a_0[10] = { /* 0 */ { 2, s_0_0, -1, 1, 0}, /* 1 */ { 3, s_0_1, -1, 2, 0}, /* 2 */ { 4, s_0_2, -1, 1, 0}, /* 3 */ { 3, s_0_3, -1, 1, 0}, /* 4 */ { 3, s_0_4, -1, 1, 0}, /* 5 */ { 3, s_0_5, -1, 1, 0}, /* 6 */ { 4, s_0_6, -1, 1, 0}, /* 7 */ { 2, s_0_7, -1, 1, 0}, /* 8 */ { 2, s_0_8, -1, 1, 0}, /* 9 */ { 2, s_0_9, -1, 1, 0} }; static const symbol s_1_0[3] = { 'l', 'l', 'a' }; static const symbol s_1_1[2] = { 'n', 'a' }; static const symbol s_1_2[3] = { 's', 's', 'a' }; static const symbol s_1_3[2] = { 't', 'a' }; static const symbol s_1_4[3] = { 'l', 't', 'a' }; static const symbol s_1_5[3] = { 's', 't', 'a' }; static const struct among a_1[6] = { /* 0 */ { 3, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 3, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 3, s_1_4, 3, -1, 0}, /* 5 */ { 3, s_1_5, 3, -1, 0} }; static const symbol s_2_0[3] = { 'l', 'l', 0xE4 }; static const symbol s_2_1[2] = { 'n', 0xE4 }; static const symbol s_2_2[3] = { 's', 's', 0xE4 }; static const symbol 
s_2_3[2] = { 't', 0xE4 }; static const symbol s_2_4[3] = { 'l', 't', 0xE4 }; static const symbol s_2_5[3] = { 's', 't', 0xE4 }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 3, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, -1, 0}, /* 4 */ { 3, s_2_4, 3, -1, 0}, /* 5 */ { 3, s_2_5, 3, -1, 0} }; static const symbol s_3_0[3] = { 'l', 'l', 'e' }; static const symbol s_3_1[3] = { 'i', 'n', 'e' }; static const struct among a_3[2] = { /* 0 */ { 3, s_3_0, -1, -1, 0}, /* 1 */ { 3, s_3_1, -1, -1, 0} }; static const symbol s_4_0[3] = { 'n', 's', 'a' }; static const symbol s_4_1[3] = { 'm', 'm', 'e' }; static const symbol s_4_2[3] = { 'n', 'n', 'e' }; static const symbol s_4_3[2] = { 'n', 'i' }; static const symbol s_4_4[2] = { 's', 'i' }; static const symbol s_4_5[2] = { 'a', 'n' }; static const symbol s_4_6[2] = { 'e', 'n' }; static const symbol s_4_7[2] = { 0xE4, 'n' }; static const symbol s_4_8[3] = { 'n', 's', 0xE4 }; static const struct among a_4[9] = { /* 0 */ { 3, s_4_0, -1, 3, 0}, /* 1 */ { 3, s_4_1, -1, 3, 0}, /* 2 */ { 3, s_4_2, -1, 3, 0}, /* 3 */ { 2, s_4_3, -1, 2, 0}, /* 4 */ { 2, s_4_4, -1, 1, 0}, /* 5 */ { 2, s_4_5, -1, 4, 0}, /* 6 */ { 2, s_4_6, -1, 6, 0}, /* 7 */ { 2, s_4_7, -1, 5, 0}, /* 8 */ { 3, s_4_8, -1, 3, 0} }; static const symbol s_5_0[2] = { 'a', 'a' }; static const symbol s_5_1[2] = { 'e', 'e' }; static const symbol s_5_2[2] = { 'i', 'i' }; static const symbol s_5_3[2] = { 'o', 'o' }; static const symbol s_5_4[2] = { 'u', 'u' }; static const symbol s_5_5[2] = { 0xE4, 0xE4 }; static const symbol s_5_6[2] = { 0xF6, 0xF6 }; static const struct among a_5[7] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0}, /* 2 */ { 2, s_5_2, -1, -1, 0}, /* 3 */ { 2, s_5_3, -1, -1, 0}, /* 4 */ { 2, s_5_4, -1, -1, 0}, /* 5 */ { 2, s_5_5, -1, -1, 0}, /* 6 */ { 2, s_5_6, -1, -1, 0} }; static const symbol s_6_0[1] = { 'a' }; static const symbol s_6_1[3] = { 'l', 'l', 'a' }; static const symbol 
s_6_2[2] = { 'n', 'a' }; static const symbol s_6_3[3] = { 's', 's', 'a' }; static const symbol s_6_4[2] = { 't', 'a' }; static const symbol s_6_5[3] = { 'l', 't', 'a' }; static const symbol s_6_6[3] = { 's', 't', 'a' }; static const symbol s_6_7[3] = { 't', 't', 'a' }; static const symbol s_6_8[3] = { 'l', 'l', 'e' }; static const symbol s_6_9[3] = { 'i', 'n', 'e' }; static const symbol s_6_10[3] = { 'k', 's', 'i' }; static const symbol s_6_11[1] = { 'n' }; static const symbol s_6_12[3] = { 'h', 'a', 'n' }; static const symbol s_6_13[3] = { 'd', 'e', 'n' }; static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' }; static const symbol s_6_15[3] = { 'h', 'e', 'n' }; static const symbol s_6_16[4] = { 't', 't', 'e', 'n' }; static const symbol s_6_17[3] = { 'h', 'i', 'n' }; static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' }; static const symbol s_6_19[3] = { 'h', 'o', 'n' }; static const symbol s_6_20[3] = { 'h', 0xE4, 'n' }; static const symbol s_6_21[3] = { 'h', 0xF6, 'n' }; static const symbol s_6_22[1] = { 0xE4 }; static const symbol s_6_23[3] = { 'l', 'l', 0xE4 }; static const symbol s_6_24[2] = { 'n', 0xE4 }; static const symbol s_6_25[3] = { 's', 's', 0xE4 }; static const symbol s_6_26[2] = { 't', 0xE4 }; static const symbol s_6_27[3] = { 'l', 't', 0xE4 }; static const symbol s_6_28[3] = { 's', 't', 0xE4 }; static const symbol s_6_29[3] = { 't', 't', 0xE4 }; static const struct among a_6[30] = { /* 0 */ { 1, s_6_0, -1, 8, 0}, /* 1 */ { 3, s_6_1, 0, -1, 0}, /* 2 */ { 2, s_6_2, 0, -1, 0}, /* 3 */ { 3, s_6_3, 0, -1, 0}, /* 4 */ { 2, s_6_4, 0, -1, 0}, /* 5 */ { 3, s_6_5, 4, -1, 0}, /* 6 */ { 3, s_6_6, 4, -1, 0}, /* 7 */ { 3, s_6_7, 4, 9, 0}, /* 8 */ { 3, s_6_8, -1, -1, 0}, /* 9 */ { 3, s_6_9, -1, -1, 0}, /* 10 */ { 3, s_6_10, -1, -1, 0}, /* 11 */ { 1, s_6_11, -1, 7, 0}, /* 12 */ { 3, s_6_12, 11, 1, 0}, /* 13 */ { 3, s_6_13, 11, -1, r_VI}, /* 14 */ { 4, s_6_14, 11, -1, r_LONG}, /* 15 */ { 3, s_6_15, 11, 2, 0}, /* 16 */ { 4, s_6_16, 11, -1, r_VI}, /* 17 */ { 3, 
s_6_17, 11, 3, 0}, /* 18 */ { 4, s_6_18, 11, -1, r_VI}, /* 19 */ { 3, s_6_19, 11, 4, 0}, /* 20 */ { 3, s_6_20, 11, 5, 0}, /* 21 */ { 3, s_6_21, 11, 6, 0}, /* 22 */ { 1, s_6_22, -1, 8, 0}, /* 23 */ { 3, s_6_23, 22, -1, 0}, /* 24 */ { 2, s_6_24, 22, -1, 0}, /* 25 */ { 3, s_6_25, 22, -1, 0}, /* 26 */ { 2, s_6_26, 22, -1, 0}, /* 27 */ { 3, s_6_27, 26, -1, 0}, /* 28 */ { 3, s_6_28, 26, -1, 0}, /* 29 */ { 3, s_6_29, 26, 9, 0} }; static const symbol s_7_0[3] = { 'e', 'j', 'a' }; static const symbol s_7_1[3] = { 'm', 'm', 'a' }; static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' }; static const symbol s_7_3[3] = { 'm', 'p', 'a' }; static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' }; static const symbol s_7_5[3] = { 'm', 'm', 'i' }; static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' }; static const symbol s_7_7[3] = { 'm', 'p', 'i' }; static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' }; static const symbol s_7_9[3] = { 'e', 'j', 0xE4 }; static const symbol s_7_10[3] = { 'm', 'm', 0xE4 }; static const symbol s_7_11[4] = { 'i', 'm', 'm', 0xE4 }; static const symbol s_7_12[3] = { 'm', 'p', 0xE4 }; static const symbol s_7_13[4] = { 'i', 'm', 'p', 0xE4 }; static const struct among a_7[14] = { /* 0 */ { 3, s_7_0, -1, -1, 0}, /* 1 */ { 3, s_7_1, -1, 1, 0}, /* 2 */ { 4, s_7_2, 1, -1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 4, s_7_4, 3, -1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 4, s_7_6, 5, -1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 4, s_7_8, 7, -1, 0}, /* 9 */ { 3, s_7_9, -1, -1, 0}, /* 10 */ { 3, s_7_10, -1, 1, 0}, /* 11 */ { 4, s_7_11, 10, -1, 0}, /* 12 */ { 3, s_7_12, -1, 1, 0}, /* 13 */ { 4, s_7_13, 12, -1, 0} }; static const symbol s_8_0[1] = { 'i' }; static const symbol s_8_1[1] = { 'j' }; static const struct among a_8[2] = { /* 0 */ { 1, s_8_0, -1, -1, 0}, /* 1 */ { 1, s_8_1, -1, -1, 0} }; static const symbol s_9_0[3] = { 'm', 'm', 'a' }; static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' }; static const struct among a_9[2] = { /* 0 */ { 3, s_9_0, 
-1, 1, 0}, /* 1 */ { 4, s_9_1, 0, -1, 0} }; static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const symbol s_0[] = { 'k' }; static const symbol s_1[] = { 'k', 's', 'e' }; static const symbol s_2[] = { 'k', 's', 'i' }; static const symbol s_3[] = { 'i' }; static const symbol s_4[] = { 'a' }; static const symbol s_5[] = { 'e' }; static const symbol s_6[] = { 'i' }; static const symbol s_7[] = { 'o' }; static const symbol s_8[] = { 0xE4 }; static const symbol s_9[] = { 0xF6 }; static const symbol s_10[] = { 'i', 'e' }; static const symbol s_11[] = { 'e' }; static const symbol s_12[] = { 'p', 'o' }; static const symbol s_13[] = { 't' }; static const symbol s_14[] = { 'p', 'o' }; static const symbol s_15[] = { 'j' }; static const symbol s_16[] = { 'o' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'o' }; static const symbol s_19[] = { 'j' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 46 */ { /* gopast */ /* non V1, line 46 */ int ret = in_grouping(z, g_V1, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 46 */ if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 47 */ { /* gopast */ /* non V1, line 47 */ int ret = in_grouping(z, g_V1, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 47 */ return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_particle_etc(struct SN_env * z) { int among_var; { int mlimit; /* 
setlimit, line 55 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 55 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 55 */ among_var = find_among_b(z, a_0, 10); /* substring, line 55 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 55 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (in_grouping_b(z, g_particle_end, 97, 246, 0)) return 0; break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 64 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 66 */ if (ret < 0) return ret; } return 1; } static int r_possessive(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 69 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 69 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 69 */ among_var = find_among_b(z, a_4, 9); /* substring, line 69 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 69 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m2 = z->l - z->c; (void)m2; /* not, line 72 */ if (!(eq_s_b(z, 1, s_0))) goto lab0; return 0; lab0: z->c = z->l - m2; } { int ret = slice_del(z); /* delete, line 72 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 74 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 74 */ if (!(eq_s_b(z, 3, s_1))) return 0; z->bra = z->c; /* ], line 74 */ { int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } break; case 4: if (z->c - 1 <= z->lb || z->p[z->c - 1] != 97) return 0; if (!(find_among_b(z, a_1, 6))) return 0; /* among, line 81 */ { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } break; case 5: if (z->c - 1 <= z->lb || z->p[z->c - 1] != 228) 
return 0; if (!(find_among_b(z, a_2, 6))) return 0; /* among, line 83 */ { int ret = slice_del(z); /* delete, line 84 */ if (ret < 0) return ret; } break; case 6: if (z->c - 2 <= z->lb || z->p[z->c - 1] != 101) return 0; if (!(find_among_b(z, a_3, 2))) return 0; /* among, line 86 */ { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } break; } return 1; } static int r_LONG(struct SN_env * z) { if (!(find_among_b(z, a_5, 7))) return 0; /* among, line 91 */ return 1; } static int r_VI(struct SN_env * z) { if (!(eq_s_b(z, 1, s_3))) return 0; if (in_grouping_b(z, g_V2, 97, 246, 0)) return 0; return 1; } static int r_case_ending(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 96 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 96 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 96 */ among_var = find_among_b(z, a_6, 30); /* substring, line 96 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 96 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (!(eq_s_b(z, 1, s_4))) return 0; break; case 2: if (!(eq_s_b(z, 1, s_5))) return 0; break; case 3: if (!(eq_s_b(z, 1, s_6))) return 0; break; case 4: if (!(eq_s_b(z, 1, s_7))) return 0; break; case 5: if (!(eq_s_b(z, 1, s_8))) return 0; break; case 6: if (!(eq_s_b(z, 1, s_9))) return 0; break; case 7: { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ { int m2 = z->l - z->c; (void)m2; /* and, line 113 */ { int m3 = z->l - z->c; (void)m3; /* or, line 112 */ { int ret = r_LONG(z); if (ret == 0) goto lab2; /* call LONG, line 111 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m3; if (!(eq_s_b(z, 2, s_10))) { z->c = z->l - m_keep; goto lab0; } } lab1: z->c = z->l - m2; if (z->c <= z->lb) { z->c = z->l - m_keep; goto lab0; } z->c--; /* next, line 113 */ } z->bra = z->c; /* ], line 113 */ lab0: ; } break; case 8: if (in_grouping_b(z, g_V1, 97, 
246, 0)) return 0; if (out_grouping_b(z, g_V1, 97, 246, 0)) return 0; break; case 9: if (!(eq_s_b(z, 1, s_11))) return 0; break; } { int ret = slice_del(z); /* delete, line 138 */ if (ret < 0) return ret; } z->B[0] = 1; /* set ending_removed, line 139 */ return 1; } static int r_other_endings(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 142 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[1]) return 0; z->c = z->I[1]; /* tomark, line 142 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 142 */ among_var = find_among_b(z, a_7, 14); /* substring, line 142 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 142 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ if (!(eq_s_b(z, 2, s_12))) goto lab0; return 0; lab0: z->c = z->l - m2; } break; } { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } return 1; } static int r_i_plural(struct SN_env * z) { { int mlimit; /* setlimit, line 154 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 154 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 154 */ if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 106)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_8, 2))) { z->lb = mlimit; return 0; } /* substring, line 154 */ z->bra = z->c; /* ], line 154 */ z->lb = mlimit; } { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } return 1; } static int r_t_plural(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 161 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 161 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 162 */ if (!(eq_s_b(z, 1, s_13))) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 162 */ { int m_test = z->l - z->c; /* test, line 162 */ if 
(in_grouping_b(z, g_V1, 97, 246, 0)) { z->lb = mlimit; return 0; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } z->lb = mlimit; } { int mlimit; /* setlimit, line 165 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[1]) return 0; z->c = z->I[1]; /* tomark, line 165 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; z->ket = z->c; /* [, line 165 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_9, 2); /* substring, line 165 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 165 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m3 = z->l - z->c; (void)m3; /* not, line 167 */ if (!(eq_s_b(z, 2, s_14))) goto lab0; return 0; lab0: z->c = z->l - m3; } break; } { int ret = slice_del(z); /* delete, line 170 */ if (ret < 0) return ret; } return 1; } static int r_tidy(struct SN_env * z) { { int mlimit; /* setlimit, line 173 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 173 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* do, line 174 */ { int m3 = z->l - z->c; (void)m3; /* and, line 174 */ { int ret = r_LONG(z); if (ret == 0) goto lab0; /* call LONG, line 174 */ if (ret < 0) return ret; } z->c = z->l - m3; z->ket = z->c; /* [, line 174 */ if (z->c <= z->lb) goto lab0; z->c--; /* next, line 174 */ z->bra = z->c; /* ], line 174 */ { int ret = slice_del(z); /* delete, line 174 */ if (ret < 0) return ret; } } lab0: z->c = z->l - m2; } { int m4 = z->l - z->c; (void)m4; /* do, line 175 */ z->ket = z->c; /* [, line 175 */ if (in_grouping_b(z, g_AEI, 97, 228, 0)) goto lab1; z->bra = z->c; /* ], line 175 */ if (out_grouping_b(z, g_V1, 97, 246, 0)) goto lab1; { int ret = slice_del(z); /* delete, line 175 */ if (ret < 0) return ret; } lab1: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 176 */ z->ket = z->c; /* [, 
line 176 */ if (!(eq_s_b(z, 1, s_15))) goto lab2; z->bra = z->c; /* ], line 176 */ { int m6 = z->l - z->c; (void)m6; /* or, line 176 */ if (!(eq_s_b(z, 1, s_16))) goto lab4; goto lab3; lab4: z->c = z->l - m6; if (!(eq_s_b(z, 1, s_17))) goto lab2; } lab3: { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } lab2: z->c = z->l - m5; } { int m7 = z->l - z->c; (void)m7; /* do, line 177 */ z->ket = z->c; /* [, line 177 */ if (!(eq_s_b(z, 1, s_18))) goto lab5; z->bra = z->c; /* ], line 177 */ if (!(eq_s_b(z, 1, s_19))) goto lab5; { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } lab5: z->c = z->l - m7; } z->lb = mlimit; } if (in_grouping_b(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* non V1, line 179 */ z->ket = z->c; /* [, line 179 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 179 */ z->bra = z->c; /* ], line 179 */ z->S[0] = slice_to(z, z->S[0]); /* -> x, line 179 */ if (z->S[0] == 0) return -1; /* -> x, line 179 */ if (!(eq_v_b(z, z->S[0]))) return 0; /* name x, line 179 */ { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } return 1; } extern int finnish_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 185 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 185 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->B[0] = 0; /* unset ending_removed, line 186 */ z->lb = z->c; z->c = z->l; /* backwards, line 187 */ { int m2 = z->l - z->c; (void)m2; /* do, line 188 */ { int ret = r_particle_etc(z); if (ret == 0) goto lab1; /* call particle_etc, line 188 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 189 */ { int ret = r_possessive(z); if (ret == 0) goto lab2; /* call possessive, line 189 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 190 */ { int ret = r_case_ending(z); if (ret == 0) goto lab3; /* call case_ending, line 190 */ if 
(ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 191 */ { int ret = r_other_endings(z); if (ret == 0) goto lab4; /* call other_endings, line 191 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* or, line 192 */ if (!(z->B[0])) goto lab6; /* Boolean test ending_removed, line 192 */ { int m7 = z->l - z->c; (void)m7; /* do, line 192 */ { int ret = r_i_plural(z); if (ret == 0) goto lab7; /* call i_plural, line 192 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } goto lab5; lab6: z->c = z->l - m6; { int m8 = z->l - z->c; (void)m8; /* do, line 192 */ { int ret = r_t_plural(z); if (ret == 0) goto lab8; /* call t_plural, line 192 */ if (ret < 0) return ret; } lab8: z->c = z->l - m8; } } lab5: { int m9 = z->l - z->c; (void)m9; /* do, line 193 */ { int ret = r_tidy(z); if (ret == 0) goto lab9; /* call tidy, line 193 */ if (ret < 0) return ret; } lab9: z->c = z->l - m9; } z->c = z->lb; return 1; } extern struct SN_env * finnish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 1); } extern void finnish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h000066400000000000000000000005101217574114600314030ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * finnish_ISO_8859_1_create_env(void); extern void finnish_ISO_8859_1_close_env(struct SN_env * z); extern int finnish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_french.c000066400000000000000000001345611217574114600312230ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int 
french_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_un_accent(struct SN_env * z); static int r_un_double(struct SN_env * z); static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_i_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * french_ISO_8859_1_create_env(void); extern void french_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 'c', 'o', 'l' }; static const symbol s_0_1[3] = { 'p', 'a', 'r' }; static const symbol s_0_2[3] = { 't', 'a', 'p' }; static const struct among a_0[3] = { /* 0 */ { 3, s_0_0, -1, -1, 0}, /* 1 */ { 3, s_0_1, -1, -1, 0}, /* 2 */ { 3, s_0_2, -1, -1, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'U' }; static const symbol s_1_3[1] = { 'Y' }; static const struct among a_1[4] = { /* 0 */ { 0, 0, -1, 4, 0}, /* 1 */ { 1, s_1_1, 0, 1, 0}, /* 2 */ { 1, s_1_2, 0, 2, 0}, /* 3 */ { 1, s_1_3, 0, 3, 0} }; static const symbol s_2_0[3] = { 'i', 'q', 'U' }; static const symbol s_2_1[3] = { 'a', 'b', 'l' }; static const symbol s_2_2[3] = { 'I', 0xE8, 'r' }; static const symbol s_2_3[3] = { 'i', 0xE8, 'r' }; static const symbol s_2_4[3] = { 'e', 'u', 's' }; static const symbol s_2_5[2] = { 'i', 'v' }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, 3, 0}, /* 1 */ { 3, s_2_1, -1, 3, 0}, /* 2 */ { 3, s_2_2, -1, 4, 0}, /* 3 */ { 3, s_2_3, -1, 4, 0}, /* 4 */ { 3, s_2_4, -1, 2, 0}, /* 5 */ { 2, s_2_5, -1, 1, 0} }; static const symbol s_3_0[2] = { 'i', 'c' }; static const symbol s_3_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_3_2[2] = { 'i', 'v' }; 
static const struct among a_3[3] = { /* 0 */ { 2, s_3_0, -1, 2, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 2, s_3_2, -1, 3, 0} }; static const symbol s_4_0[4] = { 'i', 'q', 'U', 'e' }; static const symbol s_4_1[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; static const symbol s_4_2[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_4_3[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_4_4[5] = { 'l', 'o', 'g', 'i', 'e' }; static const symbol s_4_5[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_4_6[4] = { 'i', 's', 'm', 'e' }; static const symbol s_4_7[4] = { 'e', 'u', 's', 'e' }; static const symbol s_4_8[4] = { 'i', 's', 't', 'e' }; static const symbol s_4_9[3] = { 'i', 'v', 'e' }; static const symbol s_4_10[2] = { 'i', 'f' }; static const symbol s_4_11[5] = { 'u', 's', 'i', 'o', 'n' }; static const symbol s_4_12[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_4_13[5] = { 'u', 't', 'i', 'o', 'n' }; static const symbol s_4_14[5] = { 'a', 't', 'e', 'u', 'r' }; static const symbol s_4_15[5] = { 'i', 'q', 'U', 'e', 's' }; static const symbol s_4_16[7] = { 'a', 't', 'r', 'i', 'c', 'e', 's' }; static const symbol s_4_17[5] = { 'a', 'n', 'c', 'e', 's' }; static const symbol s_4_18[5] = { 'e', 'n', 'c', 'e', 's' }; static const symbol s_4_19[6] = { 'l', 'o', 'g', 'i', 'e', 's' }; static const symbol s_4_20[5] = { 'a', 'b', 'l', 'e', 's' }; static const symbol s_4_21[5] = { 'i', 's', 'm', 'e', 's' }; static const symbol s_4_22[5] = { 'e', 'u', 's', 'e', 's' }; static const symbol s_4_23[5] = { 'i', 's', 't', 'e', 's' }; static const symbol s_4_24[4] = { 'i', 'v', 'e', 's' }; static const symbol s_4_25[3] = { 'i', 'f', 's' }; static const symbol s_4_26[6] = { 'u', 's', 'i', 'o', 'n', 's' }; static const symbol s_4_27[6] = { 'a', 't', 'i', 'o', 'n', 's' }; static const symbol s_4_28[6] = { 'u', 't', 'i', 'o', 'n', 's' }; static const symbol s_4_29[6] = { 'a', 't', 'e', 'u', 'r', 's' }; static const symbol s_4_30[5] = { 'm', 'e', 'n', 't', 's' }; static const symbol 
s_4_31[6] = { 'e', 'm', 'e', 'n', 't', 's' }; static const symbol s_4_32[9] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't', 's' }; static const symbol s_4_33[4] = { 'i', 't', 0xE9, 's' }; static const symbol s_4_34[4] = { 'm', 'e', 'n', 't' }; static const symbol s_4_35[5] = { 'e', 'm', 'e', 'n', 't' }; static const symbol s_4_36[8] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't' }; static const symbol s_4_37[6] = { 'a', 'm', 'm', 'e', 'n', 't' }; static const symbol s_4_38[6] = { 'e', 'm', 'm', 'e', 'n', 't' }; static const symbol s_4_39[3] = { 'a', 'u', 'x' }; static const symbol s_4_40[4] = { 'e', 'a', 'u', 'x' }; static const symbol s_4_41[3] = { 'e', 'u', 'x' }; static const symbol s_4_42[3] = { 'i', 't', 0xE9 }; static const struct among a_4[43] = { /* 0 */ { 4, s_4_0, -1, 1, 0}, /* 1 */ { 6, s_4_1, -1, 2, 0}, /* 2 */ { 4, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 5, 0}, /* 4 */ { 5, s_4_4, -1, 3, 0}, /* 5 */ { 4, s_4_5, -1, 1, 0}, /* 6 */ { 4, s_4_6, -1, 1, 0}, /* 7 */ { 4, s_4_7, -1, 11, 0}, /* 8 */ { 4, s_4_8, -1, 1, 0}, /* 9 */ { 3, s_4_9, -1, 8, 0}, /* 10 */ { 2, s_4_10, -1, 8, 0}, /* 11 */ { 5, s_4_11, -1, 4, 0}, /* 12 */ { 5, s_4_12, -1, 2, 0}, /* 13 */ { 5, s_4_13, -1, 4, 0}, /* 14 */ { 5, s_4_14, -1, 2, 0}, /* 15 */ { 5, s_4_15, -1, 1, 0}, /* 16 */ { 7, s_4_16, -1, 2, 0}, /* 17 */ { 5, s_4_17, -1, 1, 0}, /* 18 */ { 5, s_4_18, -1, 5, 0}, /* 19 */ { 6, s_4_19, -1, 3, 0}, /* 20 */ { 5, s_4_20, -1, 1, 0}, /* 21 */ { 5, s_4_21, -1, 1, 0}, /* 22 */ { 5, s_4_22, -1, 11, 0}, /* 23 */ { 5, s_4_23, -1, 1, 0}, /* 24 */ { 4, s_4_24, -1, 8, 0}, /* 25 */ { 3, s_4_25, -1, 8, 0}, /* 26 */ { 6, s_4_26, -1, 4, 0}, /* 27 */ { 6, s_4_27, -1, 2, 0}, /* 28 */ { 6, s_4_28, -1, 4, 0}, /* 29 */ { 6, s_4_29, -1, 2, 0}, /* 30 */ { 5, s_4_30, -1, 15, 0}, /* 31 */ { 6, s_4_31, 30, 6, 0}, /* 32 */ { 9, s_4_32, 31, 12, 0}, /* 33 */ { 4, s_4_33, -1, 7, 0}, /* 34 */ { 4, s_4_34, -1, 15, 0}, /* 35 */ { 5, s_4_35, 34, 6, 0}, /* 36 */ { 8, s_4_36, 35, 12, 0}, /* 37 */ { 6, s_4_37, 34, 13, 0}, 
/* 38 */ { 6, s_4_38, 34, 14, 0}, /* 39 */ { 3, s_4_39, -1, 10, 0}, /* 40 */ { 4, s_4_40, 39, 9, 0}, /* 41 */ { 3, s_4_41, -1, 1, 0}, /* 42 */ { 3, s_4_42, -1, 7, 0} }; static const symbol s_5_0[3] = { 'i', 'r', 'a' }; static const symbol s_5_1[2] = { 'i', 'e' }; static const symbol s_5_2[4] = { 'i', 's', 's', 'e' }; static const symbol s_5_3[7] = { 'i', 's', 's', 'a', 'n', 't', 'e' }; static const symbol s_5_4[1] = { 'i' }; static const symbol s_5_5[4] = { 'i', 'r', 'a', 'i' }; static const symbol s_5_6[2] = { 'i', 'r' }; static const symbol s_5_7[4] = { 'i', 'r', 'a', 's' }; static const symbol s_5_8[3] = { 'i', 'e', 's' }; static const symbol s_5_9[4] = { 0xEE, 'm', 'e', 's' }; static const symbol s_5_10[5] = { 'i', 's', 's', 'e', 's' }; static const symbol s_5_11[8] = { 'i', 's', 's', 'a', 'n', 't', 'e', 's' }; static const symbol s_5_12[4] = { 0xEE, 't', 'e', 's' }; static const symbol s_5_13[2] = { 'i', 's' }; static const symbol s_5_14[5] = { 'i', 'r', 'a', 'i', 's' }; static const symbol s_5_15[6] = { 'i', 's', 's', 'a', 'i', 's' }; static const symbol s_5_16[6] = { 'i', 'r', 'i', 'o', 'n', 's' }; static const symbol s_5_17[7] = { 'i', 's', 's', 'i', 'o', 'n', 's' }; static const symbol s_5_18[5] = { 'i', 'r', 'o', 'n', 's' }; static const symbol s_5_19[6] = { 'i', 's', 's', 'o', 'n', 's' }; static const symbol s_5_20[7] = { 'i', 's', 's', 'a', 'n', 't', 's' }; static const symbol s_5_21[2] = { 'i', 't' }; static const symbol s_5_22[5] = { 'i', 'r', 'a', 'i', 't' }; static const symbol s_5_23[6] = { 'i', 's', 's', 'a', 'i', 't' }; static const symbol s_5_24[6] = { 'i', 's', 's', 'a', 'n', 't' }; static const symbol s_5_25[7] = { 'i', 'r', 'a', 'I', 'e', 'n', 't' }; static const symbol s_5_26[8] = { 'i', 's', 's', 'a', 'I', 'e', 'n', 't' }; static const symbol s_5_27[5] = { 'i', 'r', 'e', 'n', 't' }; static const symbol s_5_28[6] = { 'i', 's', 's', 'e', 'n', 't' }; static const symbol s_5_29[5] = { 'i', 'r', 'o', 'n', 't' }; static const symbol s_5_30[2] = { 
0xEE, 't' }; static const symbol s_5_31[5] = { 'i', 'r', 'i', 'e', 'z' }; static const symbol s_5_32[6] = { 'i', 's', 's', 'i', 'e', 'z' }; static const symbol s_5_33[4] = { 'i', 'r', 'e', 'z' }; static const symbol s_5_34[5] = { 'i', 's', 's', 'e', 'z' }; static const struct among a_5[35] = { /* 0 */ { 3, s_5_0, -1, 1, 0}, /* 1 */ { 2, s_5_1, -1, 1, 0}, /* 2 */ { 4, s_5_2, -1, 1, 0}, /* 3 */ { 7, s_5_3, -1, 1, 0}, /* 4 */ { 1, s_5_4, -1, 1, 0}, /* 5 */ { 4, s_5_5, 4, 1, 0}, /* 6 */ { 2, s_5_6, -1, 1, 0}, /* 7 */ { 4, s_5_7, -1, 1, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 4, s_5_9, -1, 1, 0}, /* 10 */ { 5, s_5_10, -1, 1, 0}, /* 11 */ { 8, s_5_11, -1, 1, 0}, /* 12 */ { 4, s_5_12, -1, 1, 0}, /* 13 */ { 2, s_5_13, -1, 1, 0}, /* 14 */ { 5, s_5_14, 13, 1, 0}, /* 15 */ { 6, s_5_15, 13, 1, 0}, /* 16 */ { 6, s_5_16, -1, 1, 0}, /* 17 */ { 7, s_5_17, -1, 1, 0}, /* 18 */ { 5, s_5_18, -1, 1, 0}, /* 19 */ { 6, s_5_19, -1, 1, 0}, /* 20 */ { 7, s_5_20, -1, 1, 0}, /* 21 */ { 2, s_5_21, -1, 1, 0}, /* 22 */ { 5, s_5_22, 21, 1, 0}, /* 23 */ { 6, s_5_23, 21, 1, 0}, /* 24 */ { 6, s_5_24, -1, 1, 0}, /* 25 */ { 7, s_5_25, -1, 1, 0}, /* 26 */ { 8, s_5_26, -1, 1, 0}, /* 27 */ { 5, s_5_27, -1, 1, 0}, /* 28 */ { 6, s_5_28, -1, 1, 0}, /* 29 */ { 5, s_5_29, -1, 1, 0}, /* 30 */ { 2, s_5_30, -1, 1, 0}, /* 31 */ { 5, s_5_31, -1, 1, 0}, /* 32 */ { 6, s_5_32, -1, 1, 0}, /* 33 */ { 4, s_5_33, -1, 1, 0}, /* 34 */ { 5, s_5_34, -1, 1, 0} }; static const symbol s_6_0[1] = { 'a' }; static const symbol s_6_1[3] = { 'e', 'r', 'a' }; static const symbol s_6_2[4] = { 'a', 's', 's', 'e' }; static const symbol s_6_3[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_4[2] = { 0xE9, 'e' }; static const symbol s_6_5[2] = { 'a', 'i' }; static const symbol s_6_6[4] = { 'e', 'r', 'a', 'i' }; static const symbol s_6_7[2] = { 'e', 'r' }; static const symbol s_6_8[2] = { 'a', 's' }; static const symbol s_6_9[4] = { 'e', 'r', 'a', 's' }; static const symbol s_6_10[4] = { 0xE2, 'm', 'e', 's' }; static const symbol 
s_6_11[5] = { 'a', 's', 's', 'e', 's' }; static const symbol s_6_12[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_6_13[4] = { 0xE2, 't', 'e', 's' }; static const symbol s_6_14[3] = { 0xE9, 'e', 's' }; static const symbol s_6_15[3] = { 'a', 'i', 's' }; static const symbol s_6_16[5] = { 'e', 'r', 'a', 'i', 's' }; static const symbol s_6_17[4] = { 'i', 'o', 'n', 's' }; static const symbol s_6_18[6] = { 'e', 'r', 'i', 'o', 'n', 's' }; static const symbol s_6_19[7] = { 'a', 's', 's', 'i', 'o', 'n', 's' }; static const symbol s_6_20[5] = { 'e', 'r', 'o', 'n', 's' }; static const symbol s_6_21[4] = { 'a', 'n', 't', 's' }; static const symbol s_6_22[2] = { 0xE9, 's' }; static const symbol s_6_23[3] = { 'a', 'i', 't' }; static const symbol s_6_24[5] = { 'e', 'r', 'a', 'i', 't' }; static const symbol s_6_25[3] = { 'a', 'n', 't' }; static const symbol s_6_26[5] = { 'a', 'I', 'e', 'n', 't' }; static const symbol s_6_27[7] = { 'e', 'r', 'a', 'I', 'e', 'n', 't' }; static const symbol s_6_28[5] = { 0xE8, 'r', 'e', 'n', 't' }; static const symbol s_6_29[6] = { 'a', 's', 's', 'e', 'n', 't' }; static const symbol s_6_30[5] = { 'e', 'r', 'o', 'n', 't' }; static const symbol s_6_31[2] = { 0xE2, 't' }; static const symbol s_6_32[2] = { 'e', 'z' }; static const symbol s_6_33[3] = { 'i', 'e', 'z' }; static const symbol s_6_34[5] = { 'e', 'r', 'i', 'e', 'z' }; static const symbol s_6_35[6] = { 'a', 's', 's', 'i', 'e', 'z' }; static const symbol s_6_36[4] = { 'e', 'r', 'e', 'z' }; static const symbol s_6_37[1] = { 0xE9 }; static const struct among a_6[38] = { /* 0 */ { 1, s_6_0, -1, 3, 0}, /* 1 */ { 3, s_6_1, 0, 2, 0}, /* 2 */ { 4, s_6_2, -1, 3, 0}, /* 3 */ { 4, s_6_3, -1, 3, 0}, /* 4 */ { 2, s_6_4, -1, 2, 0}, /* 5 */ { 2, s_6_5, -1, 3, 0}, /* 6 */ { 4, s_6_6, 5, 2, 0}, /* 7 */ { 2, s_6_7, -1, 2, 0}, /* 8 */ { 2, s_6_8, -1, 3, 0}, /* 9 */ { 4, s_6_9, 8, 2, 0}, /* 10 */ { 4, s_6_10, -1, 3, 0}, /* 11 */ { 5, s_6_11, -1, 3, 0}, /* 12 */ { 5, s_6_12, -1, 3, 0}, /* 13 */ { 4, s_6_13, 
-1, 3, 0}, /* 14 */ { 3, s_6_14, -1, 2, 0}, /* 15 */ { 3, s_6_15, -1, 3, 0}, /* 16 */ { 5, s_6_16, 15, 2, 0}, /* 17 */ { 4, s_6_17, -1, 1, 0}, /* 18 */ { 6, s_6_18, 17, 2, 0}, /* 19 */ { 7, s_6_19, 17, 3, 0}, /* 20 */ { 5, s_6_20, -1, 2, 0}, /* 21 */ { 4, s_6_21, -1, 3, 0}, /* 22 */ { 2, s_6_22, -1, 2, 0}, /* 23 */ { 3, s_6_23, -1, 3, 0}, /* 24 */ { 5, s_6_24, 23, 2, 0}, /* 25 */ { 3, s_6_25, -1, 3, 0}, /* 26 */ { 5, s_6_26, -1, 3, 0}, /* 27 */ { 7, s_6_27, 26, 2, 0}, /* 28 */ { 5, s_6_28, -1, 2, 0}, /* 29 */ { 6, s_6_29, -1, 3, 0}, /* 30 */ { 5, s_6_30, -1, 2, 0}, /* 31 */ { 2, s_6_31, -1, 3, 0}, /* 32 */ { 2, s_6_32, -1, 2, 0}, /* 33 */ { 3, s_6_33, 32, 2, 0}, /* 34 */ { 5, s_6_34, 33, 2, 0}, /* 35 */ { 6, s_6_35, 33, 3, 0}, /* 36 */ { 4, s_6_36, 32, 2, 0}, /* 37 */ { 1, s_6_37, -1, 2, 0} }; static const symbol s_7_0[1] = { 'e' }; static const symbol s_7_1[4] = { 'I', 0xE8, 'r', 'e' }; static const symbol s_7_2[4] = { 'i', 0xE8, 'r', 'e' }; static const symbol s_7_3[3] = { 'i', 'o', 'n' }; static const symbol s_7_4[3] = { 'I', 'e', 'r' }; static const symbol s_7_5[3] = { 'i', 'e', 'r' }; static const symbol s_7_6[1] = { 0xEB }; static const struct among a_7[7] = { /* 0 */ { 1, s_7_0, -1, 3, 0}, /* 1 */ { 4, s_7_1, 0, 2, 0}, /* 2 */ { 4, s_7_2, 0, 2, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 3, s_7_4, -1, 2, 0}, /* 5 */ { 3, s_7_5, -1, 2, 0}, /* 6 */ { 1, s_7_6, -1, 4, 0} }; static const symbol s_8_0[3] = { 'e', 'l', 'l' }; static const symbol s_8_1[4] = { 'e', 'i', 'l', 'l' }; static const symbol s_8_2[3] = { 'e', 'n', 'n' }; static const symbol s_8_3[3] = { 'o', 'n', 'n' }; static const symbol s_8_4[3] = { 'e', 't', 't' }; static const struct among a_8[5] = { /* 0 */ { 3, s_8_0, -1, -1, 0}, /* 1 */ { 4, s_8_1, -1, -1, 0}, /* 2 */ { 3, s_8_2, -1, -1, 0}, /* 3 */ { 3, s_8_3, -1, -1, 0}, /* 4 */ { 3, s_8_4, -1, -1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5 }; static const unsigned char 
g_keep_with_s[] = { 1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const symbol s_0[] = { 'u' }; static const symbol s_1[] = { 'U' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'I' }; static const symbol s_4[] = { 'y' }; static const symbol s_5[] = { 'Y' }; static const symbol s_6[] = { 'y' }; static const symbol s_7[] = { 'Y' }; static const symbol s_8[] = { 'q' }; static const symbol s_9[] = { 'u' }; static const symbol s_10[] = { 'U' }; static const symbol s_11[] = { 'i' }; static const symbol s_12[] = { 'u' }; static const symbol s_13[] = { 'y' }; static const symbol s_14[] = { 'i', 'c' }; static const symbol s_15[] = { 'i', 'q', 'U' }; static const symbol s_16[] = { 'l', 'o', 'g' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'e', 'n', 't' }; static const symbol s_19[] = { 'a', 't' }; static const symbol s_20[] = { 'e', 'u', 'x' }; static const symbol s_21[] = { 'i' }; static const symbol s_22[] = { 'a', 'b', 'l' }; static const symbol s_23[] = { 'i', 'q', 'U' }; static const symbol s_24[] = { 'a', 't' }; static const symbol s_25[] = { 'i', 'c' }; static const symbol s_26[] = { 'i', 'q', 'U' }; static const symbol s_27[] = { 'e', 'a', 'u' }; static const symbol s_28[] = { 'a', 'l' }; static const symbol s_29[] = { 'e', 'u', 'x' }; static const symbol s_30[] = { 'a', 'n', 't' }; static const symbol s_31[] = { 'e', 'n', 't' }; static const symbol s_32[] = { 'e' }; static const symbol s_33[] = { 's' }; static const symbol s_34[] = { 's' }; static const symbol s_35[] = { 't' }; static const symbol s_36[] = { 'i' }; static const symbol s_37[] = { 'g', 'u' }; static const symbol s_38[] = { 0xE9 }; static const symbol s_39[] = { 0xE8 }; static const symbol s_40[] = { 'e' }; static const symbol s_41[] = { 'Y' }; static const symbol s_42[] = { 'i' }; static const symbol s_43[] = { 0xE7 }; static const symbol s_44[] = { 'c' }; static int r_prelude(struct SN_env * z) { while(1) { /* repeat, line 38 */ int 
c1 = z->c; while(1) { /* goto, line 38 */ int c2 = z->c; { int c3 = z->c; /* or, line 44 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab3; z->bra = z->c; /* [, line 40 */ { int c4 = z->c; /* or, line 40 */ if (!(eq_s(z, 1, s_0))) goto lab5; z->ket = z->c; /* ], line 40 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab5; { int ret = slice_from_s(z, 1, s_1); /* <-, line 40 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = c4; if (!(eq_s(z, 1, s_2))) goto lab6; z->ket = z->c; /* ], line 41 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab6; { int ret = slice_from_s(z, 1, s_3); /* <-, line 41 */ if (ret < 0) return ret; } goto lab4; lab6: z->c = c4; if (!(eq_s(z, 1, s_4))) goto lab3; z->ket = z->c; /* ], line 42 */ { int ret = slice_from_s(z, 1, s_5); /* <-, line 42 */ if (ret < 0) return ret; } } lab4: goto lab2; lab3: z->c = c3; z->bra = z->c; /* [, line 45 */ if (!(eq_s(z, 1, s_6))) goto lab7; z->ket = z->c; /* ], line 45 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab7; { int ret = slice_from_s(z, 1, s_7); /* <-, line 45 */ if (ret < 0) return ret; } goto lab2; lab7: z->c = c3; if (!(eq_s(z, 1, s_8))) goto lab1; z->bra = z->c; /* [, line 47 */ if (!(eq_s(z, 1, s_9))) goto lab1; z->ket = z->c; /* ], line 47 */ { int ret = slice_from_s(z, 1, s_10); /* <-, line 47 */ if (ret < 0) return ret; } } lab2: z->c = c2; break; lab1: z->c = c2; if (z->c >= z->l) goto lab0; z->c++; /* goto, line 38 */ } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 56 */ { int c2 = z->c; /* or, line 58 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab2; if (in_grouping(z, g_v, 97, 251, 0)) goto lab2; if (z->c >= z->l) goto lab2; z->c++; /* next, line 57 */ goto lab1; lab2: z->c = c2; if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((331776 >> (z->p[z->c + 2] & 0x1f)) & 1)) goto lab3; if (!(find_among(z, a_0, 3))) goto lab3; /* among, line 59 */ goto lab1; lab3: 
z->c = c2; if (z->c >= z->l) goto lab0; z->c++; /* next, line 66 */ { /* gopast */ /* grouping v, line 66 */ int ret = out_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab0; z->c += ret; } } lab1: z->I[0] = z->c; /* setmark pV, line 67 */ lab0: z->c = c1; } { int c3 = z->c; /* do, line 69 */ { /* gopast */ /* grouping v, line 70 */ int ret = out_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 70 */ int ret = in_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 70 */ { /* gopast */ /* grouping v, line 71 */ int ret = out_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 71 */ int ret = in_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 71 */ lab4: z->c = c3; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 75 */ int c1 = z->c; z->bra = z->c; /* [, line 77 */ if (z->c >= z->l || z->p[z->c + 0] >> 5 != 2 || !((35652096 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 4; else among_var = find_among(z, a_1, 4); /* substring, line 77 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 77 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_13); /* <-, line 80 */ if (ret < 0) return ret; } break; case 4: if (z->c >= z->l) goto lab0; z->c++; /* next, line 81 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int 
r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 92 */ among_var = find_among_b(z, a_4, 43); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 96 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 99 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 100 */ z->ket = z->c; /* [, line 100 */ if (!(eq_s_b(z, 2, s_14))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 100 */ { int m1 = z->l - z->c; (void)m1; /* or, line 100 */ { int ret = r_R2(z); if (ret == 0) goto lab2; /* call R2, line 100 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 100 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m1; { int ret = slice_from_s(z, 3, s_15); /* <-, line 100 */ if (ret < 0) return ret; } } lab1: lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 104 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_16); /* <-, line 104 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 107 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_17); /* <-, line 107 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 110 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_18); /* <-, line 110 */ if (ret < 0) return ret; } break; case 6: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 114 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 114 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, 
line 115 */ z->ket = z->c; /* [, line 116 */ among_var = find_among_b(z, a_2, 6); /* substring, line 116 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 116 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab3; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 117 */ if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 117 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } break; case 2: { int m2 = z->l - z->c; (void)m2; /* or, line 118 */ { int ret = r_R2(z); if (ret == 0) goto lab5; /* call R2, line 118 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 118 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = z->l - m2; { int ret = r_R1(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R1, line 118 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_20); /* <-, line 118 */ if (ret < 0) return ret; } } lab4: break; case 3: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 120 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 120 */ if (ret < 0) return ret; } break; case 4: { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call RV, line 122 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_21); /* <-, line 122 */ if (ret < 0) return ret; } break; } lab3: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 129 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 129 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 130 */ 
z->ket = z->c; /* [, line 131 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab6; } among_var = find_among_b(z, a_3, 3); /* substring, line 131 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab6; } z->bra = z->c; /* ], line 131 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab6; } case 1: { int m3 = z->l - z->c; (void)m3; /* or, line 132 */ { int ret = r_R2(z); if (ret == 0) goto lab8; /* call R2, line 132 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 132 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m3; { int ret = slice_from_s(z, 3, s_22); /* <-, line 132 */ if (ret < 0) return ret; } } lab7: break; case 2: { int m4 = z->l - z->c; (void)m4; /* or, line 133 */ { int ret = r_R2(z); if (ret == 0) goto lab10; /* call R2, line 133 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 133 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m4; { int ret = slice_from_s(z, 3, s_23); /* <-, line 133 */ if (ret < 0) return ret; } } lab9: break; case 3: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } break; } lab6: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 141 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 141 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 142 */ z->ket = z->c; /* [, line 142 */ if (!(eq_s_b(z, 2, s_24))) { z->c = z->l - m_keep; goto lab11; } z->bra = z->c; /* ], line 142 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab11; } /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 142 */ if (!(eq_s_b(z, 2, s_25))) { z->c 
= z->l - m_keep; goto lab11; } z->bra = z->c; /* ], line 142 */ { int m5 = z->l - z->c; (void)m5; /* or, line 142 */ { int ret = r_R2(z); if (ret == 0) goto lab13; /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } goto lab12; lab13: z->c = z->l - m5; { int ret = slice_from_s(z, 3, s_26); /* <-, line 142 */ if (ret < 0) return ret; } } lab12: lab11: ; } break; case 9: { int ret = slice_from_s(z, 3, s_27); /* <-, line 144 */ if (ret < 0) return ret; } break; case 10: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 145 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_28); /* <-, line 145 */ if (ret < 0) return ret; } break; case 11: { int m6 = z->l - z->c; (void)m6; /* or, line 147 */ { int ret = r_R2(z); if (ret == 0) goto lab15; /* call R2, line 147 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } goto lab14; lab15: z->c = z->l - m6; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 147 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_29); /* <-, line 147 */ if (ret < 0) return ret; } } lab14: break; case 12: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 150 */ if (ret < 0) return ret; } if (out_grouping_b(z, g_v, 97, 251, 0)) return 0; { int ret = slice_del(z); /* delete, line 150 */ if (ret < 0) return ret; } break; case 13: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 155 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_30); /* <-, line 155 */ if (ret < 0) return ret; } return 0; /* fail, line 155 */ break; case 14: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 156 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_31); /* <-, line 156 */ if (ret < 0) return ret; } return 0; /* fail, line 156 */ break; case 15: { int m_test = z->l - z->c; /* test, line 158 */ if (in_grouping_b(z, g_v, 97, 251, 0)) return 
0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 158 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } return 0; /* fail, line 158 */ break; } return 1; } static int r_i_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 163 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 163 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 164 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68944418 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_5, 35); /* substring, line 164 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 164 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: if (out_grouping_b(z, g_v, 97, 251, 0)) { z->lb = mlimit; return 0; } { int ret = slice_del(z); /* delete, line 170 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 174 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 174 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 175 */ among_var = find_among_b(z, a_6, 38); /* substring, line 175 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 175 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = r_R2(z); if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 177 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 185 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 190 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 191 */ z->ket = 
z->c; /* [, line 191 */ if (!(eq_s_b(z, 1, s_32))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 191 */ { int ret = slice_del(z); /* delete, line 191 */ if (ret < 0) return ret; } lab0: ; } break; } z->lb = mlimit; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 199 */ z->ket = z->c; /* [, line 199 */ if (!(eq_s_b(z, 1, s_33))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 199 */ { int m_test = z->l - z->c; /* test, line 199 */ if (out_grouping_b(z, g_keep_with_s, 97, 232, 0)) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 199 */ if (ret < 0) return ret; } lab0: ; } { int mlimit; /* setlimit, line 200 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 200 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 201 */ among_var = find_among_b(z, a_7, 7); /* substring, line 201 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 201 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = r_R2(z); if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 202 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* or, line 202 */ if (!(eq_s_b(z, 1, s_34))) goto lab2; goto lab1; lab2: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_35))) { z->lb = mlimit; return 0; } } lab1: { int ret = slice_del(z); /* delete, line 202 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_36); /* <-, line 204 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 205 */ if (ret < 0) return ret; } break; case 4: if (!(eq_s_b(z, 2, s_37))) { z->lb = mlimit; return 0; } { int ret = slice_del(z); /* delete, line 206 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_un_double(struct SN_env * z) { { int 
m_test = z->l - z->c; /* test, line 212 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1069056 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_8, 5))) return 0; /* among, line 212 */ z->c = z->l - m_test; } z->ket = z->c; /* [, line 212 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 212 */ z->bra = z->c; /* ], line 212 */ { int ret = slice_del(z); /* delete, line 212 */ if (ret < 0) return ret; } return 1; } static int r_un_accent(struct SN_env * z) { { int i = 1; while(1) { /* atleast, line 216 */ if (out_grouping_b(z, g_v, 97, 251, 0)) goto lab0; i--; continue; lab0: break; } if (i > 0) return 0; } z->ket = z->c; /* [, line 217 */ { int m1 = z->l - z->c; (void)m1; /* or, line 217 */ if (!(eq_s_b(z, 1, s_38))) goto lab2; goto lab1; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_39))) return 0; } lab1: z->bra = z->c; /* ], line 217 */ { int ret = slice_from_s(z, 1, s_40); /* <-, line 217 */ if (ret < 0) return ret; } return 1; } extern int french_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 223 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 223 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 224 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 224 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 225 */ { int m3 = z->l - z->c; (void)m3; /* do, line 227 */ { int m4 = z->l - z->c; (void)m4; /* or, line 237 */ { int m5 = z->l - z->c; (void)m5; /* and, line 233 */ { int m6 = z->l - z->c; (void)m6; /* or, line 229 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab6; /* call standard_suffix, line 229 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_i_verb_suffix(z); if (ret == 0) goto lab7; /* call i_verb_suffix, line 230 */ if (ret < 0) return ret; } goto lab5; lab7: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto 
lab4; /* call verb_suffix, line 231 */ if (ret < 0) return ret; } } lab5: z->c = z->l - m5; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 234 */ z->ket = z->c; /* [, line 234 */ { int m7 = z->l - z->c; (void)m7; /* or, line 234 */ if (!(eq_s_b(z, 1, s_41))) goto lab10; z->bra = z->c; /* ], line 234 */ { int ret = slice_from_s(z, 1, s_42); /* <-, line 234 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m7; if (!(eq_s_b(z, 1, s_43))) { z->c = z->l - m_keep; goto lab8; } z->bra = z->c; /* ], line 235 */ { int ret = slice_from_s(z, 1, s_44); /* <-, line 235 */ if (ret < 0) return ret; } } lab9: lab8: ; } } goto lab3; lab4: z->c = z->l - m4; { int ret = r_residual_suffix(z); if (ret == 0) goto lab2; /* call residual_suffix, line 238 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m8 = z->l - z->c; (void)m8; /* do, line 243 */ { int ret = r_un_double(z); if (ret == 0) goto lab11; /* call un_double, line 243 */ if (ret < 0) return ret; } lab11: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 244 */ { int ret = r_un_accent(z); if (ret == 0) goto lab12; /* call un_accent, line 244 */ if (ret < 0) return ret; } lab12: z->c = z->l - m9; } z->c = z->lb; { int c10 = z->c; /* do, line 246 */ { int ret = r_postlude(z); if (ret == 0) goto lab13; /* call postlude, line 246 */ if (ret < 0) return ret; } lab13: z->c = c10; } return 1; } extern struct SN_env * french_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void french_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_french.h000066400000000000000000000005051217574114600312160ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * french_ISO_8859_1_create_env(void); extern void french_ISO_8859_1_close_env(struct SN_env * z); extern 
int french_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_german.c000066400000000000000000000424351217574114600312250ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int german_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * german_ISO_8859_1_create_env(void); extern void german_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 'U' }; static const symbol s_0_2[1] = { 'Y' }; static const symbol s_0_3[1] = { 0xE4 }; static const symbol s_0_4[1] = { 0xF6 }; static const symbol s_0_5[1] = { 0xFC }; static const struct among a_0[6] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 1, s_0_1, 0, 2, 0}, /* 2 */ { 1, s_0_2, 0, 1, 0}, /* 3 */ { 1, s_0_3, 0, 3, 0}, /* 4 */ { 1, s_0_4, 0, 4, 0}, /* 5 */ { 1, s_0_5, 0, 5, 0} }; static const symbol s_1_0[1] = { 'e' }; static const symbol s_1_1[2] = { 'e', 'm' }; static const symbol s_1_2[2] = { 'e', 'n' }; static const symbol s_1_3[3] = { 'e', 'r', 'n' }; static const symbol s_1_4[2] = { 'e', 'r' }; static const symbol s_1_5[1] = { 's' }; static const symbol s_1_6[2] = { 'e', 's' }; static const struct among a_1[7] = { /* 0 */ { 1, s_1_0, -1, 2, 0}, /* 1 */ { 2, s_1_1, -1, 1, 0}, /* 2 */ { 2, s_1_2, -1, 2, 0}, /* 3 */ { 3, s_1_3, -1, 1, 0}, /* 4 */ { 2, s_1_4, -1, 1, 0}, /* 5 */ { 1, s_1_5, -1, 3, 0}, /* 6 */ { 2, s_1_6, 5, 2, 0} }; static const symbol s_2_0[2] = { 'e', 'n' }; static const symbol s_2_1[2] = { 'e', 'r' }; static const 
symbol s_2_2[2] = { 's', 't' }; static const symbol s_2_3[3] = { 'e', 's', 't' }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 2, s_2_1, -1, 1, 0}, /* 2 */ { 2, s_2_2, -1, 2, 0}, /* 3 */ { 3, s_2_3, 2, 1, 0} }; static const symbol s_3_0[2] = { 'i', 'g' }; static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0} }; static const symbol s_4_0[3] = { 'e', 'n', 'd' }; static const symbol s_4_1[2] = { 'i', 'g' }; static const symbol s_4_2[3] = { 'u', 'n', 'g' }; static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; static const symbol s_4_5[2] = { 'i', 'k' }; static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; static const struct among a_4[8] = { /* 0 */ { 3, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 2, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 3, 0}, /* 4 */ { 4, s_4_4, -1, 2, 0}, /* 5 */ { 2, s_4_5, -1, 2, 0}, /* 6 */ { 4, s_4_6, -1, 3, 0}, /* 7 */ { 4, s_4_7, -1, 4, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; static const unsigned char g_s_ending[] = { 117, 30, 5 }; static const unsigned char g_st_ending[] = { 117, 30, 4 }; static const symbol s_0[] = { 0xDF }; static const symbol s_1[] = { 's', 's' }; static const symbol s_2[] = { 'u' }; static const symbol s_3[] = { 'U' }; static const symbol s_4[] = { 'y' }; static const symbol s_5[] = { 'Y' }; static const symbol s_6[] = { 'y' }; static const symbol s_7[] = { 'u' }; static const symbol s_8[] = { 'a' }; static const symbol s_9[] = { 'o' }; static const symbol s_10[] = { 'u' }; static const symbol s_11[] = { 's' }; static const symbol s_12[] = { 'n', 'i', 's' }; static const symbol s_13[] = { 'i', 'g' }; static const symbol s_14[] = { 'e' }; static const symbol s_15[] = { 'e' }; static const 
symbol s_16[] = { 'e', 'r' }; static const symbol s_17[] = { 'e', 'n' }; static int r_prelude(struct SN_env * z) { { int c_test = z->c; /* test, line 35 */ while(1) { /* repeat, line 35 */ int c1 = z->c; { int c2 = z->c; /* or, line 38 */ z->bra = z->c; /* [, line 37 */ if (!(eq_s(z, 1, s_0))) goto lab2; z->ket = z->c; /* ], line 37 */ { int ret = slice_from_s(z, 2, s_1); /* <-, line 37 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = c2; if (z->c >= z->l) goto lab0; z->c++; /* next, line 38 */ } lab1: continue; lab0: z->c = c1; break; } z->c = c_test; } while(1) { /* repeat, line 41 */ int c3 = z->c; while(1) { /* goto, line 41 */ int c4 = z->c; if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; z->bra = z->c; /* [, line 42 */ { int c5 = z->c; /* or, line 42 */ if (!(eq_s(z, 1, s_2))) goto lab6; z->ket = z->c; /* ], line 42 */ if (in_grouping(z, g_v, 97, 252, 0)) goto lab6; { int ret = slice_from_s(z, 1, s_3); /* <-, line 42 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = c5; if (!(eq_s(z, 1, s_4))) goto lab4; z->ket = z->c; /* ], line 43 */ if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; { int ret = slice_from_s(z, 1, s_5); /* <-, line 43 */ if (ret < 0) return ret; } } lab5: z->c = c4; break; lab4: z->c = c4; if (z->c >= z->l) goto lab3; z->c++; /* goto, line 41 */ } continue; lab3: z->c = c3; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c_test = z->c; /* test, line 52 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) return 0; z->c = ret; /* hop, line 52 */ } z->I[2] = z->c; /* setmark x, line 52 */ z->c = c_test; } { /* gopast */ /* grouping v, line 54 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 54 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 54 */ /* try, line 55 */ if (!(z->I[0] < z->I[2])) goto lab0; z->I[0] = z->I[2]; lab0: { /* gopast */ /* 
grouping v, line 56 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 56 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 56 */ return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 60 */ int c1 = z->c; z->bra = z->c; /* [, line 62 */ among_var = find_among(z, a_0, 6); /* substring, line 62 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 62 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_6); /* <-, line 63 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_7); /* <-, line 64 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_8); /* <-, line 65 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_9); /* <-, line 66 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */ if (ret < 0) return ret; } break; case 6: if (z->c >= z->l) goto lab0; z->c++; /* next, line 68 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 79 */ z->ket = z->c; /* [, line 80 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; among_var = find_among_b(z, a_1, 7); /* substring, line 80 */ if (!(among_var)) goto lab0; z->bra = z->c; /* ], line 80 */ { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 80 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_del(z); /* delete, line 82 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 
85 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 86 */ z->ket = z->c; /* [, line 86 */ if (!(eq_s_b(z, 1, s_11))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 86 */ if (!(eq_s_b(z, 3, s_12))) { z->c = z->l - m_keep; goto lab1; } { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } lab1: ; } break; case 3: if (in_grouping_b(z, g_s_ending, 98, 116, 0)) goto lab0; { int ret = slice_del(z); /* delete, line 89 */ if (ret < 0) return ret; } break; } lab0: z->c = z->l - m1; } { int m2 = z->l - z->c; (void)m2; /* do, line 93 */ z->ket = z->c; /* [, line 94 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; among_var = find_among_b(z, a_2, 4); /* substring, line 94 */ if (!(among_var)) goto lab2; z->bra = z->c; /* ], line 94 */ { int ret = r_R1(z); if (ret == 0) goto lab2; /* call R1, line 94 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab2; case 1: { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b(z, g_st_ending, 98, 116, 0)) goto lab2; { int ret = z->c - 3; if (z->lb > ret || ret > z->l) goto lab2; z->c = ret; /* hop, line 99 */ } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; } lab2: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 103 */ z->ket = z->c; /* [, line 104 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3; among_var = find_among_b(z, a_4, 8); /* substring, line 104 */ if (!(among_var)) goto lab3; z->bra = z->c; /* ], line 104 */ { int ret = r_R2(z); if (ret == 0) goto lab3; /* call R2, line 104 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab3; case 1: { int ret = slice_del(z); /* delete, line 106 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 107 */ z->ket = z->c; /* [, 
line 107 */ if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab4; } z->bra = z->c; /* ], line 107 */ { int m4 = z->l - z->c; (void)m4; /* not, line 107 */ if (!(eq_s_b(z, 1, s_14))) goto lab5; { z->c = z->l - m_keep; goto lab4; } lab5: z->c = z->l - m4; } { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 107 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 107 */ if (ret < 0) return ret; } lab4: ; } break; case 2: { int m5 = z->l - z->c; (void)m5; /* not, line 110 */ if (!(eq_s_b(z, 1, s_15))) goto lab6; goto lab3; lab6: z->c = z->l - m5; } { int ret = slice_del(z); /* delete, line 110 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ z->ket = z->c; /* [, line 115 */ { int m6 = z->l - z->c; (void)m6; /* or, line 115 */ if (!(eq_s_b(z, 2, s_16))) goto lab9; goto lab8; lab9: z->c = z->l - m6; if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab7; } } lab8: z->bra = z->c; /* ], line 115 */ { int ret = r_R1(z); if (ret == 0) { z->c = z->l - m_keep; goto lab7; } /* call R1, line 115 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 115 */ if (ret < 0) return ret; } lab7: ; } break; case 4: { int ret = slice_del(z); /* delete, line 119 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 120 */ z->ket = z->c; /* [, line 121 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab10; } among_var = find_among_b(z, a_3, 2); /* substring, line 121 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab10; } z->bra = z->c; /* ], line 121 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call R2, line 121 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab10; } case 1: { int ret = 
slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } break; } lab10: ; } break; } lab3: z->c = z->l - m3; } return 1; } extern int german_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 134 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 134 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 135 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 135 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 136 */ { int m3 = z->l - z->c; (void)m3; /* do, line 137 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab2; /* call standard_suffix, line 137 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; { int c4 = z->c; /* do, line 138 */ { int ret = r_postlude(z); if (ret == 0) goto lab3; /* call postlude, line 138 */ if (ret < 0) return ret; } lab3: z->c = c4; } return 1; } extern struct SN_env * german_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void german_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_german.h000066400000000000000000000005051217574114600312220ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * german_ISO_8859_1_create_env(void); extern void german_ISO_8859_1_close_env(struct SN_env * z); extern int german_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c000066400000000000000000001176071217574114600317340ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int hungarian_ISO_8859_1_stem(struct 
SN_env * z); #ifdef __cplusplus } #endif static int r_double(struct SN_env * z); static int r_undouble(struct SN_env * z); static int r_factive(struct SN_env * z); static int r_instrum(struct SN_env * z); static int r_plur_owner(struct SN_env * z); static int r_sing_owner(struct SN_env * z); static int r_owned(struct SN_env * z); static int r_plural(struct SN_env * z); static int r_case_other(struct SN_env * z); static int r_case_special(struct SN_env * z); static int r_case(struct SN_env * z); static int r_v_ending(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * hungarian_ISO_8859_1_create_env(void); extern void hungarian_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[2] = { 'c', 's' }; static const symbol s_0_1[3] = { 'd', 'z', 's' }; static const symbol s_0_2[2] = { 'g', 'y' }; static const symbol s_0_3[2] = { 'l', 'y' }; static const symbol s_0_4[2] = { 'n', 'y' }; static const symbol s_0_5[2] = { 's', 'z' }; static const symbol s_0_6[2] = { 't', 'y' }; static const symbol s_0_7[2] = { 'z', 's' }; static const struct among a_0[8] = { /* 0 */ { 2, s_0_0, -1, -1, 0}, /* 1 */ { 3, s_0_1, -1, -1, 0}, /* 2 */ { 2, s_0_2, -1, -1, 0}, /* 3 */ { 2, s_0_3, -1, -1, 0}, /* 4 */ { 2, s_0_4, -1, -1, 0}, /* 5 */ { 2, s_0_5, -1, -1, 0}, /* 6 */ { 2, s_0_6, -1, -1, 0}, /* 7 */ { 2, s_0_7, -1, -1, 0} }; static const symbol s_1_0[1] = { 0xE1 }; static const symbol s_1_1[1] = { 0xE9 }; static const struct among a_1[2] = { /* 0 */ { 1, s_1_0, -1, 1, 0}, /* 1 */ { 1, s_1_1, -1, 2, 0} }; static const symbol s_2_0[2] = { 'b', 'b' }; static const symbol s_2_1[2] = { 'c', 'c' }; static const symbol s_2_2[2] = { 'd', 'd' }; static const symbol s_2_3[2] = { 'f', 'f' }; static const symbol s_2_4[2] = { 'g', 'g' }; static const symbol s_2_5[2] = { 'j', 'j' }; static const symbol s_2_6[2] = { 'k', 'k' }; static const symbol 
s_2_7[2] = { 'l', 'l' }; static const symbol s_2_8[2] = { 'm', 'm' }; static const symbol s_2_9[2] = { 'n', 'n' }; static const symbol s_2_10[2] = { 'p', 'p' }; static const symbol s_2_11[2] = { 'r', 'r' }; static const symbol s_2_12[3] = { 'c', 'c', 's' }; static const symbol s_2_13[2] = { 's', 's' }; static const symbol s_2_14[3] = { 'z', 'z', 's' }; static const symbol s_2_15[2] = { 't', 't' }; static const symbol s_2_16[2] = { 'v', 'v' }; static const symbol s_2_17[3] = { 'g', 'g', 'y' }; static const symbol s_2_18[3] = { 'l', 'l', 'y' }; static const symbol s_2_19[3] = { 'n', 'n', 'y' }; static const symbol s_2_20[3] = { 't', 't', 'y' }; static const symbol s_2_21[3] = { 's', 's', 'z' }; static const symbol s_2_22[2] = { 'z', 'z' }; static const struct among a_2[23] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, -1, 0}, /* 4 */ { 2, s_2_4, -1, -1, 0}, /* 5 */ { 2, s_2_5, -1, -1, 0}, /* 6 */ { 2, s_2_6, -1, -1, 0}, /* 7 */ { 2, s_2_7, -1, -1, 0}, /* 8 */ { 2, s_2_8, -1, -1, 0}, /* 9 */ { 2, s_2_9, -1, -1, 0}, /* 10 */ { 2, s_2_10, -1, -1, 0}, /* 11 */ { 2, s_2_11, -1, -1, 0}, /* 12 */ { 3, s_2_12, -1, -1, 0}, /* 13 */ { 2, s_2_13, -1, -1, 0}, /* 14 */ { 3, s_2_14, -1, -1, 0}, /* 15 */ { 2, s_2_15, -1, -1, 0}, /* 16 */ { 2, s_2_16, -1, -1, 0}, /* 17 */ { 3, s_2_17, -1, -1, 0}, /* 18 */ { 3, s_2_18, -1, -1, 0}, /* 19 */ { 3, s_2_19, -1, -1, 0}, /* 20 */ { 3, s_2_20, -1, -1, 0}, /* 21 */ { 3, s_2_21, -1, -1, 0}, /* 22 */ { 2, s_2_22, -1, -1, 0} }; static const symbol s_3_0[2] = { 'a', 'l' }; static const symbol s_3_1[2] = { 'e', 'l' }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 2, s_3_1, -1, 2, 0} }; static const symbol s_4_0[2] = { 'b', 'a' }; static const symbol s_4_1[2] = { 'r', 'a' }; static const symbol s_4_2[2] = { 'b', 'e' }; static const symbol s_4_3[2] = { 'r', 'e' }; static const symbol s_4_4[2] = { 'i', 'g' }; static const symbol s_4_5[3] = { 
'n', 'a', 'k' }; static const symbol s_4_6[3] = { 'n', 'e', 'k' }; static const symbol s_4_7[3] = { 'v', 'a', 'l' }; static const symbol s_4_8[3] = { 'v', 'e', 'l' }; static const symbol s_4_9[2] = { 'u', 'l' }; static const symbol s_4_10[3] = { 'n', 0xE1, 'l' }; static const symbol s_4_11[3] = { 'n', 0xE9, 'l' }; static const symbol s_4_12[3] = { 'b', 0xF3, 'l' }; static const symbol s_4_13[3] = { 'r', 0xF3, 'l' }; static const symbol s_4_14[3] = { 't', 0xF3, 'l' }; static const symbol s_4_15[3] = { 'b', 0xF5, 'l' }; static const symbol s_4_16[3] = { 'r', 0xF5, 'l' }; static const symbol s_4_17[3] = { 't', 0xF5, 'l' }; static const symbol s_4_18[2] = { 0xFC, 'l' }; static const symbol s_4_19[1] = { 'n' }; static const symbol s_4_20[2] = { 'a', 'n' }; static const symbol s_4_21[3] = { 'b', 'a', 'n' }; static const symbol s_4_22[2] = { 'e', 'n' }; static const symbol s_4_23[3] = { 'b', 'e', 'n' }; static const symbol s_4_24[6] = { 'k', 0xE9, 'p', 'p', 'e', 'n' }; static const symbol s_4_25[2] = { 'o', 'n' }; static const symbol s_4_26[2] = { 0xF6, 'n' }; static const symbol s_4_27[4] = { 'k', 0xE9, 'p', 'p' }; static const symbol s_4_28[3] = { 'k', 'o', 'r' }; static const symbol s_4_29[1] = { 't' }; static const symbol s_4_30[2] = { 'a', 't' }; static const symbol s_4_31[2] = { 'e', 't' }; static const symbol s_4_32[4] = { 'k', 0xE9, 'n', 't' }; static const symbol s_4_33[6] = { 'a', 'n', 'k', 0xE9, 'n', 't' }; static const symbol s_4_34[6] = { 'e', 'n', 'k', 0xE9, 'n', 't' }; static const symbol s_4_35[6] = { 'o', 'n', 'k', 0xE9, 'n', 't' }; static const symbol s_4_36[2] = { 'o', 't' }; static const symbol s_4_37[3] = { 0xE9, 'r', 't' }; static const symbol s_4_38[2] = { 0xF6, 't' }; static const symbol s_4_39[3] = { 'h', 'e', 'z' }; static const symbol s_4_40[3] = { 'h', 'o', 'z' }; static const symbol s_4_41[3] = { 'h', 0xF6, 'z' }; static const symbol s_4_42[2] = { 'v', 0xE1 }; static const symbol s_4_43[2] = { 'v', 0xE9 }; static const struct among a_4[44] = { 
/* 0 */ { 2, s_4_0, -1, -1, 0}, /* 1 */ { 2, s_4_1, -1, -1, 0}, /* 2 */ { 2, s_4_2, -1, -1, 0}, /* 3 */ { 2, s_4_3, -1, -1, 0}, /* 4 */ { 2, s_4_4, -1, -1, 0}, /* 5 */ { 3, s_4_5, -1, -1, 0}, /* 6 */ { 3, s_4_6, -1, -1, 0}, /* 7 */ { 3, s_4_7, -1, -1, 0}, /* 8 */ { 3, s_4_8, -1, -1, 0}, /* 9 */ { 2, s_4_9, -1, -1, 0}, /* 10 */ { 3, s_4_10, -1, -1, 0}, /* 11 */ { 3, s_4_11, -1, -1, 0}, /* 12 */ { 3, s_4_12, -1, -1, 0}, /* 13 */ { 3, s_4_13, -1, -1, 0}, /* 14 */ { 3, s_4_14, -1, -1, 0}, /* 15 */ { 3, s_4_15, -1, -1, 0}, /* 16 */ { 3, s_4_16, -1, -1, 0}, /* 17 */ { 3, s_4_17, -1, -1, 0}, /* 18 */ { 2, s_4_18, -1, -1, 0}, /* 19 */ { 1, s_4_19, -1, -1, 0}, /* 20 */ { 2, s_4_20, 19, -1, 0}, /* 21 */ { 3, s_4_21, 20, -1, 0}, /* 22 */ { 2, s_4_22, 19, -1, 0}, /* 23 */ { 3, s_4_23, 22, -1, 0}, /* 24 */ { 6, s_4_24, 22, -1, 0}, /* 25 */ { 2, s_4_25, 19, -1, 0}, /* 26 */ { 2, s_4_26, 19, -1, 0}, /* 27 */ { 4, s_4_27, -1, -1, 0}, /* 28 */ { 3, s_4_28, -1, -1, 0}, /* 29 */ { 1, s_4_29, -1, -1, 0}, /* 30 */ { 2, s_4_30, 29, -1, 0}, /* 31 */ { 2, s_4_31, 29, -1, 0}, /* 32 */ { 4, s_4_32, 29, -1, 0}, /* 33 */ { 6, s_4_33, 32, -1, 0}, /* 34 */ { 6, s_4_34, 32, -1, 0}, /* 35 */ { 6, s_4_35, 32, -1, 0}, /* 36 */ { 2, s_4_36, 29, -1, 0}, /* 37 */ { 3, s_4_37, 29, -1, 0}, /* 38 */ { 2, s_4_38, 29, -1, 0}, /* 39 */ { 3, s_4_39, -1, -1, 0}, /* 40 */ { 3, s_4_40, -1, -1, 0}, /* 41 */ { 3, s_4_41, -1, -1, 0}, /* 42 */ { 2, s_4_42, -1, -1, 0}, /* 43 */ { 2, s_4_43, -1, -1, 0} }; static const symbol s_5_0[2] = { 0xE1, 'n' }; static const symbol s_5_1[2] = { 0xE9, 'n' }; static const symbol s_5_2[6] = { 0xE1, 'n', 'k', 0xE9, 'n', 't' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 2, 0}, /* 1 */ { 2, s_5_1, -1, 1, 0}, /* 2 */ { 6, s_5_2, -1, 3, 0} }; static const symbol s_6_0[4] = { 's', 't', 'u', 'l' }; static const symbol s_6_1[5] = { 'a', 's', 't', 'u', 'l' }; static const symbol s_6_2[5] = { 0xE1, 's', 't', 'u', 'l' }; static const symbol s_6_3[4] = { 's', 't', 0xFC, 'l' 
}; static const symbol s_6_4[5] = { 'e', 's', 't', 0xFC, 'l' }; static const symbol s_6_5[5] = { 0xE9, 's', 't', 0xFC, 'l' }; static const struct among a_6[6] = { /* 0 */ { 4, s_6_0, -1, 2, 0}, /* 1 */ { 5, s_6_1, 0, 1, 0}, /* 2 */ { 5, s_6_2, 0, 3, 0}, /* 3 */ { 4, s_6_3, -1, 2, 0}, /* 4 */ { 5, s_6_4, 3, 1, 0}, /* 5 */ { 5, s_6_5, 3, 4, 0} }; static const symbol s_7_0[1] = { 0xE1 }; static const symbol s_7_1[1] = { 0xE9 }; static const struct among a_7[2] = { /* 0 */ { 1, s_7_0, -1, 1, 0}, /* 1 */ { 1, s_7_1, -1, 2, 0} }; static const symbol s_8_0[1] = { 'k' }; static const symbol s_8_1[2] = { 'a', 'k' }; static const symbol s_8_2[2] = { 'e', 'k' }; static const symbol s_8_3[2] = { 'o', 'k' }; static const symbol s_8_4[2] = { 0xE1, 'k' }; static const symbol s_8_5[2] = { 0xE9, 'k' }; static const symbol s_8_6[2] = { 0xF6, 'k' }; static const struct among a_8[7] = { /* 0 */ { 1, s_8_0, -1, 7, 0}, /* 1 */ { 2, s_8_1, 0, 4, 0}, /* 2 */ { 2, s_8_2, 0, 6, 0}, /* 3 */ { 2, s_8_3, 0, 5, 0}, /* 4 */ { 2, s_8_4, 0, 1, 0}, /* 5 */ { 2, s_8_5, 0, 2, 0}, /* 6 */ { 2, s_8_6, 0, 3, 0} }; static const symbol s_9_0[2] = { 0xE9, 'i' }; static const symbol s_9_1[3] = { 0xE1, 0xE9, 'i' }; static const symbol s_9_2[3] = { 0xE9, 0xE9, 'i' }; static const symbol s_9_3[1] = { 0xE9 }; static const symbol s_9_4[2] = { 'k', 0xE9 }; static const symbol s_9_5[3] = { 'a', 'k', 0xE9 }; static const symbol s_9_6[3] = { 'e', 'k', 0xE9 }; static const symbol s_9_7[3] = { 'o', 'k', 0xE9 }; static const symbol s_9_8[3] = { 0xE1, 'k', 0xE9 }; static const symbol s_9_9[3] = { 0xE9, 'k', 0xE9 }; static const symbol s_9_10[3] = { 0xF6, 'k', 0xE9 }; static const symbol s_9_11[2] = { 0xE9, 0xE9 }; static const struct among a_9[12] = { /* 0 */ { 2, s_9_0, -1, 7, 0}, /* 1 */ { 3, s_9_1, 0, 6, 0}, /* 2 */ { 3, s_9_2, 0, 5, 0}, /* 3 */ { 1, s_9_3, -1, 9, 0}, /* 4 */ { 2, s_9_4, 3, 4, 0}, /* 5 */ { 3, s_9_5, 4, 1, 0}, /* 6 */ { 3, s_9_6, 4, 1, 0}, /* 7 */ { 3, s_9_7, 4, 1, 0}, /* 8 */ { 3, s_9_8, 4, 3, 0}, 
/* 9 */ { 3, s_9_9, 4, 2, 0}, /* 10 */ { 3, s_9_10, 4, 1, 0}, /* 11 */ { 2, s_9_11, 3, 8, 0} }; static const symbol s_10_0[1] = { 'a' }; static const symbol s_10_1[2] = { 'j', 'a' }; static const symbol s_10_2[1] = { 'd' }; static const symbol s_10_3[2] = { 'a', 'd' }; static const symbol s_10_4[2] = { 'e', 'd' }; static const symbol s_10_5[2] = { 'o', 'd' }; static const symbol s_10_6[2] = { 0xE1, 'd' }; static const symbol s_10_7[2] = { 0xE9, 'd' }; static const symbol s_10_8[2] = { 0xF6, 'd' }; static const symbol s_10_9[1] = { 'e' }; static const symbol s_10_10[2] = { 'j', 'e' }; static const symbol s_10_11[2] = { 'n', 'k' }; static const symbol s_10_12[3] = { 'u', 'n', 'k' }; static const symbol s_10_13[3] = { 0xE1, 'n', 'k' }; static const symbol s_10_14[3] = { 0xE9, 'n', 'k' }; static const symbol s_10_15[3] = { 0xFC, 'n', 'k' }; static const symbol s_10_16[2] = { 'u', 'k' }; static const symbol s_10_17[3] = { 'j', 'u', 'k' }; static const symbol s_10_18[4] = { 0xE1, 'j', 'u', 'k' }; static const symbol s_10_19[2] = { 0xFC, 'k' }; static const symbol s_10_20[3] = { 'j', 0xFC, 'k' }; static const symbol s_10_21[4] = { 0xE9, 'j', 0xFC, 'k' }; static const symbol s_10_22[1] = { 'm' }; static const symbol s_10_23[2] = { 'a', 'm' }; static const symbol s_10_24[2] = { 'e', 'm' }; static const symbol s_10_25[2] = { 'o', 'm' }; static const symbol s_10_26[2] = { 0xE1, 'm' }; static const symbol s_10_27[2] = { 0xE9, 'm' }; static const symbol s_10_28[1] = { 'o' }; static const symbol s_10_29[1] = { 0xE1 }; static const symbol s_10_30[1] = { 0xE9 }; static const struct among a_10[31] = { /* 0 */ { 1, s_10_0, -1, 18, 0}, /* 1 */ { 2, s_10_1, 0, 17, 0}, /* 2 */ { 1, s_10_2, -1, 16, 0}, /* 3 */ { 2, s_10_3, 2, 13, 0}, /* 4 */ { 2, s_10_4, 2, 13, 0}, /* 5 */ { 2, s_10_5, 2, 13, 0}, /* 6 */ { 2, s_10_6, 2, 14, 0}, /* 7 */ { 2, s_10_7, 2, 15, 0}, /* 8 */ { 2, s_10_8, 2, 13, 0}, /* 9 */ { 1, s_10_9, -1, 18, 0}, /* 10 */ { 2, s_10_10, 9, 17, 0}, /* 11 */ { 2, s_10_11, -1, 4, 
0}, /* 12 */ { 3, s_10_12, 11, 1, 0}, /* 13 */ { 3, s_10_13, 11, 2, 0}, /* 14 */ { 3, s_10_14, 11, 3, 0}, /* 15 */ { 3, s_10_15, 11, 1, 0}, /* 16 */ { 2, s_10_16, -1, 8, 0}, /* 17 */ { 3, s_10_17, 16, 7, 0}, /* 18 */ { 4, s_10_18, 17, 5, 0}, /* 19 */ { 2, s_10_19, -1, 8, 0}, /* 20 */ { 3, s_10_20, 19, 7, 0}, /* 21 */ { 4, s_10_21, 20, 6, 0}, /* 22 */ { 1, s_10_22, -1, 12, 0}, /* 23 */ { 2, s_10_23, 22, 9, 0}, /* 24 */ { 2, s_10_24, 22, 9, 0}, /* 25 */ { 2, s_10_25, 22, 9, 0}, /* 26 */ { 2, s_10_26, 22, 10, 0}, /* 27 */ { 2, s_10_27, 22, 11, 0}, /* 28 */ { 1, s_10_28, -1, 18, 0}, /* 29 */ { 1, s_10_29, -1, 19, 0}, /* 30 */ { 1, s_10_30, -1, 20, 0} }; static const symbol s_11_0[2] = { 'i', 'd' }; static const symbol s_11_1[3] = { 'a', 'i', 'd' }; static const symbol s_11_2[4] = { 'j', 'a', 'i', 'd' }; static const symbol s_11_3[3] = { 'e', 'i', 'd' }; static const symbol s_11_4[4] = { 'j', 'e', 'i', 'd' }; static const symbol s_11_5[3] = { 0xE1, 'i', 'd' }; static const symbol s_11_6[3] = { 0xE9, 'i', 'd' }; static const symbol s_11_7[1] = { 'i' }; static const symbol s_11_8[2] = { 'a', 'i' }; static const symbol s_11_9[3] = { 'j', 'a', 'i' }; static const symbol s_11_10[2] = { 'e', 'i' }; static const symbol s_11_11[3] = { 'j', 'e', 'i' }; static const symbol s_11_12[2] = { 0xE1, 'i' }; static const symbol s_11_13[2] = { 0xE9, 'i' }; static const symbol s_11_14[4] = { 'i', 't', 'e', 'k' }; static const symbol s_11_15[5] = { 'e', 'i', 't', 'e', 'k' }; static const symbol s_11_16[6] = { 'j', 'e', 'i', 't', 'e', 'k' }; static const symbol s_11_17[5] = { 0xE9, 'i', 't', 'e', 'k' }; static const symbol s_11_18[2] = { 'i', 'k' }; static const symbol s_11_19[3] = { 'a', 'i', 'k' }; static const symbol s_11_20[4] = { 'j', 'a', 'i', 'k' }; static const symbol s_11_21[3] = { 'e', 'i', 'k' }; static const symbol s_11_22[4] = { 'j', 'e', 'i', 'k' }; static const symbol s_11_23[3] = { 0xE1, 'i', 'k' }; static const symbol s_11_24[3] = { 0xE9, 'i', 'k' }; static const symbol 
s_11_25[3] = { 'i', 'n', 'k' }; static const symbol s_11_26[4] = { 'a', 'i', 'n', 'k' }; static const symbol s_11_27[5] = { 'j', 'a', 'i', 'n', 'k' }; static const symbol s_11_28[4] = { 'e', 'i', 'n', 'k' }; static const symbol s_11_29[5] = { 'j', 'e', 'i', 'n', 'k' }; static const symbol s_11_30[4] = { 0xE1, 'i', 'n', 'k' }; static const symbol s_11_31[4] = { 0xE9, 'i', 'n', 'k' }; static const symbol s_11_32[5] = { 'a', 'i', 't', 'o', 'k' }; static const symbol s_11_33[6] = { 'j', 'a', 'i', 't', 'o', 'k' }; static const symbol s_11_34[5] = { 0xE1, 'i', 't', 'o', 'k' }; static const symbol s_11_35[2] = { 'i', 'm' }; static const symbol s_11_36[3] = { 'a', 'i', 'm' }; static const symbol s_11_37[4] = { 'j', 'a', 'i', 'm' }; static const symbol s_11_38[3] = { 'e', 'i', 'm' }; static const symbol s_11_39[4] = { 'j', 'e', 'i', 'm' }; static const symbol s_11_40[3] = { 0xE1, 'i', 'm' }; static const symbol s_11_41[3] = { 0xE9, 'i', 'm' }; static const struct among a_11[42] = { /* 0 */ { 2, s_11_0, -1, 10, 0}, /* 1 */ { 3, s_11_1, 0, 9, 0}, /* 2 */ { 4, s_11_2, 1, 6, 0}, /* 3 */ { 3, s_11_3, 0, 9, 0}, /* 4 */ { 4, s_11_4, 3, 6, 0}, /* 5 */ { 3, s_11_5, 0, 7, 0}, /* 6 */ { 3, s_11_6, 0, 8, 0}, /* 7 */ { 1, s_11_7, -1, 15, 0}, /* 8 */ { 2, s_11_8, 7, 14, 0}, /* 9 */ { 3, s_11_9, 8, 11, 0}, /* 10 */ { 2, s_11_10, 7, 14, 0}, /* 11 */ { 3, s_11_11, 10, 11, 0}, /* 12 */ { 2, s_11_12, 7, 12, 0}, /* 13 */ { 2, s_11_13, 7, 13, 0}, /* 14 */ { 4, s_11_14, -1, 24, 0}, /* 15 */ { 5, s_11_15, 14, 21, 0}, /* 16 */ { 6, s_11_16, 15, 20, 0}, /* 17 */ { 5, s_11_17, 14, 23, 0}, /* 18 */ { 2, s_11_18, -1, 29, 0}, /* 19 */ { 3, s_11_19, 18, 26, 0}, /* 20 */ { 4, s_11_20, 19, 25, 0}, /* 21 */ { 3, s_11_21, 18, 26, 0}, /* 22 */ { 4, s_11_22, 21, 25, 0}, /* 23 */ { 3, s_11_23, 18, 27, 0}, /* 24 */ { 3, s_11_24, 18, 28, 0}, /* 25 */ { 3, s_11_25, -1, 20, 0}, /* 26 */ { 4, s_11_26, 25, 17, 0}, /* 27 */ { 5, s_11_27, 26, 16, 0}, /* 28 */ { 4, s_11_28, 25, 17, 0}, /* 29 */ { 5, s_11_29, 28, 16, 
0}, /* 30 */ { 4, s_11_30, 25, 18, 0}, /* 31 */ { 4, s_11_31, 25, 19, 0}, /* 32 */ { 5, s_11_32, -1, 21, 0}, /* 33 */ { 6, s_11_33, 32, 20, 0}, /* 34 */ { 5, s_11_34, -1, 22, 0}, /* 35 */ { 2, s_11_35, -1, 5, 0}, /* 36 */ { 3, s_11_36, 35, 4, 0}, /* 37 */ { 4, s_11_37, 36, 1, 0}, /* 38 */ { 3, s_11_38, 35, 4, 0}, /* 39 */ { 4, s_11_39, 38, 1, 0}, /* 40 */ { 3, s_11_40, 35, 2, 0}, /* 41 */ { 3, s_11_41, 35, 3, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'e' }; static const symbol s_3[] = { 'a' }; static const symbol s_4[] = { 'a' }; static const symbol s_5[] = { 'a' }; static const symbol s_6[] = { 'e' }; static const symbol s_7[] = { 'a' }; static const symbol s_8[] = { 'e' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'a' }; static const symbol s_11[] = { 'e' }; static const symbol s_12[] = { 'a' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'a' }; static const symbol s_15[] = { 'e' }; static const symbol s_16[] = { 'a' }; static const symbol s_17[] = { 'e' }; static const symbol s_18[] = { 'a' }; static const symbol s_19[] = { 'e' }; static const symbol s_20[] = { 'a' }; static const symbol s_21[] = { 'e' }; static const symbol s_22[] = { 'a' }; static const symbol s_23[] = { 'e' }; static const symbol s_24[] = { 'a' }; static const symbol s_25[] = { 'e' }; static const symbol s_26[] = { 'a' }; static const symbol s_27[] = { 'e' }; static const symbol s_28[] = { 'a' }; static const symbol s_29[] = { 'e' }; static const symbol s_30[] = { 'a' }; static const symbol s_31[] = { 'e' }; static const symbol s_32[] = { 'a' }; static const symbol s_33[] = { 'e' }; static const symbol s_34[] = { 'a' }; static const symbol s_35[] = { 'e' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c1 = z->c; /* or, line 51 */ if (in_grouping(z, 
g_v, 97, 252, 0)) goto lab1; if (in_grouping(z, g_v, 97, 252, 1) < 0) goto lab1; /* goto */ /* non v, line 48 */ { int c2 = z->c; /* or, line 49 */ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 3 || !((101187584 >> (z->p[z->c + 1] & 0x1f)) & 1)) goto lab3; if (!(find_among(z, a_0, 8))) goto lab3; /* among, line 49 */ goto lab2; lab3: z->c = c2; if (z->c >= z->l) goto lab1; z->c++; /* next, line 49 */ } lab2: z->I[0] = z->c; /* setmark p1, line 50 */ goto lab0; lab1: z->c = c1; if (out_grouping(z, g_v, 97, 252, 0)) return 0; { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 53 */ } lab0: return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_v_ending(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 61 */ if (z->c <= z->lb || (z->p[z->c - 1] != 225 && z->p[z->c - 1] != 233)) return 0; among_var = find_among_b(z, a_1, 2); /* substring, line 61 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 61 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 61 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 62 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 63 */ if (ret < 0) return ret; } break; } return 1; } static int r_double(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 68 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((106790108 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 23))) return 0; /* among, line 68 */ z->c = z->l - m_test; } return 1; } static int r_undouble(struct SN_env * z) { if (z->c <= z->lb) return 0; z->c--; /* next, line 73 */ z->ket = z->c; /* [, line 73 */ { int ret = z->c - 1; if (z->lb > ret || ret > z->l) return 0; z->c = ret; /* hop, line 73 */ } z->bra = z->c; /* ], line 73 */ { int 
ret = slice_del(z); /* delete, line 73 */ if (ret < 0) return ret; } return 1; } static int r_instrum(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 77 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 108) return 0; among_var = find_among_b(z, a_3, 2); /* substring, line 77 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 77 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 77 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 79 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 82 */ if (ret < 0) return ret; } return 1; } static int r_case(struct SN_env * z) { z->ket = z->c; /* [, line 87 */ if (!(find_among_b(z, a_4, 44))) return 0; /* substring, line 87 */ z->bra = z->c; /* ], line 87 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 87 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 111 */ if (ret < 0) return ret; } { int ret = r_v_ending(z); if (ret == 0) return 0; /* call v_ending, line 112 */ if (ret < 0) return ret; } return 1; } static int r_case_special(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 116 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 110 && z->p[z->c - 1] != 116)) return 0; among_var = find_among_b(z, a_5, 3); /* substring, line 116 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 116 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 116 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_2); /* <-, line 117 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_3); /* <-, line 118 */ if (ret < 0) return 
ret; } break; case 3: { int ret = slice_from_s(z, 1, s_4); /* <-, line 119 */ if (ret < 0) return ret; } break; } return 1; } static int r_case_other(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 124 */ if (z->c - 3 <= z->lb || z->p[z->c - 1] != 108) return 0; among_var = find_among_b(z, a_6, 6); /* substring, line 124 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 124 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 124 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 126 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_5); /* <-, line 127 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_6); /* <-, line 128 */ if (ret < 0) return ret; } break; } return 1; } static int r_factive(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 133 */ if (z->c <= z->lb || (z->p[z->c - 1] != 225 && z->p[z->c - 1] != 233)) return 0; among_var = find_among_b(z, a_7, 2); /* substring, line 133 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 133 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 133 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 134 */ if (ret < 0) return ret; } break; case 2: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 135 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 137 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 138 */ if (ret < 0) return ret; } return 1; } static int r_plural(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 142 */ if (z->c <= z->lb || z->p[z->c - 1] != 107) return 0; among_var = find_among_b(z, a_8, 7); /* substring, line 142 */ 
if (!(among_var)) return 0; z->bra = z->c; /* ], line 142 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 142 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_7); /* <-, line 143 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_8); /* <-, line 144 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 146 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 148 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 149 */ if (ret < 0) return ret; } break; } return 1; } static int r_owned(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 154 */ if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 233)) return 0; among_var = find_among_b(z, a_9, 12); /* substring, line 154 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 154 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 154 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 155 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_9); /* <-, line 156 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_10); /* <-, line 157 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_11); /* <-, line 159 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_12); /* <-, line 160 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 161 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 1, 
s_13); /* <-, line 162 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } break; } return 1; } static int r_sing_owner(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 168 */ among_var = find_among_b(z, a_10, 31); /* substring, line 168 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 168 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 168 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 169 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_14); /* <-, line 170 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_15); /* <-, line 171 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 172 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_16); /* <-, line 173 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_17); /* <-, line 174 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 175 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 1, s_18); /* <-, line 178 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 1, s_19); /* <-, line 179 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_del(z); /* delete, line 180 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_del(z); /* delete, line 181 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 1, s_20); /* <-, line 182 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_from_s(z, 1, s_21); /* <-, line 183 */ if (ret < 0) return ret; } break; case 16: { int ret = slice_del(z); /* delete, 
line 184 */ if (ret < 0) return ret; } break; case 17: { int ret = slice_del(z); /* delete, line 185 */ if (ret < 0) return ret; } break; case 18: { int ret = slice_del(z); /* delete, line 186 */ if (ret < 0) return ret; } break; case 19: { int ret = slice_from_s(z, 1, s_22); /* <-, line 187 */ if (ret < 0) return ret; } break; case 20: { int ret = slice_from_s(z, 1, s_23); /* <-, line 188 */ if (ret < 0) return ret; } break; } return 1; } static int r_plur_owner(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 193 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((10768 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_11, 42); /* substring, line 193 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 193 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 193 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_24); /* <-, line 195 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_25); /* <-, line 196 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 197 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 198 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 199 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 1, s_26); /* <-, line 200 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 1, s_27); /* <-, line 201 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 202 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_del(z); /* delete, line 203 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_del(z); /* delete, line 204 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 1, s_28); /* <-, line 205 */ 
if (ret < 0) return ret; } break; case 13: { int ret = slice_from_s(z, 1, s_29); /* <-, line 206 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_del(z); /* delete, line 207 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_del(z); /* delete, line 208 */ if (ret < 0) return ret; } break; case 16: { int ret = slice_del(z); /* delete, line 209 */ if (ret < 0) return ret; } break; case 17: { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } break; case 18: { int ret = slice_from_s(z, 1, s_30); /* <-, line 211 */ if (ret < 0) return ret; } break; case 19: { int ret = slice_from_s(z, 1, s_31); /* <-, line 212 */ if (ret < 0) return ret; } break; case 20: { int ret = slice_del(z); /* delete, line 214 */ if (ret < 0) return ret; } break; case 21: { int ret = slice_del(z); /* delete, line 215 */ if (ret < 0) return ret; } break; case 22: { int ret = slice_from_s(z, 1, s_32); /* <-, line 216 */ if (ret < 0) return ret; } break; case 23: { int ret = slice_from_s(z, 1, s_33); /* <-, line 217 */ if (ret < 0) return ret; } break; case 24: { int ret = slice_del(z); /* delete, line 218 */ if (ret < 0) return ret; } break; case 25: { int ret = slice_del(z); /* delete, line 219 */ if (ret < 0) return ret; } break; case 26: { int ret = slice_del(z); /* delete, line 220 */ if (ret < 0) return ret; } break; case 27: { int ret = slice_from_s(z, 1, s_34); /* <-, line 221 */ if (ret < 0) return ret; } break; case 28: { int ret = slice_from_s(z, 1, s_35); /* <-, line 222 */ if (ret < 0) return ret; } break; case 29: { int ret = slice_del(z); /* delete, line 223 */ if (ret < 0) return ret; } break; } return 1; } extern int hungarian_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 229 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 229 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 230 */ { int m2 = z->l - z->c; (void)m2; /* do, line 
231 */ { int ret = r_instrum(z); if (ret == 0) goto lab1; /* call instrum, line 231 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 232 */ { int ret = r_case(z); if (ret == 0) goto lab2; /* call case, line 232 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 233 */ { int ret = r_case_special(z); if (ret == 0) goto lab3; /* call case_special, line 233 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 234 */ { int ret = r_case_other(z); if (ret == 0) goto lab4; /* call case_other, line 234 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* do, line 235 */ { int ret = r_factive(z); if (ret == 0) goto lab5; /* call factive, line 235 */ if (ret < 0) return ret; } lab5: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 236 */ { int ret = r_owned(z); if (ret == 0) goto lab6; /* call owned, line 236 */ if (ret < 0) return ret; } lab6: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 237 */ { int ret = r_sing_owner(z); if (ret == 0) goto lab7; /* call sing_owner, line 237 */ if (ret < 0) return ret; } lab7: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 238 */ { int ret = r_plur_owner(z); if (ret == 0) goto lab8; /* call plur_owner, line 238 */ if (ret < 0) return ret; } lab8: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 239 */ { int ret = r_plural(z); if (ret == 0) goto lab9; /* call plural, line 239 */ if (ret < 0) return ret; } lab9: z->c = z->l - m10; } z->c = z->lb; return 1; } extern struct SN_env * hungarian_ISO_8859_1_create_env(void) { return SN_create_env(0, 1, 0); } extern void hungarian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h000066400000000000000000000005161217574114600317270ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * hungarian_ISO_8859_1_create_env(void); extern void hungarian_ISO_8859_1_close_env(struct SN_env * z); extern int hungarian_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_italian.c000066400000000000000000001161051217574114600313710ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int italian_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_vowel_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_attached_pronoun(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * italian_ISO_8859_1_create_env(void); extern void italian_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 'q', 'u' }; static const symbol s_0_2[1] = { 0xE1 }; static const symbol s_0_3[1] = { 0xE9 }; static const symbol s_0_4[1] = { 0xED }; static const symbol s_0_5[1] = { 0xF3 }; static const symbol s_0_6[1] = { 0xFA }; static const struct among a_0[7] = { /* 0 */ { 0, 0, -1, 7, 0}, /* 1 */ { 2, s_0_1, 0, 6, 0}, /* 2 */ { 1, s_0_2, 0, 1, 0}, /* 3 */ { 1, s_0_3, 0, 2, 0}, /* 4 */ { 1, s_0_4, 0, 3, 0}, /* 5 */ { 1, s_0_5, 0, 4, 0}, /* 6 */ { 1, 
s_0_6, 0, 5, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'U' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_1_1, 0, 1, 0}, /* 2 */ { 1, s_1_2, 0, 2, 0} }; static const symbol s_2_0[2] = { 'l', 'a' }; static const symbol s_2_1[4] = { 'c', 'e', 'l', 'a' }; static const symbol s_2_2[6] = { 'g', 'l', 'i', 'e', 'l', 'a' }; static const symbol s_2_3[4] = { 'm', 'e', 'l', 'a' }; static const symbol s_2_4[4] = { 't', 'e', 'l', 'a' }; static const symbol s_2_5[4] = { 'v', 'e', 'l', 'a' }; static const symbol s_2_6[2] = { 'l', 'e' }; static const symbol s_2_7[4] = { 'c', 'e', 'l', 'e' }; static const symbol s_2_8[6] = { 'g', 'l', 'i', 'e', 'l', 'e' }; static const symbol s_2_9[4] = { 'm', 'e', 'l', 'e' }; static const symbol s_2_10[4] = { 't', 'e', 'l', 'e' }; static const symbol s_2_11[4] = { 'v', 'e', 'l', 'e' }; static const symbol s_2_12[2] = { 'n', 'e' }; static const symbol s_2_13[4] = { 'c', 'e', 'n', 'e' }; static const symbol s_2_14[6] = { 'g', 'l', 'i', 'e', 'n', 'e' }; static const symbol s_2_15[4] = { 'm', 'e', 'n', 'e' }; static const symbol s_2_16[4] = { 's', 'e', 'n', 'e' }; static const symbol s_2_17[4] = { 't', 'e', 'n', 'e' }; static const symbol s_2_18[4] = { 'v', 'e', 'n', 'e' }; static const symbol s_2_19[2] = { 'c', 'i' }; static const symbol s_2_20[2] = { 'l', 'i' }; static const symbol s_2_21[4] = { 'c', 'e', 'l', 'i' }; static const symbol s_2_22[6] = { 'g', 'l', 'i', 'e', 'l', 'i' }; static const symbol s_2_23[4] = { 'm', 'e', 'l', 'i' }; static const symbol s_2_24[4] = { 't', 'e', 'l', 'i' }; static const symbol s_2_25[4] = { 'v', 'e', 'l', 'i' }; static const symbol s_2_26[3] = { 'g', 'l', 'i' }; static const symbol s_2_27[2] = { 'm', 'i' }; static const symbol s_2_28[2] = { 's', 'i' }; static const symbol s_2_29[2] = { 't', 'i' }; static const symbol s_2_30[2] = { 'v', 'i' }; static const symbol s_2_31[2] = { 'l', 'o' }; static const symbol s_2_32[4] = { 'c', 'e', 'l', 'o' }; 
static const symbol s_2_33[6] = { 'g', 'l', 'i', 'e', 'l', 'o' }; static const symbol s_2_34[4] = { 'm', 'e', 'l', 'o' }; static const symbol s_2_35[4] = { 't', 'e', 'l', 'o' }; static const symbol s_2_36[4] = { 'v', 'e', 'l', 'o' }; static const struct among a_2[37] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 4, s_2_1, 0, -1, 0}, /* 2 */ { 6, s_2_2, 0, -1, 0}, /* 3 */ { 4, s_2_3, 0, -1, 0}, /* 4 */ { 4, s_2_4, 0, -1, 0}, /* 5 */ { 4, s_2_5, 0, -1, 0}, /* 6 */ { 2, s_2_6, -1, -1, 0}, /* 7 */ { 4, s_2_7, 6, -1, 0}, /* 8 */ { 6, s_2_8, 6, -1, 0}, /* 9 */ { 4, s_2_9, 6, -1, 0}, /* 10 */ { 4, s_2_10, 6, -1, 0}, /* 11 */ { 4, s_2_11, 6, -1, 0}, /* 12 */ { 2, s_2_12, -1, -1, 0}, /* 13 */ { 4, s_2_13, 12, -1, 0}, /* 14 */ { 6, s_2_14, 12, -1, 0}, /* 15 */ { 4, s_2_15, 12, -1, 0}, /* 16 */ { 4, s_2_16, 12, -1, 0}, /* 17 */ { 4, s_2_17, 12, -1, 0}, /* 18 */ { 4, s_2_18, 12, -1, 0}, /* 19 */ { 2, s_2_19, -1, -1, 0}, /* 20 */ { 2, s_2_20, -1, -1, 0}, /* 21 */ { 4, s_2_21, 20, -1, 0}, /* 22 */ { 6, s_2_22, 20, -1, 0}, /* 23 */ { 4, s_2_23, 20, -1, 0}, /* 24 */ { 4, s_2_24, 20, -1, 0}, /* 25 */ { 4, s_2_25, 20, -1, 0}, /* 26 */ { 3, s_2_26, 20, -1, 0}, /* 27 */ { 2, s_2_27, -1, -1, 0}, /* 28 */ { 2, s_2_28, -1, -1, 0}, /* 29 */ { 2, s_2_29, -1, -1, 0}, /* 30 */ { 2, s_2_30, -1, -1, 0}, /* 31 */ { 2, s_2_31, -1, -1, 0}, /* 32 */ { 4, s_2_32, 31, -1, 0}, /* 33 */ { 6, s_2_33, 31, -1, 0}, /* 34 */ { 4, s_2_34, 31, -1, 0}, /* 35 */ { 4, s_2_35, 31, -1, 0}, /* 36 */ { 4, s_2_36, 31, -1, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_3_1[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_3_2[2] = { 'a', 'r' }; static const symbol s_3_3[2] = { 'e', 'r' }; static const symbol s_3_4[2] = { 'i', 'r' }; static const struct among a_3[5] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 2, s_3_2, -1, 2, 0}, /* 3 */ { 2, s_3_3, -1, 2, 0}, /* 4 */ { 2, s_3_4, -1, 2, 0} }; static const symbol s_4_0[2] = { 'i', 'c' }; static const 
symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_4_2[2] = { 'o', 's' }; static const symbol s_4_3[2] = { 'i', 'v' }; static const struct among a_4[4] = { /* 0 */ { 2, s_4_0, -1, -1, 0}, /* 1 */ { 4, s_4_1, -1, -1, 0}, /* 2 */ { 2, s_4_2, -1, -1, 0}, /* 3 */ { 2, s_4_3, -1, 1, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_5_2[2] = { 'i', 'v' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, -1, 1, 0} }; static const symbol s_6_0[3] = { 'i', 'c', 'a' }; static const symbol s_6_1[5] = { 'l', 'o', 'g', 'i', 'a' }; static const symbol s_6_2[3] = { 'o', 's', 'a' }; static const symbol s_6_3[4] = { 'i', 's', 't', 'a' }; static const symbol s_6_4[3] = { 'i', 'v', 'a' }; static const symbol s_6_5[4] = { 'a', 'n', 'z', 'a' }; static const symbol s_6_6[4] = { 'e', 'n', 'z', 'a' }; static const symbol s_6_7[3] = { 'i', 'c', 'e' }; static const symbol s_6_8[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; static const symbol s_6_9[4] = { 'i', 'c', 'h', 'e' }; static const symbol s_6_10[5] = { 'l', 'o', 'g', 'i', 'e' }; static const symbol s_6_11[5] = { 'a', 'b', 'i', 'l', 'e' }; static const symbol s_6_12[5] = { 'i', 'b', 'i', 'l', 'e' }; static const symbol s_6_13[6] = { 'u', 's', 'i', 'o', 'n', 'e' }; static const symbol s_6_14[6] = { 'a', 'z', 'i', 'o', 'n', 'e' }; static const symbol s_6_15[6] = { 'u', 'z', 'i', 'o', 'n', 'e' }; static const symbol s_6_16[5] = { 'a', 't', 'o', 'r', 'e' }; static const symbol s_6_17[3] = { 'o', 's', 'e' }; static const symbol s_6_18[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_19[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_20[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_21[4] = { 'i', 's', 't', 'e' }; static const symbol s_6_22[3] = { 'i', 'v', 'e' }; static const symbol s_6_23[4] = { 'a', 'n', 'z', 'e' }; static const symbol s_6_24[4] = { 'e', 
'n', 'z', 'e' }; static const symbol s_6_25[3] = { 'i', 'c', 'i' }; static const symbol s_6_26[6] = { 'a', 't', 'r', 'i', 'c', 'i' }; static const symbol s_6_27[4] = { 'i', 'c', 'h', 'i' }; static const symbol s_6_28[5] = { 'a', 'b', 'i', 'l', 'i' }; static const symbol s_6_29[5] = { 'i', 'b', 'i', 'l', 'i' }; static const symbol s_6_30[4] = { 'i', 's', 'm', 'i' }; static const symbol s_6_31[6] = { 'u', 's', 'i', 'o', 'n', 'i' }; static const symbol s_6_32[6] = { 'a', 'z', 'i', 'o', 'n', 'i' }; static const symbol s_6_33[6] = { 'u', 'z', 'i', 'o', 'n', 'i' }; static const symbol s_6_34[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_6_35[3] = { 'o', 's', 'i' }; static const symbol s_6_36[4] = { 'a', 'n', 't', 'i' }; static const symbol s_6_37[6] = { 'a', 'm', 'e', 'n', 't', 'i' }; static const symbol s_6_38[6] = { 'i', 'm', 'e', 'n', 't', 'i' }; static const symbol s_6_39[4] = { 'i', 's', 't', 'i' }; static const symbol s_6_40[3] = { 'i', 'v', 'i' }; static const symbol s_6_41[3] = { 'i', 'c', 'o' }; static const symbol s_6_42[4] = { 'i', 's', 'm', 'o' }; static const symbol s_6_43[3] = { 'o', 's', 'o' }; static const symbol s_6_44[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; static const symbol s_6_45[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; static const symbol s_6_46[3] = { 'i', 'v', 'o' }; static const symbol s_6_47[3] = { 'i', 't', 0xE0 }; static const symbol s_6_48[4] = { 'i', 's', 't', 0xE0 }; static const symbol s_6_49[4] = { 'i', 's', 't', 0xE8 }; static const symbol s_6_50[4] = { 'i', 's', 't', 0xEC }; static const struct among a_6[51] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 5, s_6_1, -1, 3, 0}, /* 2 */ { 3, s_6_2, -1, 1, 0}, /* 3 */ { 4, s_6_3, -1, 1, 0}, /* 4 */ { 3, s_6_4, -1, 9, 0}, /* 5 */ { 4, s_6_5, -1, 1, 0}, /* 6 */ { 4, s_6_6, -1, 5, 0}, /* 7 */ { 3, s_6_7, -1, 1, 0}, /* 8 */ { 6, s_6_8, 7, 1, 0}, /* 9 */ { 4, s_6_9, -1, 1, 0}, /* 10 */ { 5, s_6_10, -1, 3, 0}, /* 11 */ { 5, s_6_11, -1, 1, 0}, /* 12 */ { 5, s_6_12, -1, 1, 0}, /* 13 */ { 6, 
s_6_13, -1, 4, 0}, /* 14 */ { 6, s_6_14, -1, 2, 0}, /* 15 */ { 6, s_6_15, -1, 4, 0}, /* 16 */ { 5, s_6_16, -1, 2, 0}, /* 17 */ { 3, s_6_17, -1, 1, 0}, /* 18 */ { 4, s_6_18, -1, 1, 0}, /* 19 */ { 5, s_6_19, -1, 1, 0}, /* 20 */ { 6, s_6_20, 19, 7, 0}, /* 21 */ { 4, s_6_21, -1, 1, 0}, /* 22 */ { 3, s_6_22, -1, 9, 0}, /* 23 */ { 4, s_6_23, -1, 1, 0}, /* 24 */ { 4, s_6_24, -1, 5, 0}, /* 25 */ { 3, s_6_25, -1, 1, 0}, /* 26 */ { 6, s_6_26, 25, 1, 0}, /* 27 */ { 4, s_6_27, -1, 1, 0}, /* 28 */ { 5, s_6_28, -1, 1, 0}, /* 29 */ { 5, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, -1, 1, 0}, /* 31 */ { 6, s_6_31, -1, 4, 0}, /* 32 */ { 6, s_6_32, -1, 2, 0}, /* 33 */ { 6, s_6_33, -1, 4, 0}, /* 34 */ { 5, s_6_34, -1, 2, 0}, /* 35 */ { 3, s_6_35, -1, 1, 0}, /* 36 */ { 4, s_6_36, -1, 1, 0}, /* 37 */ { 6, s_6_37, -1, 6, 0}, /* 38 */ { 6, s_6_38, -1, 6, 0}, /* 39 */ { 4, s_6_39, -1, 1, 0}, /* 40 */ { 3, s_6_40, -1, 9, 0}, /* 41 */ { 3, s_6_41, -1, 1, 0}, /* 42 */ { 4, s_6_42, -1, 1, 0}, /* 43 */ { 3, s_6_43, -1, 1, 0}, /* 44 */ { 6, s_6_44, -1, 6, 0}, /* 45 */ { 6, s_6_45, -1, 6, 0}, /* 46 */ { 3, s_6_46, -1, 9, 0}, /* 47 */ { 3, s_6_47, -1, 8, 0}, /* 48 */ { 4, s_6_48, -1, 1, 0}, /* 49 */ { 4, s_6_49, -1, 1, 0}, /* 50 */ { 4, s_6_50, -1, 1, 0} }; static const symbol s_7_0[4] = { 'i', 's', 'c', 'a' }; static const symbol s_7_1[4] = { 'e', 'n', 'd', 'a' }; static const symbol s_7_2[3] = { 'a', 't', 'a' }; static const symbol s_7_3[3] = { 'i', 't', 'a' }; static const symbol s_7_4[3] = { 'u', 't', 'a' }; static const symbol s_7_5[3] = { 'a', 'v', 'a' }; static const symbol s_7_6[3] = { 'e', 'v', 'a' }; static const symbol s_7_7[3] = { 'i', 'v', 'a' }; static const symbol s_7_8[6] = { 'e', 'r', 'e', 'b', 'b', 'e' }; static const symbol s_7_9[6] = { 'i', 'r', 'e', 'b', 'b', 'e' }; static const symbol s_7_10[4] = { 'i', 's', 'c', 'e' }; static const symbol s_7_11[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_7_12[3] = { 'a', 'r', 'e' }; static const symbol s_7_13[3] = { 'e', 'r', 'e' }; 
static const symbol s_7_14[3] = { 'i', 'r', 'e' }; static const symbol s_7_15[4] = { 'a', 's', 's', 'e' }; static const symbol s_7_16[3] = { 'a', 't', 'e' }; static const symbol s_7_17[5] = { 'a', 'v', 'a', 't', 'e' }; static const symbol s_7_18[5] = { 'e', 'v', 'a', 't', 'e' }; static const symbol s_7_19[5] = { 'i', 'v', 'a', 't', 'e' }; static const symbol s_7_20[3] = { 'e', 't', 'e' }; static const symbol s_7_21[5] = { 'e', 'r', 'e', 't', 'e' }; static const symbol s_7_22[5] = { 'i', 'r', 'e', 't', 'e' }; static const symbol s_7_23[3] = { 'i', 't', 'e' }; static const symbol s_7_24[6] = { 'e', 'r', 'e', 's', 't', 'e' }; static const symbol s_7_25[6] = { 'i', 'r', 'e', 's', 't', 'e' }; static const symbol s_7_26[3] = { 'u', 't', 'e' }; static const symbol s_7_27[4] = { 'e', 'r', 'a', 'i' }; static const symbol s_7_28[4] = { 'i', 'r', 'a', 'i' }; static const symbol s_7_29[4] = { 'i', 's', 'c', 'i' }; static const symbol s_7_30[4] = { 'e', 'n', 'd', 'i' }; static const symbol s_7_31[4] = { 'e', 'r', 'e', 'i' }; static const symbol s_7_32[4] = { 'i', 'r', 'e', 'i' }; static const symbol s_7_33[4] = { 'a', 's', 's', 'i' }; static const symbol s_7_34[3] = { 'a', 't', 'i' }; static const symbol s_7_35[3] = { 'i', 't', 'i' }; static const symbol s_7_36[6] = { 'e', 'r', 'e', 's', 't', 'i' }; static const symbol s_7_37[6] = { 'i', 'r', 'e', 's', 't', 'i' }; static const symbol s_7_38[3] = { 'u', 't', 'i' }; static const symbol s_7_39[3] = { 'a', 'v', 'i' }; static const symbol s_7_40[3] = { 'e', 'v', 'i' }; static const symbol s_7_41[3] = { 'i', 'v', 'i' }; static const symbol s_7_42[4] = { 'i', 's', 'c', 'o' }; static const symbol s_7_43[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_7_44[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_7_45[4] = { 'Y', 'a', 'm', 'o' }; static const symbol s_7_46[4] = { 'i', 'a', 'm', 'o' }; static const symbol s_7_47[5] = { 'a', 'v', 'a', 'm', 'o' }; static const symbol s_7_48[5] = { 'e', 'v', 'a', 'm', 'o' }; static const symbol 
s_7_49[5] = { 'i', 'v', 'a', 'm', 'o' }; static const symbol s_7_50[5] = { 'e', 'r', 'e', 'm', 'o' }; static const symbol s_7_51[5] = { 'i', 'r', 'e', 'm', 'o' }; static const symbol s_7_52[6] = { 'a', 's', 's', 'i', 'm', 'o' }; static const symbol s_7_53[4] = { 'a', 'm', 'm', 'o' }; static const symbol s_7_54[4] = { 'e', 'm', 'm', 'o' }; static const symbol s_7_55[6] = { 'e', 'r', 'e', 'm', 'm', 'o' }; static const symbol s_7_56[6] = { 'i', 'r', 'e', 'm', 'm', 'o' }; static const symbol s_7_57[4] = { 'i', 'm', 'm', 'o' }; static const symbol s_7_58[3] = { 'a', 'n', 'o' }; static const symbol s_7_59[6] = { 'i', 's', 'c', 'a', 'n', 'o' }; static const symbol s_7_60[5] = { 'a', 'v', 'a', 'n', 'o' }; static const symbol s_7_61[5] = { 'e', 'v', 'a', 'n', 'o' }; static const symbol s_7_62[5] = { 'i', 'v', 'a', 'n', 'o' }; static const symbol s_7_63[6] = { 'e', 'r', 'a', 'n', 'n', 'o' }; static const symbol s_7_64[6] = { 'i', 'r', 'a', 'n', 'n', 'o' }; static const symbol s_7_65[3] = { 'o', 'n', 'o' }; static const symbol s_7_66[6] = { 'i', 's', 'c', 'o', 'n', 'o' }; static const symbol s_7_67[5] = { 'a', 'r', 'o', 'n', 'o' }; static const symbol s_7_68[5] = { 'e', 'r', 'o', 'n', 'o' }; static const symbol s_7_69[5] = { 'i', 'r', 'o', 'n', 'o' }; static const symbol s_7_70[8] = { 'e', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; static const symbol s_7_71[8] = { 'i', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; static const symbol s_7_72[6] = { 'a', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_73[6] = { 'e', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_74[6] = { 'i', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_75[3] = { 'a', 't', 'o' }; static const symbol s_7_76[3] = { 'i', 't', 'o' }; static const symbol s_7_77[3] = { 'u', 't', 'o' }; static const symbol s_7_78[3] = { 'a', 'v', 'o' }; static const symbol s_7_79[3] = { 'e', 'v', 'o' }; static const symbol s_7_80[3] = { 'i', 'v', 'o' }; static const symbol s_7_81[2] = { 'a', 'r' }; static const symbol s_7_82[2] = { 
'i', 'r' }; static const symbol s_7_83[3] = { 'e', 'r', 0xE0 }; static const symbol s_7_84[3] = { 'i', 'r', 0xE0 }; static const symbol s_7_85[3] = { 'e', 'r', 0xF2 }; static const symbol s_7_86[3] = { 'i', 'r', 0xF2 }; static const struct among a_7[87] = { /* 0 */ { 4, s_7_0, -1, 1, 0}, /* 1 */ { 4, s_7_1, -1, 1, 0}, /* 2 */ { 3, s_7_2, -1, 1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 3, s_7_4, -1, 1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 3, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 6, s_7_8, -1, 1, 0}, /* 9 */ { 6, s_7_9, -1, 1, 0}, /* 10 */ { 4, s_7_10, -1, 1, 0}, /* 11 */ { 4, s_7_11, -1, 1, 0}, /* 12 */ { 3, s_7_12, -1, 1, 0}, /* 13 */ { 3, s_7_13, -1, 1, 0}, /* 14 */ { 3, s_7_14, -1, 1, 0}, /* 15 */ { 4, s_7_15, -1, 1, 0}, /* 16 */ { 3, s_7_16, -1, 1, 0}, /* 17 */ { 5, s_7_17, 16, 1, 0}, /* 18 */ { 5, s_7_18, 16, 1, 0}, /* 19 */ { 5, s_7_19, 16, 1, 0}, /* 20 */ { 3, s_7_20, -1, 1, 0}, /* 21 */ { 5, s_7_21, 20, 1, 0}, /* 22 */ { 5, s_7_22, 20, 1, 0}, /* 23 */ { 3, s_7_23, -1, 1, 0}, /* 24 */ { 6, s_7_24, -1, 1, 0}, /* 25 */ { 6, s_7_25, -1, 1, 0}, /* 26 */ { 3, s_7_26, -1, 1, 0}, /* 27 */ { 4, s_7_27, -1, 1, 0}, /* 28 */ { 4, s_7_28, -1, 1, 0}, /* 29 */ { 4, s_7_29, -1, 1, 0}, /* 30 */ { 4, s_7_30, -1, 1, 0}, /* 31 */ { 4, s_7_31, -1, 1, 0}, /* 32 */ { 4, s_7_32, -1, 1, 0}, /* 33 */ { 4, s_7_33, -1, 1, 0}, /* 34 */ { 3, s_7_34, -1, 1, 0}, /* 35 */ { 3, s_7_35, -1, 1, 0}, /* 36 */ { 6, s_7_36, -1, 1, 0}, /* 37 */ { 6, s_7_37, -1, 1, 0}, /* 38 */ { 3, s_7_38, -1, 1, 0}, /* 39 */ { 3, s_7_39, -1, 1, 0}, /* 40 */ { 3, s_7_40, -1, 1, 0}, /* 41 */ { 3, s_7_41, -1, 1, 0}, /* 42 */ { 4, s_7_42, -1, 1, 0}, /* 43 */ { 4, s_7_43, -1, 1, 0}, /* 44 */ { 4, s_7_44, -1, 1, 0}, /* 45 */ { 4, s_7_45, -1, 1, 0}, /* 46 */ { 4, s_7_46, -1, 1, 0}, /* 47 */ { 5, s_7_47, -1, 1, 0}, /* 48 */ { 5, s_7_48, -1, 1, 0}, /* 49 */ { 5, s_7_49, -1, 1, 0}, /* 50 */ { 5, s_7_50, -1, 1, 0}, /* 51 */ { 5, s_7_51, -1, 1, 0}, /* 52 */ { 6, s_7_52, -1, 1, 0}, /* 53 */ { 
4, s_7_53, -1, 1, 0}, /* 54 */ { 4, s_7_54, -1, 1, 0}, /* 55 */ { 6, s_7_55, 54, 1, 0}, /* 56 */ { 6, s_7_56, 54, 1, 0}, /* 57 */ { 4, s_7_57, -1, 1, 0}, /* 58 */ { 3, s_7_58, -1, 1, 0}, /* 59 */ { 6, s_7_59, 58, 1, 0}, /* 60 */ { 5, s_7_60, 58, 1, 0}, /* 61 */ { 5, s_7_61, 58, 1, 0}, /* 62 */ { 5, s_7_62, 58, 1, 0}, /* 63 */ { 6, s_7_63, -1, 1, 0}, /* 64 */ { 6, s_7_64, -1, 1, 0}, /* 65 */ { 3, s_7_65, -1, 1, 0}, /* 66 */ { 6, s_7_66, 65, 1, 0}, /* 67 */ { 5, s_7_67, 65, 1, 0}, /* 68 */ { 5, s_7_68, 65, 1, 0}, /* 69 */ { 5, s_7_69, 65, 1, 0}, /* 70 */ { 8, s_7_70, -1, 1, 0}, /* 71 */ { 8, s_7_71, -1, 1, 0}, /* 72 */ { 6, s_7_72, -1, 1, 0}, /* 73 */ { 6, s_7_73, -1, 1, 0}, /* 74 */ { 6, s_7_74, -1, 1, 0}, /* 75 */ { 3, s_7_75, -1, 1, 0}, /* 76 */ { 3, s_7_76, -1, 1, 0}, /* 77 */ { 3, s_7_77, -1, 1, 0}, /* 78 */ { 3, s_7_78, -1, 1, 0}, /* 79 */ { 3, s_7_79, -1, 1, 0}, /* 80 */ { 3, s_7_80, -1, 1, 0}, /* 81 */ { 2, s_7_81, -1, 1, 0}, /* 82 */ { 2, s_7_82, -1, 1, 0}, /* 83 */ { 3, s_7_83, -1, 1, 0}, /* 84 */ { 3, s_7_84, -1, 1, 0}, /* 85 */ { 3, s_7_85, -1, 1, 0}, /* 86 */ { 3, s_7_86, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1 }; static const unsigned char g_AEIO[] = { 17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2 }; static const unsigned char g_CG[] = { 17 }; static const symbol s_0[] = { 0xE0 }; static const symbol s_1[] = { 0xE8 }; static const symbol s_2[] = { 0xEC }; static const symbol s_3[] = { 0xF2 }; static const symbol s_4[] = { 0xF9 }; static const symbol s_5[] = { 'q', 'U' }; static const symbol s_6[] = { 'u' }; static const symbol s_7[] = { 'U' }; static const symbol s_8[] = { 'i' }; static const symbol s_9[] = { 'I' }; static const symbol s_10[] = { 'i' }; static const symbol s_11[] = { 'u' }; static const symbol s_12[] = { 'e' }; static const symbol s_13[] = { 'i', 'c' }; static const symbol s_14[] = { 'l', 'o', 'g' }; static const symbol s_15[] = { 'u' }; static 
const symbol s_16[] = { 'e', 'n', 't', 'e' }; static const symbol s_17[] = { 'a', 't' }; static const symbol s_18[] = { 'a', 't' }; static const symbol s_19[] = { 'i', 'c' }; static const symbol s_20[] = { 'i' }; static const symbol s_21[] = { 'h' }; static int r_prelude(struct SN_env * z) { int among_var; { int c_test = z->c; /* test, line 35 */ while(1) { /* repeat, line 35 */ int c1 = z->c; z->bra = z->c; /* [, line 36 */ among_var = find_among(z, a_0, 7); /* substring, line 36 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 36 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 37 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 38 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 39 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 40 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 41 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_5); /* <-, line 42 */ if (ret < 0) return ret; } break; case 7: if (z->c >= z->l) goto lab0; z->c++; /* next, line 43 */ break; } continue; lab0: z->c = c1; break; } z->c = c_test; } while(1) { /* repeat, line 46 */ int c2 = z->c; while(1) { /* goto, line 46 */ int c3 = z->c; if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; z->bra = z->c; /* [, line 47 */ { int c4 = z->c; /* or, line 47 */ if (!(eq_s(z, 1, s_6))) goto lab4; z->ket = z->c; /* ], line 47 */ if (in_grouping(z, g_v, 97, 249, 0)) goto lab4; { int ret = slice_from_s(z, 1, s_7); /* <-, line 47 */ if (ret < 0) return ret; } goto lab3; lab4: z->c = c4; if (!(eq_s(z, 1, s_8))) goto lab2; z->ket = z->c; /* ], line 48 */ if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; { int ret = slice_from_s(z, 1, s_9); /* <-, line 48 */ if (ret < 0) return ret; } } lab3: z->c = c3; break; lab2: z->c = c3; if (z->c >= z->l) goto 
lab1; z->c++; /* goto, line 46 */ } continue; lab1: z->c = c2; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 58 */ { int c2 = z->c; /* or, line 60 */ if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; { int c3 = z->c; /* or, line 59 */ if (out_grouping(z, g_v, 97, 249, 0)) goto lab4; { /* gopast */ /* grouping v, line 59 */ int ret = out_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; { /* gopast */ /* non v, line 59 */ int ret = in_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping(z, g_v, 97, 249, 0)) goto lab0; { int c4 = z->c; /* or, line 61 */ if (out_grouping(z, g_v, 97, 249, 0)) goto lab6; { /* gopast */ /* grouping v, line 61 */ int ret = out_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping(z, g_v, 97, 249, 0)) goto lab0; if (z->c >= z->l) goto lab0; z->c++; /* next, line 61 */ } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 62 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 64 */ { /* gopast */ /* grouping v, line 65 */ int ret = out_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 65 */ int ret = in_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 65 */ { /* gopast */ /* grouping v, line 66 */ int ret = out_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 66 */ int ret = in_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 66 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 70 */ int c1 = z->c; z->bra = z->c; /* [, line 72 */ if (z->c >= z->l || 
(z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 72 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 72 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_10); /* <-, line 73 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_11); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 75 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_attached_pronoun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 87 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33314 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 37))) return 0; /* substring, line 87 */ z->bra = z->c; /* ], line 87 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; among_var = find_among_b(z, a_3, 5); /* among, line 97 */ if (!(among_var)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 97 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 98 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 99 */ if (ret < 0) return ret; } break; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 104 */ among_var = find_among_b(z, a_6, 51); /* substring, line 104 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 104 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 111 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* 
delete, line 111 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 113 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ z->ket = z->c; /* [, line 114 */ if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 114 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 114 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 114 */ if (ret < 0) return ret; } lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_14); /* <-, line 117 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 119 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_15); /* <-, line 119 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 121 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_16); /* <-, line 121 */ if (ret < 0) return ret; } break; case 6: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 123 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } break; case 7: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 125 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 126 */ z->ket = z->c; /* [, line 127 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4722696 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_4, 4); /* substring, line 127 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 127 */ { int ret = 
r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 127 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: z->ket = z->c; /* [, line 128 */ if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 128 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 128 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 128 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ z->ket = z->c; /* [, line 136 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_5, 3); /* substring, line 136 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 136 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 137 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 137 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 9: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 143 */ z->ket = z->c; /* [, line 143 */ if (!(eq_s_b(z, 2, s_18))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 143 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ if (ret < 0) return ret; } { int ret = 
slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 143 */ if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 143 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } lab3: ; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 148 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 148 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 149 */ among_var = find_among_b(z, a_7, 87); /* substring, line 149 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 149 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_vowel_suffix(struct SN_env * z) { { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 171 */ z->ket = z->c; /* [, line 172 */ if (in_grouping_b(z, g_AEIO, 97, 242, 0)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 172 */ { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 172 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 172 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 173 */ if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 173 */ { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 173 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 173 */ if (ret < 0) return ret; } lab0: ; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 175 */ z->ket = z->c; /* [, line 176 */ if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab1; } z->bra 
= z->c; /* ], line 176 */ if (in_grouping_b(z, g_CG, 99, 103, 0)) { z->c = z->l - m_keep; goto lab1; } { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call RV, line 176 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } lab1: ; } return 1; } extern int italian_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 182 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 182 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 183 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 183 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 184 */ { int m3 = z->l - z->c; (void)m3; /* do, line 185 */ { int ret = r_attached_pronoun(z); if (ret == 0) goto lab2; /* call attached_pronoun, line 185 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 186 */ { int m5 = z->l - z->c; (void)m5; /* or, line 186 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab5; /* call standard_suffix, line 186 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = z->l - m5; { int ret = r_verb_suffix(z); if (ret == 0) goto lab3; /* call verb_suffix, line 186 */ if (ret < 0) return ret; } } lab4: lab3: z->c = z->l - m4; } { int m6 = z->l - z->c; (void)m6; /* do, line 187 */ { int ret = r_vowel_suffix(z); if (ret == 0) goto lab6; /* call vowel_suffix, line 187 */ if (ret < 0) return ret; } lab6: z->c = z->l - m6; } z->c = z->lb; { int c7 = z->c; /* do, line 189 */ { int ret = r_postlude(z); if (ret == 0) goto lab7; /* call postlude, line 189 */ if (ret < 0) return ret; } lab7: z->c = c7; } return 1; } extern struct SN_env * italian_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void italian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_italian.h000066400000000000000000000005101217574114600313660ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * italian_ISO_8859_1_create_env(void); extern void italian_ISO_8859_1_close_env(struct SN_env * z); extern int italian_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c000066400000000000000000000234031217574114600317370ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int norwegian_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * norwegian_ISO_8859_1_create_env(void); extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'a' }; static const symbol s_0_1[1] = { 'e' }; static const symbol s_0_2[3] = { 'e', 'd', 'e' }; static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_0_5[3] = { 'a', 'n', 'e' }; static const symbol s_0_6[3] = { 'e', 'n', 'e' }; static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; static const symbol s_0_9[2] = { 'e', 'n' }; static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; static const symbol s_0_11[2] = { 'a', 'r' }; static const symbol s_0_12[2] = { 'e', 'r' }; static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; 
static const symbol s_0_14[1] = { 's' }; static const symbol s_0_15[2] = { 'a', 's' }; static const symbol s_0_16[2] = { 'e', 's' }; static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; static const symbol s_0_21[3] = { 'e', 'n', 's' }; static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; static const symbol s_0_23[3] = { 'e', 'r', 's' }; static const symbol s_0_24[3] = { 'e', 't', 's' }; static const symbol s_0_25[2] = { 'e', 't' }; static const symbol s_0_26[3] = { 'h', 'e', 't' }; static const symbol s_0_27[3] = { 'e', 'r', 't' }; static const symbol s_0_28[3] = { 'a', 's', 't' }; static const struct among a_0[29] = { /* 0 */ { 1, s_0_0, -1, 1, 0}, /* 1 */ { 1, s_0_1, -1, 1, 0}, /* 2 */ { 3, s_0_2, 1, 1, 0}, /* 3 */ { 4, s_0_3, 1, 1, 0}, /* 4 */ { 4, s_0_4, 1, 1, 0}, /* 5 */ { 3, s_0_5, 1, 1, 0}, /* 6 */ { 3, s_0_6, 1, 1, 0}, /* 7 */ { 6, s_0_7, 6, 1, 0}, /* 8 */ { 4, s_0_8, 1, 3, 0}, /* 9 */ { 2, s_0_9, -1, 1, 0}, /* 10 */ { 5, s_0_10, 9, 1, 0}, /* 11 */ { 2, s_0_11, -1, 1, 0}, /* 12 */ { 2, s_0_12, -1, 1, 0}, /* 13 */ { 5, s_0_13, 12, 1, 0}, /* 14 */ { 1, s_0_14, -1, 2, 0}, /* 15 */ { 2, s_0_15, 14, 1, 0}, /* 16 */ { 2, s_0_16, 14, 1, 0}, /* 17 */ { 4, s_0_17, 16, 1, 0}, /* 18 */ { 5, s_0_18, 16, 1, 0}, /* 19 */ { 4, s_0_19, 16, 1, 0}, /* 20 */ { 7, s_0_20, 19, 1, 0}, /* 21 */ { 3, s_0_21, 14, 1, 0}, /* 22 */ { 6, s_0_22, 21, 1, 0}, /* 23 */ { 3, s_0_23, 14, 1, 0}, /* 24 */ { 3, s_0_24, 14, 1, 0}, /* 25 */ { 2, s_0_25, -1, 1, 0}, /* 26 */ { 3, s_0_26, 25, 1, 0}, /* 27 */ { 3, s_0_27, -1, 3, 0}, /* 28 */ { 3, s_0_28, -1, 1, 0} }; static const symbol s_1_0[2] = { 'd', 't' }; static const symbol s_1_1[2] = { 'v', 't' }; static const struct among a_1[2] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0} }; static const symbol s_2_0[3] = { 'l', 'e', 
'g' }; static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; static const symbol s_2_2[2] = { 'i', 'g' }; static const symbol s_2_3[3] = { 'e', 'i', 'g' }; static const symbol s_2_4[3] = { 'l', 'i', 'g' }; static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; static const symbol s_2_6[3] = { 'e', 'l', 's' }; static const symbol s_2_7[3] = { 'l', 'o', 'v' }; static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; static const struct among a_2[11] = { /* 0 */ { 3, s_2_0, -1, 1, 0}, /* 1 */ { 4, s_2_1, 0, 1, 0}, /* 2 */ { 2, s_2_2, -1, 1, 0}, /* 3 */ { 3, s_2_3, 2, 1, 0}, /* 4 */ { 3, s_2_4, 2, 1, 0}, /* 5 */ { 4, s_2_5, 4, 1, 0}, /* 6 */ { 3, s_2_6, -1, 1, 0}, /* 7 */ { 3, s_2_7, -1, 1, 0}, /* 8 */ { 4, s_2_8, 7, 1, 0}, /* 9 */ { 4, s_2_9, 7, 1, 0}, /* 10 */ { 7, s_2_10, 9, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; static const symbol s_0[] = { 'k' }; static const symbol s_1[] = { 'e', 'r' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 30 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) return 0; z->c = ret; /* hop, line 30 */ } z->I[1] = z->c; /* setmark x, line 30 */ z->c = c_test; } if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ { /* gopast */ /* non v, line 31 */ int ret = in_grouping(z, g_v, 97, 248, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 31 */ /* try, line 32 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 38 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 38 */ mlimit = z->lb; z->lb = z->c; z->c = z->l 
- m1; z->ket = z->c; /* [, line 38 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 38 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 2: { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ if (in_grouping_b(z, g_s_ending, 98, 122, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_0))) return 0; if (out_grouping_b(z, g_v, 97, 248, 0)) return 0; } lab0: { int ret = slice_del(z); /* delete, line 46 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 53 */ { int mlimit; /* setlimit, line 54 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 54 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 54 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ z->bra = z->c; /* ], line 54 */ z->lb = mlimit; } z->c = z->l - m_test; } if (z->c <= z->lb) return 0; z->c--; /* next, line 59 */ z->bra = z->c; /* ], line 59 */ { int ret = slice_del(z); /* delete, line 59 */ if (ret < 0) return ret; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 63 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 63 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 63 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { 
z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 63 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 67 */ if (ret < 0) return ret; } break; } return 1; } extern int norwegian_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 74 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 74 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 75 */ { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 76 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 78 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } z->c = z->lb; return 1; } extern struct SN_env * norwegian_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } extern void norwegian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h000066400000000000000000000005161217574114600317440ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * norwegian_ISO_8859_1_create_env(void); extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); extern int norwegian_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_porter.c000066400000000000000000000605561217574114600312730ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int porter_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_Step_5b(struct SN_env * z); static int r_Step_5a(struct SN_env * z); static int r_Step_4(struct SN_env * z); static int r_Step_3(struct SN_env * z); static int r_Step_2(struct SN_env * z); static int r_Step_1c(struct SN_env * z); static int r_Step_1b(struct SN_env * z); static int r_Step_1a(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_shortv(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * porter_ISO_8859_1_create_env(void); extern void porter_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 's' }; static const symbol s_0_1[3] = { 'i', 'e', 's' }; static const symbol s_0_2[4] = { 's', 's', 'e', 's' }; static const symbol s_0_3[2] = { 's', 's' }; static const struct among a_0[4] = { /* 0 */ { 1, s_0_0, -1, 3, 0}, /* 1 */ { 3, s_0_1, 0, 2, 0}, /* 2 */ { 4, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, -1, 0} }; static const symbol s_1_1[2] = { 'b', 'b' }; static const symbol s_1_2[2] = { 'd', 'd' }; static const symbol s_1_3[2] = { 'f', 'f' }; static const symbol s_1_4[2] = { 'g', 'g' }; static const symbol s_1_5[2] = { 'b', 'l' }; static const symbol s_1_6[2] = { 'm', 'm' }; static const symbol s_1_7[2] = { 'n', 'n' }; static const symbol s_1_8[2] = { 'p', 'p' }; static const symbol s_1_9[2] = { 'r', 'r' }; static const symbol s_1_10[2] = { 'a', 't' }; static const symbol s_1_11[2] = { 't', 't' }; static const symbol s_1_12[2] = { 'i', 'z' }; static const struct among a_1[13] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, 
s_1_1, 0, 2, 0}, /* 2 */ { 2, s_1_2, 0, 2, 0}, /* 3 */ { 2, s_1_3, 0, 2, 0}, /* 4 */ { 2, s_1_4, 0, 2, 0}, /* 5 */ { 2, s_1_5, 0, 1, 0}, /* 6 */ { 2, s_1_6, 0, 2, 0}, /* 7 */ { 2, s_1_7, 0, 2, 0}, /* 8 */ { 2, s_1_8, 0, 2, 0}, /* 9 */ { 2, s_1_9, 0, 2, 0}, /* 10 */ { 2, s_1_10, 0, 1, 0}, /* 11 */ { 2, s_1_11, 0, 2, 0}, /* 12 */ { 2, s_1_12, 0, 1, 0} }; static const symbol s_2_0[2] = { 'e', 'd' }; static const symbol s_2_1[3] = { 'e', 'e', 'd' }; static const symbol s_2_2[3] = { 'i', 'n', 'g' }; static const struct among a_2[3] = { /* 0 */ { 2, s_2_0, -1, 2, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 3, s_2_2, -1, 2, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 'c', 'i' }; static const symbol s_3_1[4] = { 'e', 'n', 'c', 'i' }; static const symbol s_3_2[4] = { 'a', 'b', 'l', 'i' }; static const symbol s_3_3[3] = { 'e', 'l', 'i' }; static const symbol s_3_4[4] = { 'a', 'l', 'l', 'i' }; static const symbol s_3_5[5] = { 'o', 'u', 's', 'l', 'i' }; static const symbol s_3_6[5] = { 'e', 'n', 't', 'l', 'i' }; static const symbol s_3_7[5] = { 'a', 'l', 'i', 't', 'i' }; static const symbol s_3_8[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; static const symbol s_3_9[5] = { 'i', 'v', 'i', 't', 'i' }; static const symbol s_3_10[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_3_11[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_3_12[5] = { 'a', 'l', 'i', 's', 'm' }; static const symbol s_3_13[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_3_14[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; static const symbol s_3_15[4] = { 'i', 'z', 'e', 'r' }; static const symbol s_3_16[4] = { 'a', 't', 'o', 'r' }; static const symbol s_3_17[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; static const symbol s_3_18[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; static const symbol s_3_19[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; static const struct among a_3[20] = { /* 0 */ { 4, s_3_0, -1, 3, 0}, /* 1 */ { 4, s_3_1, -1, 2, 0}, /* 2 */ { 4, s_3_2, -1, 4, 0}, /* 3 */ 
{ 3, s_3_3, -1, 6, 0}, /* 4 */ { 4, s_3_4, -1, 9, 0}, /* 5 */ { 5, s_3_5, -1, 12, 0}, /* 6 */ { 5, s_3_6, -1, 5, 0}, /* 7 */ { 5, s_3_7, -1, 10, 0}, /* 8 */ { 6, s_3_8, -1, 14, 0}, /* 9 */ { 5, s_3_9, -1, 13, 0}, /* 10 */ { 6, s_3_10, -1, 1, 0}, /* 11 */ { 7, s_3_11, 10, 8, 0}, /* 12 */ { 5, s_3_12, -1, 10, 0}, /* 13 */ { 5, s_3_13, -1, 8, 0}, /* 14 */ { 7, s_3_14, 13, 7, 0}, /* 15 */ { 4, s_3_15, -1, 7, 0}, /* 16 */ { 4, s_3_16, -1, 8, 0}, /* 17 */ { 7, s_3_17, -1, 13, 0}, /* 18 */ { 7, s_3_18, -1, 11, 0}, /* 19 */ { 7, s_3_19, -1, 12, 0} }; static const symbol s_4_0[5] = { 'i', 'c', 'a', 't', 'e' }; static const symbol s_4_1[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_4_2[5] = { 'a', 'l', 'i', 'z', 'e' }; static const symbol s_4_3[5] = { 'i', 'c', 'i', 't', 'i' }; static const symbol s_4_4[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_4_5[3] = { 'f', 'u', 'l' }; static const symbol s_4_6[4] = { 'n', 'e', 's', 's' }; static const struct among a_4[7] = { /* 0 */ { 5, s_4_0, -1, 2, 0}, /* 1 */ { 5, s_4_1, -1, 3, 0}, /* 2 */ { 5, s_4_2, -1, 1, 0}, /* 3 */ { 5, s_4_3, -1, 2, 0}, /* 4 */ { 4, s_4_4, -1, 2, 0}, /* 5 */ { 3, s_4_5, -1, 3, 0}, /* 6 */ { 4, s_4_6, -1, 3, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_5_2[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_5_3[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_5_4[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_5_5[3] = { 'a', 't', 'e' }; static const symbol s_5_6[3] = { 'i', 'v', 'e' }; static const symbol s_5_7[3] = { 'i', 'z', 'e' }; static const symbol s_5_8[3] = { 'i', 't', 'i' }; static const symbol s_5_9[2] = { 'a', 'l' }; static const symbol s_5_10[3] = { 'i', 's', 'm' }; static const symbol s_5_11[3] = { 'i', 'o', 'n' }; static const symbol s_5_12[2] = { 'e', 'r' }; static const symbol s_5_13[3] = { 'o', 'u', 's' }; static const symbol s_5_14[3] = { 'a', 'n', 't' }; static const symbol s_5_15[3] = { 
'e', 'n', 't' }; static const symbol s_5_16[4] = { 'm', 'e', 'n', 't' }; static const symbol s_5_17[5] = { 'e', 'm', 'e', 'n', 't' }; static const symbol s_5_18[2] = { 'o', 'u' }; static const struct among a_5[19] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 4, s_5_2, -1, 1, 0}, /* 3 */ { 4, s_5_3, -1, 1, 0}, /* 4 */ { 4, s_5_4, -1, 1, 0}, /* 5 */ { 3, s_5_5, -1, 1, 0}, /* 6 */ { 3, s_5_6, -1, 1, 0}, /* 7 */ { 3, s_5_7, -1, 1, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 2, s_5_9, -1, 1, 0}, /* 10 */ { 3, s_5_10, -1, 1, 0}, /* 11 */ { 3, s_5_11, -1, 2, 0}, /* 12 */ { 2, s_5_12, -1, 1, 0}, /* 13 */ { 3, s_5_13, -1, 1, 0}, /* 14 */ { 3, s_5_14, -1, 1, 0}, /* 15 */ { 3, s_5_15, -1, 1, 0}, /* 16 */ { 4, s_5_16, 15, 1, 0}, /* 17 */ { 5, s_5_17, 16, 1, 0}, /* 18 */ { 2, s_5_18, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1 }; static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; static const symbol s_0[] = { 's', 's' }; static const symbol s_1[] = { 'i' }; static const symbol s_2[] = { 'e', 'e' }; static const symbol s_3[] = { 'e' }; static const symbol s_4[] = { 'e' }; static const symbol s_5[] = { 'y' }; static const symbol s_6[] = { 'Y' }; static const symbol s_7[] = { 'i' }; static const symbol s_8[] = { 't', 'i', 'o', 'n' }; static const symbol s_9[] = { 'e', 'n', 'c', 'e' }; static const symbol s_10[] = { 'a', 'n', 'c', 'e' }; static const symbol s_11[] = { 'a', 'b', 'l', 'e' }; static const symbol s_12[] = { 'e', 'n', 't' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'i', 'z', 'e' }; static const symbol s_15[] = { 'a', 't', 'e' }; static const symbol s_16[] = { 'a', 'l' }; static const symbol s_17[] = { 'a', 'l' }; static const symbol s_18[] = { 'f', 'u', 'l' }; static const symbol s_19[] = { 'o', 'u', 's' }; static const symbol s_20[] = { 'i', 'v', 'e' }; static const symbol s_21[] = { 'b', 'l', 'e' }; static const symbol s_22[] = { 'a', 'l' }; static const symbol s_23[] = { 'i', 
'c' }; static const symbol s_24[] = { 's' }; static const symbol s_25[] = { 't' }; static const symbol s_26[] = { 'e' }; static const symbol s_27[] = { 'l' }; static const symbol s_28[] = { 'l' }; static const symbol s_29[] = { 'y' }; static const symbol s_30[] = { 'Y' }; static const symbol s_31[] = { 'y' }; static const symbol s_32[] = { 'Y' }; static const symbol s_33[] = { 'Y' }; static const symbol s_34[] = { 'y' }; static int r_shortv(struct SN_env * z) { if (out_grouping_b(z, g_v_WXY, 89, 121, 0)) return 0; if (in_grouping_b(z, g_v, 97, 121, 0)) return 0; if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_Step_1a(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 25 */ if (z->c <= z->lb || z->p[z->c - 1] != 115) return 0; among_var = find_among_b(z, a_0, 4); /* substring, line 25 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 25 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 26 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 27 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 29 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_1b(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 34 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; among_var = find_among_b(z, a_2, 3); /* substring, line 34 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 34 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 35 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_2); /* <-, line 35 */ if (ret < 0) return ret; } break; case 2: { int m_test = z->l - z->c; /* test, line 38 */ { /* gopast */ /* 
grouping v, line 38 */ int ret = out_grouping_b(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 38 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 39 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else among_var = find_among_b(z, a_1, 13); /* substring, line 39 */ if (!(among_var)) return 0; z->c = z->l - m_test; } switch(among_var) { case 0: return 0; case 1: { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_3); /* <+, line 41 */ z->c = c_keep; if (ret < 0) return ret; } break; case 2: z->ket = z->c; /* [, line 44 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 44 */ z->bra = z->c; /* ], line 44 */ { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 3: if (z->c != z->I[0]) return 0; /* atmark, line 45 */ { int m_test = z->l - z->c; /* test, line 45 */ { int ret = r_shortv(z); if (ret == 0) return 0; /* call shortv, line 45 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_4); /* <+, line 45 */ z->c = c_keep; if (ret < 0) return ret; } break; } break; } return 1; } static int r_Step_1c(struct SN_env * z) { z->ket = z->c; /* [, line 52 */ { int m1 = z->l - z->c; (void)m1; /* or, line 52 */ if (!(eq_s_b(z, 1, s_5))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_6))) return 0; } lab0: z->bra = z->c; /* ], line 52 */ { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping_b(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } { int ret = slice_from_s(z, 1, s_7); /* <-, line 54 */ if (ret < 0) return ret; } return 1; } static int r_Step_2(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 58 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_3, 20); /* 
substring, line 58 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 58 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 58 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_8); /* <-, line 59 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_9); /* <-, line 60 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_10); /* <-, line 61 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 4, s_11); /* <-, line 62 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_12); /* <-, line 63 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_13); /* <-, line 64 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_14); /* <-, line 66 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 3, s_15); /* <-, line 68 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 2, s_16); /* <-, line 69 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 2, s_17); /* <-, line 71 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 3, s_18); /* <-, line 72 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 3, s_19); /* <-, line 74 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_from_s(z, 3, s_20); /* <-, line 76 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 3, s_21); /* <-, line 77 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_3(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 82 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 7); /* substring, line 82 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 82 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 82 */ if (ret < 
0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_22); /* <-, line 83 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_23); /* <-, line 85 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 87 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_4(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 92 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3961384 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 19); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 92 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 95 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 96 */ if (!(eq_s_b(z, 1, s_24))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_25))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_5a(struct SN_env * z) { z->ket = z->c; /* [, line 101 */ if (!(eq_s_b(z, 1, s_26))) return 0; z->bra = z->c; /* ], line 101 */ { int m1 = z->l - z->c; (void)m1; /* or, line 102 */ { int ret = r_R2(z); if (ret == 0) goto lab1; /* call R2, line 102 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 102 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ { int ret = r_shortv(z); if (ret == 0) goto lab2; /* call shortv, line 102 */ if (ret < 0) return ret; } return 0; lab2: z->c = z->l - m2; } } lab0: { int ret = slice_del(z); /* delete, line 103 */ if (ret < 0) return ret; } return 1; } static int r_Step_5b(struct SN_env * z) { z->ket = z->c; /* [, line 107 */ if (!(eq_s_b(z, 1, s_27))) return 0; 
z->bra = z->c; /* ], line 107 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 108 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_28))) return 0; { int ret = slice_del(z); /* delete, line 109 */ if (ret < 0) return ret; } return 1; } extern int porter_ISO_8859_1_stem(struct SN_env * z) { z->B[0] = 0; /* unset Y_found, line 115 */ { int c1 = z->c; /* do, line 116 */ z->bra = z->c; /* [, line 116 */ if (!(eq_s(z, 1, s_29))) goto lab0; z->ket = z->c; /* ], line 116 */ { int ret = slice_from_s(z, 1, s_30); /* <-, line 116 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 116 */ lab0: z->c = c1; } { int c2 = z->c; /* do, line 117 */ while(1) { /* repeat, line 117 */ int c3 = z->c; while(1) { /* goto, line 117 */ int c4 = z->c; if (in_grouping(z, g_v, 97, 121, 0)) goto lab3; z->bra = z->c; /* [, line 117 */ if (!(eq_s(z, 1, s_31))) goto lab3; z->ket = z->c; /* ], line 117 */ z->c = c4; break; lab3: z->c = c4; if (z->c >= z->l) goto lab2; z->c++; /* goto, line 117 */ } { int ret = slice_from_s(z, 1, s_32); /* <-, line 117 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 117 */ continue; lab2: z->c = c3; break; } z->c = c2; } z->I[0] = z->l; z->I[1] = z->l; { int c5 = z->c; /* do, line 121 */ { /* gopast */ /* grouping v, line 122 */ int ret = out_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 122 */ int ret = in_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 122 */ { /* gopast */ /* grouping v, line 123 */ int ret = out_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 123 */ int ret = in_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 123 */ lab4: z->c = c5; } z->lb = z->c; z->c = z->l; /* backwards, line 126 */ { int m6 = z->l - z->c; (void)m6; /* do, line 127 */ { int ret = r_Step_1a(z); if (ret == 0) goto 
lab5; /* call Step_1a, line 127 */ if (ret < 0) return ret; } lab5: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 128 */ { int ret = r_Step_1b(z); if (ret == 0) goto lab6; /* call Step_1b, line 128 */ if (ret < 0) return ret; } lab6: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 129 */ { int ret = r_Step_1c(z); if (ret == 0) goto lab7; /* call Step_1c, line 129 */ if (ret < 0) return ret; } lab7: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 130 */ { int ret = r_Step_2(z); if (ret == 0) goto lab8; /* call Step_2, line 130 */ if (ret < 0) return ret; } lab8: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 131 */ { int ret = r_Step_3(z); if (ret == 0) goto lab9; /* call Step_3, line 131 */ if (ret < 0) return ret; } lab9: z->c = z->l - m10; } { int m11 = z->l - z->c; (void)m11; /* do, line 132 */ { int ret = r_Step_4(z); if (ret == 0) goto lab10; /* call Step_4, line 132 */ if (ret < 0) return ret; } lab10: z->c = z->l - m11; } { int m12 = z->l - z->c; (void)m12; /* do, line 133 */ { int ret = r_Step_5a(z); if (ret == 0) goto lab11; /* call Step_5a, line 133 */ if (ret < 0) return ret; } lab11: z->c = z->l - m12; } { int m13 = z->l - z->c; (void)m13; /* do, line 134 */ { int ret = r_Step_5b(z); if (ret == 0) goto lab12; /* call Step_5b, line 134 */ if (ret < 0) return ret; } lab12: z->c = z->l - m13; } z->c = z->lb; { int c14 = z->c; /* do, line 137 */ if (!(z->B[0])) goto lab13; /* Boolean test Y_found, line 137 */ while(1) { /* repeat, line 137 */ int c15 = z->c; while(1) { /* goto, line 137 */ int c16 = z->c; z->bra = z->c; /* [, line 137 */ if (!(eq_s(z, 1, s_33))) goto lab15; z->ket = z->c; /* ], line 137 */ z->c = c16; break; lab15: z->c = c16; if (z->c >= z->l) goto lab14; z->c++; /* goto, line 137 */ } { int ret = slice_from_s(z, 1, s_34); /* <-, line 137 */ if (ret < 0) return ret; } continue; lab14: z->c = c15; break; } lab13: z->c = c14; } return 1; } extern struct SN_env * 
porter_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } extern void porter_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_porter.h000066400000000000000000000005051217574114600312640ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * porter_ISO_8859_1_create_env(void); extern void porter_ISO_8859_1_close_env(struct SN_env * z); extern int porter_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c000066400000000000000000001127461217574114600321610ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int portuguese_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_residual_form(struct SN_env * z); static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * portuguese_ISO_8859_1_create_env(void); extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 0xE3 }; static const symbol s_0_2[1] = { 0xF5 }; static const struct among a_0[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 2, 0} }; static const symbol s_1_1[2] = { 'a', '~' }; static const symbol s_1_2[2] = { 'o', '~' }; static const struct among 
a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_1_1, 0, 1, 0}, /* 2 */ { 2, s_1_2, 0, 2, 0} }; static const symbol s_2_0[2] = { 'i', 'c' }; static const symbol s_2_1[2] = { 'a', 'd' }; static const symbol s_2_2[2] = { 'o', 's' }; static const symbol s_2_3[2] = { 'i', 'v' }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, 1, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 't', 'e' }; static const symbol s_3_1[4] = { 'a', 'v', 'e', 'l' }; static const symbol s_3_2[4] = { 0xED, 'v', 'e', 'l' }; static const struct among a_3[3] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 4, s_3_2, -1, 1, 0} }; static const symbol s_4_0[2] = { 'i', 'c' }; static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_4_2[2] = { 'i', 'v' }; static const struct among a_4[3] = { /* 0 */ { 2, s_4_0, -1, 1, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 2, s_4_2, -1, 1, 0} }; static const symbol s_5_0[3] = { 'i', 'c', 'a' }; static const symbol s_5_1[5] = { 0xE2, 'n', 'c', 'i', 'a' }; static const symbol s_5_2[5] = { 0xEA, 'n', 'c', 'i', 'a' }; static const symbol s_5_3[3] = { 'i', 'r', 'a' }; static const symbol s_5_4[5] = { 'a', 'd', 'o', 'r', 'a' }; static const symbol s_5_5[3] = { 'o', 's', 'a' }; static const symbol s_5_6[4] = { 'i', 's', 't', 'a' }; static const symbol s_5_7[3] = { 'i', 'v', 'a' }; static const symbol s_5_8[3] = { 'e', 'z', 'a' }; static const symbol s_5_9[5] = { 'l', 'o', 'g', 0xED, 'a' }; static const symbol s_5_10[5] = { 'i', 'd', 'a', 'd', 'e' }; static const symbol s_5_11[4] = { 'a', 'n', 't', 'e' }; static const symbol s_5_12[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_5_13[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_5_14[4] = { 0xE1, 'v', 'e', 'l' }; static const symbol s_5_15[4] = { 0xED, 'v', 'e', 'l' }; static const symbol s_5_16[5] = { 'u', 'c', 'i', 0xF3, 'n' }; static 
const symbol s_5_17[3] = { 'i', 'c', 'o' }; static const symbol s_5_18[4] = { 'i', 's', 'm', 'o' }; static const symbol s_5_19[3] = { 'o', 's', 'o' }; static const symbol s_5_20[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; static const symbol s_5_21[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; static const symbol s_5_22[3] = { 'i', 'v', 'o' }; static const symbol s_5_23[5] = { 'a', 0xE7, 'a', '~', 'o' }; static const symbol s_5_24[4] = { 'a', 'd', 'o', 'r' }; static const symbol s_5_25[4] = { 'i', 'c', 'a', 's' }; static const symbol s_5_26[6] = { 0xEA, 'n', 'c', 'i', 'a', 's' }; static const symbol s_5_27[4] = { 'i', 'r', 'a', 's' }; static const symbol s_5_28[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; static const symbol s_5_29[4] = { 'o', 's', 'a', 's' }; static const symbol s_5_30[5] = { 'i', 's', 't', 'a', 's' }; static const symbol s_5_31[4] = { 'i', 'v', 'a', 's' }; static const symbol s_5_32[4] = { 'e', 'z', 'a', 's' }; static const symbol s_5_33[6] = { 'l', 'o', 'g', 0xED, 'a', 's' }; static const symbol s_5_34[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; static const symbol s_5_35[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_5_36[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; static const symbol s_5_37[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_5_38[6] = { 'a', 0xE7, 'o', '~', 'e', 's' }; static const symbol s_5_39[4] = { 'i', 'c', 'o', 's' }; static const symbol s_5_40[5] = { 'i', 's', 'm', 'o', 's' }; static const symbol s_5_41[4] = { 'o', 's', 'o', 's' }; static const symbol s_5_42[7] = { 'a', 'm', 'e', 'n', 't', 'o', 's' }; static const symbol s_5_43[7] = { 'i', 'm', 'e', 'n', 't', 'o', 's' }; static const symbol s_5_44[4] = { 'i', 'v', 'o', 's' }; static const struct among a_5[45] = { /* 0 */ { 3, s_5_0, -1, 1, 0}, /* 1 */ { 5, s_5_1, -1, 1, 0}, /* 2 */ { 5, s_5_2, -1, 4, 0}, /* 3 */ { 3, s_5_3, -1, 9, 0}, /* 4 */ { 5, s_5_4, -1, 1, 0}, /* 5 */ { 3, s_5_5, -1, 1, 0}, /* 6 */ { 4, s_5_6, -1, 1, 0}, /* 7 */ { 3, s_5_7, -1, 8, 0}, /* 8 */ { 3, 
s_5_8, -1, 1, 0}, /* 9 */ { 5, s_5_9, -1, 2, 0}, /* 10 */ { 5, s_5_10, -1, 7, 0}, /* 11 */ { 4, s_5_11, -1, 1, 0}, /* 12 */ { 5, s_5_12, -1, 6, 0}, /* 13 */ { 6, s_5_13, 12, 5, 0}, /* 14 */ { 4, s_5_14, -1, 1, 0}, /* 15 */ { 4, s_5_15, -1, 1, 0}, /* 16 */ { 5, s_5_16, -1, 3, 0}, /* 17 */ { 3, s_5_17, -1, 1, 0}, /* 18 */ { 4, s_5_18, -1, 1, 0}, /* 19 */ { 3, s_5_19, -1, 1, 0}, /* 20 */ { 6, s_5_20, -1, 1, 0}, /* 21 */ { 6, s_5_21, -1, 1, 0}, /* 22 */ { 3, s_5_22, -1, 8, 0}, /* 23 */ { 5, s_5_23, -1, 1, 0}, /* 24 */ { 4, s_5_24, -1, 1, 0}, /* 25 */ { 4, s_5_25, -1, 1, 0}, /* 26 */ { 6, s_5_26, -1, 4, 0}, /* 27 */ { 4, s_5_27, -1, 9, 0}, /* 28 */ { 6, s_5_28, -1, 1, 0}, /* 29 */ { 4, s_5_29, -1, 1, 0}, /* 30 */ { 5, s_5_30, -1, 1, 0}, /* 31 */ { 4, s_5_31, -1, 8, 0}, /* 32 */ { 4, s_5_32, -1, 1, 0}, /* 33 */ { 6, s_5_33, -1, 2, 0}, /* 34 */ { 6, s_5_34, -1, 7, 0}, /* 35 */ { 7, s_5_35, -1, 3, 0}, /* 36 */ { 6, s_5_36, -1, 1, 0}, /* 37 */ { 5, s_5_37, -1, 1, 0}, /* 38 */ { 6, s_5_38, -1, 1, 0}, /* 39 */ { 4, s_5_39, -1, 1, 0}, /* 40 */ { 5, s_5_40, -1, 1, 0}, /* 41 */ { 4, s_5_41, -1, 1, 0}, /* 42 */ { 7, s_5_42, -1, 1, 0}, /* 43 */ { 7, s_5_43, -1, 1, 0}, /* 44 */ { 4, s_5_44, -1, 8, 0} }; static const symbol s_6_0[3] = { 'a', 'd', 'a' }; static const symbol s_6_1[3] = { 'i', 'd', 'a' }; static const symbol s_6_2[2] = { 'i', 'a' }; static const symbol s_6_3[4] = { 'a', 'r', 'i', 'a' }; static const symbol s_6_4[4] = { 'e', 'r', 'i', 'a' }; static const symbol s_6_5[4] = { 'i', 'r', 'i', 'a' }; static const symbol s_6_6[3] = { 'a', 'r', 'a' }; static const symbol s_6_7[3] = { 'e', 'r', 'a' }; static const symbol s_6_8[3] = { 'i', 'r', 'a' }; static const symbol s_6_9[3] = { 'a', 'v', 'a' }; static const symbol s_6_10[4] = { 'a', 's', 's', 'e' }; static const symbol s_6_11[4] = { 'e', 's', 's', 'e' }; static const symbol s_6_12[4] = { 'i', 's', 's', 'e' }; static const symbol s_6_13[4] = { 'a', 's', 't', 'e' }; static const symbol s_6_14[4] = { 'e', 's', 't', 'e' }; 
static const symbol s_6_15[4] = { 'i', 's', 't', 'e' }; static const symbol s_6_16[2] = { 'e', 'i' }; static const symbol s_6_17[4] = { 'a', 'r', 'e', 'i' }; static const symbol s_6_18[4] = { 'e', 'r', 'e', 'i' }; static const symbol s_6_19[4] = { 'i', 'r', 'e', 'i' }; static const symbol s_6_20[2] = { 'a', 'm' }; static const symbol s_6_21[3] = { 'i', 'a', 'm' }; static const symbol s_6_22[5] = { 'a', 'r', 'i', 'a', 'm' }; static const symbol s_6_23[5] = { 'e', 'r', 'i', 'a', 'm' }; static const symbol s_6_24[5] = { 'i', 'r', 'i', 'a', 'm' }; static const symbol s_6_25[4] = { 'a', 'r', 'a', 'm' }; static const symbol s_6_26[4] = { 'e', 'r', 'a', 'm' }; static const symbol s_6_27[4] = { 'i', 'r', 'a', 'm' }; static const symbol s_6_28[4] = { 'a', 'v', 'a', 'm' }; static const symbol s_6_29[2] = { 'e', 'm' }; static const symbol s_6_30[4] = { 'a', 'r', 'e', 'm' }; static const symbol s_6_31[4] = { 'e', 'r', 'e', 'm' }; static const symbol s_6_32[4] = { 'i', 'r', 'e', 'm' }; static const symbol s_6_33[5] = { 'a', 's', 's', 'e', 'm' }; static const symbol s_6_34[5] = { 'e', 's', 's', 'e', 'm' }; static const symbol s_6_35[5] = { 'i', 's', 's', 'e', 'm' }; static const symbol s_6_36[3] = { 'a', 'd', 'o' }; static const symbol s_6_37[3] = { 'i', 'd', 'o' }; static const symbol s_6_38[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_6_39[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_6_40[4] = { 'i', 'n', 'd', 'o' }; static const symbol s_6_41[5] = { 'a', 'r', 'a', '~', 'o' }; static const symbol s_6_42[5] = { 'e', 'r', 'a', '~', 'o' }; static const symbol s_6_43[5] = { 'i', 'r', 'a', '~', 'o' }; static const symbol s_6_44[2] = { 'a', 'r' }; static const symbol s_6_45[2] = { 'e', 'r' }; static const symbol s_6_46[2] = { 'i', 'r' }; static const symbol s_6_47[2] = { 'a', 's' }; static const symbol s_6_48[4] = { 'a', 'd', 'a', 's' }; static const symbol s_6_49[4] = { 'i', 'd', 'a', 's' }; static const symbol s_6_50[3] = { 'i', 'a', 's' }; static const symbol 
s_6_51[5] = { 'a', 'r', 'i', 'a', 's' }; static const symbol s_6_52[5] = { 'e', 'r', 'i', 'a', 's' }; static const symbol s_6_53[5] = { 'i', 'r', 'i', 'a', 's' }; static const symbol s_6_54[4] = { 'a', 'r', 'a', 's' }; static const symbol s_6_55[4] = { 'e', 'r', 'a', 's' }; static const symbol s_6_56[4] = { 'i', 'r', 'a', 's' }; static const symbol s_6_57[4] = { 'a', 'v', 'a', 's' }; static const symbol s_6_58[2] = { 'e', 's' }; static const symbol s_6_59[5] = { 'a', 'r', 'd', 'e', 's' }; static const symbol s_6_60[5] = { 'e', 'r', 'd', 'e', 's' }; static const symbol s_6_61[5] = { 'i', 'r', 'd', 'e', 's' }; static const symbol s_6_62[4] = { 'a', 'r', 'e', 's' }; static const symbol s_6_63[4] = { 'e', 'r', 'e', 's' }; static const symbol s_6_64[4] = { 'i', 'r', 'e', 's' }; static const symbol s_6_65[5] = { 'a', 's', 's', 'e', 's' }; static const symbol s_6_66[5] = { 'e', 's', 's', 'e', 's' }; static const symbol s_6_67[5] = { 'i', 's', 's', 'e', 's' }; static const symbol s_6_68[5] = { 'a', 's', 't', 'e', 's' }; static const symbol s_6_69[5] = { 'e', 's', 't', 'e', 's' }; static const symbol s_6_70[5] = { 'i', 's', 't', 'e', 's' }; static const symbol s_6_71[2] = { 'i', 's' }; static const symbol s_6_72[3] = { 'a', 'i', 's' }; static const symbol s_6_73[3] = { 'e', 'i', 's' }; static const symbol s_6_74[5] = { 'a', 'r', 'e', 'i', 's' }; static const symbol s_6_75[5] = { 'e', 'r', 'e', 'i', 's' }; static const symbol s_6_76[5] = { 'i', 'r', 'e', 'i', 's' }; static const symbol s_6_77[5] = { 0xE1, 'r', 'e', 'i', 's' }; static const symbol s_6_78[5] = { 0xE9, 'r', 'e', 'i', 's' }; static const symbol s_6_79[5] = { 0xED, 'r', 'e', 'i', 's' }; static const symbol s_6_80[6] = { 0xE1, 's', 's', 'e', 'i', 's' }; static const symbol s_6_81[6] = { 0xE9, 's', 's', 'e', 'i', 's' }; static const symbol s_6_82[6] = { 0xED, 's', 's', 'e', 'i', 's' }; static const symbol s_6_83[5] = { 0xE1, 'v', 'e', 'i', 's' }; static const symbol s_6_84[4] = { 0xED, 'e', 'i', 's' }; static const 
symbol s_6_85[6] = { 'a', 'r', 0xED, 'e', 'i', 's' }; static const symbol s_6_86[6] = { 'e', 'r', 0xED, 'e', 'i', 's' }; static const symbol s_6_87[6] = { 'i', 'r', 0xED, 'e', 'i', 's' }; static const symbol s_6_88[4] = { 'a', 'd', 'o', 's' }; static const symbol s_6_89[4] = { 'i', 'd', 'o', 's' }; static const symbol s_6_90[4] = { 'a', 'm', 'o', 's' }; static const symbol s_6_91[6] = { 0xE1, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_92[6] = { 0xE9, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_93[6] = { 0xED, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_94[6] = { 0xE1, 'v', 'a', 'm', 'o', 's' }; static const symbol s_6_95[5] = { 0xED, 'a', 'm', 'o', 's' }; static const symbol s_6_96[7] = { 'a', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_6_97[7] = { 'e', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_6_98[7] = { 'i', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_6_99[4] = { 'e', 'm', 'o', 's' }; static const symbol s_6_100[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_101[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_102[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_103[7] = { 0xE1, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_104[7] = { 0xEA, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_105[7] = { 0xED, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_106[4] = { 'i', 'm', 'o', 's' }; static const symbol s_6_107[5] = { 'a', 'r', 'm', 'o', 's' }; static const symbol s_6_108[5] = { 'e', 'r', 'm', 'o', 's' }; static const symbol s_6_109[5] = { 'i', 'r', 'm', 'o', 's' }; static const symbol s_6_110[4] = { 0xE1, 'm', 'o', 's' }; static const symbol s_6_111[4] = { 'a', 'r', 0xE1, 's' }; static const symbol s_6_112[4] = { 'e', 'r', 0xE1, 's' }; static const symbol s_6_113[4] = { 'i', 'r', 0xE1, 's' }; static const symbol s_6_114[2] = { 'e', 'u' }; static const symbol s_6_115[2] = { 'i', 'u' }; static const symbol s_6_116[2] = { 'o', 'u' }; 
static const symbol s_6_117[3] = { 'a', 'r', 0xE1 }; static const symbol s_6_118[3] = { 'e', 'r', 0xE1 }; static const symbol s_6_119[3] = { 'i', 'r', 0xE1 }; static const struct among a_6[120] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 3, s_6_1, -1, 1, 0}, /* 2 */ { 2, s_6_2, -1, 1, 0}, /* 3 */ { 4, s_6_3, 2, 1, 0}, /* 4 */ { 4, s_6_4, 2, 1, 0}, /* 5 */ { 4, s_6_5, 2, 1, 0}, /* 6 */ { 3, s_6_6, -1, 1, 0}, /* 7 */ { 3, s_6_7, -1, 1, 0}, /* 8 */ { 3, s_6_8, -1, 1, 0}, /* 9 */ { 3, s_6_9, -1, 1, 0}, /* 10 */ { 4, s_6_10, -1, 1, 0}, /* 11 */ { 4, s_6_11, -1, 1, 0}, /* 12 */ { 4, s_6_12, -1, 1, 0}, /* 13 */ { 4, s_6_13, -1, 1, 0}, /* 14 */ { 4, s_6_14, -1, 1, 0}, /* 15 */ { 4, s_6_15, -1, 1, 0}, /* 16 */ { 2, s_6_16, -1, 1, 0}, /* 17 */ { 4, s_6_17, 16, 1, 0}, /* 18 */ { 4, s_6_18, 16, 1, 0}, /* 19 */ { 4, s_6_19, 16, 1, 0}, /* 20 */ { 2, s_6_20, -1, 1, 0}, /* 21 */ { 3, s_6_21, 20, 1, 0}, /* 22 */ { 5, s_6_22, 21, 1, 0}, /* 23 */ { 5, s_6_23, 21, 1, 0}, /* 24 */ { 5, s_6_24, 21, 1, 0}, /* 25 */ { 4, s_6_25, 20, 1, 0}, /* 26 */ { 4, s_6_26, 20, 1, 0}, /* 27 */ { 4, s_6_27, 20, 1, 0}, /* 28 */ { 4, s_6_28, 20, 1, 0}, /* 29 */ { 2, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, 29, 1, 0}, /* 31 */ { 4, s_6_31, 29, 1, 0}, /* 32 */ { 4, s_6_32, 29, 1, 0}, /* 33 */ { 5, s_6_33, 29, 1, 0}, /* 34 */ { 5, s_6_34, 29, 1, 0}, /* 35 */ { 5, s_6_35, 29, 1, 0}, /* 36 */ { 3, s_6_36, -1, 1, 0}, /* 37 */ { 3, s_6_37, -1, 1, 0}, /* 38 */ { 4, s_6_38, -1, 1, 0}, /* 39 */ { 4, s_6_39, -1, 1, 0}, /* 40 */ { 4, s_6_40, -1, 1, 0}, /* 41 */ { 5, s_6_41, -1, 1, 0}, /* 42 */ { 5, s_6_42, -1, 1, 0}, /* 43 */ { 5, s_6_43, -1, 1, 0}, /* 44 */ { 2, s_6_44, -1, 1, 0}, /* 45 */ { 2, s_6_45, -1, 1, 0}, /* 46 */ { 2, s_6_46, -1, 1, 0}, /* 47 */ { 2, s_6_47, -1, 1, 0}, /* 48 */ { 4, s_6_48, 47, 1, 0}, /* 49 */ { 4, s_6_49, 47, 1, 0}, /* 50 */ { 3, s_6_50, 47, 1, 0}, /* 51 */ { 5, s_6_51, 50, 1, 0}, /* 52 */ { 5, s_6_52, 50, 1, 0}, /* 53 */ { 5, s_6_53, 50, 1, 0}, /* 54 */ { 4, s_6_54, 47, 1, 0}, /* 55 */ 
{ 4, s_6_55, 47, 1, 0}, /* 56 */ { 4, s_6_56, 47, 1, 0}, /* 57 */ { 4, s_6_57, 47, 1, 0}, /* 58 */ { 2, s_6_58, -1, 1, 0}, /* 59 */ { 5, s_6_59, 58, 1, 0}, /* 60 */ { 5, s_6_60, 58, 1, 0}, /* 61 */ { 5, s_6_61, 58, 1, 0}, /* 62 */ { 4, s_6_62, 58, 1, 0}, /* 63 */ { 4, s_6_63, 58, 1, 0}, /* 64 */ { 4, s_6_64, 58, 1, 0}, /* 65 */ { 5, s_6_65, 58, 1, 0}, /* 66 */ { 5, s_6_66, 58, 1, 0}, /* 67 */ { 5, s_6_67, 58, 1, 0}, /* 68 */ { 5, s_6_68, 58, 1, 0}, /* 69 */ { 5, s_6_69, 58, 1, 0}, /* 70 */ { 5, s_6_70, 58, 1, 0}, /* 71 */ { 2, s_6_71, -1, 1, 0}, /* 72 */ { 3, s_6_72, 71, 1, 0}, /* 73 */ { 3, s_6_73, 71, 1, 0}, /* 74 */ { 5, s_6_74, 73, 1, 0}, /* 75 */ { 5, s_6_75, 73, 1, 0}, /* 76 */ { 5, s_6_76, 73, 1, 0}, /* 77 */ { 5, s_6_77, 73, 1, 0}, /* 78 */ { 5, s_6_78, 73, 1, 0}, /* 79 */ { 5, s_6_79, 73, 1, 0}, /* 80 */ { 6, s_6_80, 73, 1, 0}, /* 81 */ { 6, s_6_81, 73, 1, 0}, /* 82 */ { 6, s_6_82, 73, 1, 0}, /* 83 */ { 5, s_6_83, 73, 1, 0}, /* 84 */ { 4, s_6_84, 73, 1, 0}, /* 85 */ { 6, s_6_85, 84, 1, 0}, /* 86 */ { 6, s_6_86, 84, 1, 0}, /* 87 */ { 6, s_6_87, 84, 1, 0}, /* 88 */ { 4, s_6_88, -1, 1, 0}, /* 89 */ { 4, s_6_89, -1, 1, 0}, /* 90 */ { 4, s_6_90, -1, 1, 0}, /* 91 */ { 6, s_6_91, 90, 1, 0}, /* 92 */ { 6, s_6_92, 90, 1, 0}, /* 93 */ { 6, s_6_93, 90, 1, 0}, /* 94 */ { 6, s_6_94, 90, 1, 0}, /* 95 */ { 5, s_6_95, 90, 1, 0}, /* 96 */ { 7, s_6_96, 95, 1, 0}, /* 97 */ { 7, s_6_97, 95, 1, 0}, /* 98 */ { 7, s_6_98, 95, 1, 0}, /* 99 */ { 4, s_6_99, -1, 1, 0}, /*100 */ { 6, s_6_100, 99, 1, 0}, /*101 */ { 6, s_6_101, 99, 1, 0}, /*102 */ { 6, s_6_102, 99, 1, 0}, /*103 */ { 7, s_6_103, 99, 1, 0}, /*104 */ { 7, s_6_104, 99, 1, 0}, /*105 */ { 7, s_6_105, 99, 1, 0}, /*106 */ { 4, s_6_106, -1, 1, 0}, /*107 */ { 5, s_6_107, -1, 1, 0}, /*108 */ { 5, s_6_108, -1, 1, 0}, /*109 */ { 5, s_6_109, -1, 1, 0}, /*110 */ { 4, s_6_110, -1, 1, 0}, /*111 */ { 4, s_6_111, -1, 1, 0}, /*112 */ { 4, s_6_112, -1, 1, 0}, /*113 */ { 4, s_6_113, -1, 1, 0}, /*114 */ { 2, s_6_114, -1, 1, 0}, /*115 */ { 2, 
s_6_115, -1, 1, 0}, /*116 */ { 2, s_6_116, -1, 1, 0}, /*117 */ { 3, s_6_117, -1, 1, 0}, /*118 */ { 3, s_6_118, -1, 1, 0}, /*119 */ { 3, s_6_119, -1, 1, 0} }; static const symbol s_7_0[1] = { 'a' }; static const symbol s_7_1[1] = { 'i' }; static const symbol s_7_2[1] = { 'o' }; static const symbol s_7_3[2] = { 'o', 's' }; static const symbol s_7_4[1] = { 0xE1 }; static const symbol s_7_5[1] = { 0xED }; static const symbol s_7_6[1] = { 0xF3 }; static const struct among a_7[7] = { /* 0 */ { 1, s_7_0, -1, 1, 0}, /* 1 */ { 1, s_7_1, -1, 1, 0}, /* 2 */ { 1, s_7_2, -1, 1, 0}, /* 3 */ { 2, s_7_3, -1, 1, 0}, /* 4 */ { 1, s_7_4, -1, 1, 0}, /* 5 */ { 1, s_7_5, -1, 1, 0}, /* 6 */ { 1, s_7_6, -1, 1, 0} }; static const symbol s_8_0[1] = { 'e' }; static const symbol s_8_1[1] = { 0xE7 }; static const symbol s_8_2[1] = { 0xE9 }; static const symbol s_8_3[1] = { 0xEA }; static const struct among a_8[4] = { /* 0 */ { 1, s_8_0, -1, 1, 0}, /* 1 */ { 1, s_8_1, -1, 2, 0}, /* 2 */ { 1, s_8_2, -1, 1, 0}, /* 3 */ { 1, s_8_3, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2 }; static const symbol s_0[] = { 'a', '~' }; static const symbol s_1[] = { 'o', '~' }; static const symbol s_2[] = { 0xE3 }; static const symbol s_3[] = { 0xF5 }; static const symbol s_4[] = { 'l', 'o', 'g' }; static const symbol s_5[] = { 'u' }; static const symbol s_6[] = { 'e', 'n', 't', 'e' }; static const symbol s_7[] = { 'a', 't' }; static const symbol s_8[] = { 'a', 't' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'i', 'r' }; static const symbol s_11[] = { 'u' }; static const symbol s_12[] = { 'g' }; static const symbol s_13[] = { 'i' }; static const symbol s_14[] = { 'c' }; static const symbol s_15[] = { 'c' }; static const symbol s_16[] = { 'i' }; static const symbol s_17[] = { 'c' }; static int r_prelude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 36 */ int c1 = z->c; z->bra = z->c; /* [, line 37 */ if 
(z->c >= z->l || (z->p[z->c + 0] != 227 && z->p[z->c + 0] != 245)) among_var = 3; else among_var = find_among(z, a_0, 3); /* substring, line 37 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 37 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 38 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_1); /* <-, line 39 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 40 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 50 */ { int c2 = z->c; /* or, line 52 */ if (in_grouping(z, g_v, 97, 250, 0)) goto lab2; { int c3 = z->c; /* or, line 51 */ if (out_grouping(z, g_v, 97, 250, 0)) goto lab4; { /* gopast */ /* grouping v, line 51 */ int ret = out_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping(z, g_v, 97, 250, 0)) goto lab2; { /* gopast */ /* non v, line 51 */ int ret = in_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping(z, g_v, 97, 250, 0)) goto lab0; { int c4 = z->c; /* or, line 53 */ if (out_grouping(z, g_v, 97, 250, 0)) goto lab6; { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping(z, g_v, 97, 250, 0)) goto lab0; if (z->c >= z->l) goto lab0; z->c++; /* next, line 53 */ } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 54 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 56 */ { /* gopast */ /* grouping v, line 57 */ int ret = out_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 57 */ int ret = in_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 57 
*/ { /* gopast */ /* grouping v, line 58 */ int ret = out_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 58 */ int ret = in_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 58 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 62 */ int c1 = z->c; z->bra = z->c; /* [, line 63 */ if (z->c + 1 >= z->l || z->p[z->c + 1] != 126) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 63 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 63 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_2); /* <-, line 64 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_3); /* <-, line 65 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 66 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 77 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((839714 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 45); /* substring, line 77 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 77 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 93 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 93 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 98 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_4); /* <-, line 98 */ if (ret < 0) return ret; } break; case 3: { 
int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 102 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_5); /* <-, line 102 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 106 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_6); /* <-, line 106 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 110 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 110 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ z->ket = z->c; /* [, line 112 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_2, 4); /* substring, line 112 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 112 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 112 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 112 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: z->ket = z->c; /* [, line 113 */ if (!(eq_s_b(z, 2, s_7))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 113 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 113 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } break; } lab0: ; } break; case 6: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 122 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 123 */ z->ket = z->c; /* [, line 124 */ if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_3, 3); 
/* substring, line 124 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 124 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 127 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ z->ket = z->c; /* [, line 136 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_4, 3); /* substring, line 136 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 136 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 139 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 139 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 146 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 146 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 147 */ z->ket = z->c; /* [, line 148 */ if (!(eq_s_b(z, 2, s_8))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 148 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 148 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 148 */ if (ret < 0) return ret; } lab3: ; } break; case 9: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 153 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_9))) 
return 0; { int ret = slice_from_s(z, 2, s_10); /* <-, line 154 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 159 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 159 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 160 */ among_var = find_among_b(z, a_6, 120); /* substring, line 160 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 160 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 184 */ among_var = find_among_b(z, a_7, 7); /* substring, line 184 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 184 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 187 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 187 */ if (ret < 0) return ret; } break; } return 1; } static int r_residual_form(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 192 */ among_var = find_among_b(z, a_8, 4); /* substring, line 192 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 192 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 194 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 194 */ { int m1 = z->l - z->c; (void)m1; /* or, line 194 */ if (!(eq_s_b(z, 1, s_11))) goto lab1; z->bra = z->c; /* ], line 194 */ { int m_test = z->l - z->c; /* test, line 194 */ if (!(eq_s_b(z, 1, s_12))) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_13))) return 0; z->bra = z->c; /* ], line 195 */ { 
int m_test = z->l - z->c; /* test, line 195 */ if (!(eq_s_b(z, 1, s_14))) return 0; z->c = z->l - m_test; } } lab0: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 195 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 195 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_15); /* <-, line 196 */ if (ret < 0) return ret; } break; } return 1; } extern int portuguese_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 202 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 202 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 203 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 203 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 204 */ { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ { int m4 = z->l - z->c; (void)m4; /* or, line 209 */ { int m5 = z->l - z->c; (void)m5; /* and, line 207 */ { int m6 = z->l - z->c; (void)m6; /* or, line 206 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab6; /* call standard_suffix, line 206 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 206 */ if (ret < 0) return ret; } } lab5: z->c = z->l - m5; { int m7 = z->l - z->c; (void)m7; /* do, line 207 */ z->ket = z->c; /* [, line 207 */ if (!(eq_s_b(z, 1, s_16))) goto lab7; z->bra = z->c; /* ], line 207 */ { int m_test = z->l - z->c; /* test, line 207 */ if (!(eq_s_b(z, 1, s_17))) goto lab7; z->c = z->l - m_test; } { int ret = r_RV(z); if (ret == 0) goto lab7; /* call RV, line 207 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 207 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } } goto lab3; lab4: z->c = z->l - m4; { int ret = r_residual_suffix(z); if (ret == 0) goto lab2; /* call residual_suffix, line 209 */ if (ret < 0) return ret; } } 
lab3: lab2: z->c = z->l - m3; } { int m8 = z->l - z->c; (void)m8; /* do, line 211 */ { int ret = r_residual_form(z); if (ret == 0) goto lab8; /* call residual_form, line 211 */ if (ret < 0) return ret; } lab8: z->c = z->l - m8; } z->c = z->lb; { int c9 = z->c; /* do, line 213 */ { int ret = r_postlude(z); if (ret == 0) goto lab9; /* call postlude, line 213 */ if (ret < 0) return ret; } lab9: z->c = c9; } return 1; } extern struct SN_env * portuguese_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void portuguese_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h000066400000000000000000000005211217574114600321510ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * portuguese_ISO_8859_1_create_env(void); extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); extern int portuguese_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c000066400000000000000000001201461217574114600314150ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int spanish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_y_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_attached_pronoun(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern 
struct SN_env * spanish_ISO_8859_1_create_env(void); extern void spanish_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 0xE1 }; static const symbol s_0_2[1] = { 0xE9 }; static const symbol s_0_3[1] = { 0xED }; static const symbol s_0_4[1] = { 0xF3 }; static const symbol s_0_5[1] = { 0xFA }; static const struct among a_0[6] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 2, 0}, /* 3 */ { 1, s_0_3, 0, 3, 0}, /* 4 */ { 1, s_0_4, 0, 4, 0}, /* 5 */ { 1, s_0_5, 0, 5, 0} }; static const symbol s_1_0[2] = { 'l', 'a' }; static const symbol s_1_1[4] = { 's', 'e', 'l', 'a' }; static const symbol s_1_2[2] = { 'l', 'e' }; static const symbol s_1_3[2] = { 'm', 'e' }; static const symbol s_1_4[2] = { 's', 'e' }; static const symbol s_1_5[2] = { 'l', 'o' }; static const symbol s_1_6[4] = { 's', 'e', 'l', 'o' }; static const symbol s_1_7[3] = { 'l', 'a', 's' }; static const symbol s_1_8[5] = { 's', 'e', 'l', 'a', 's' }; static const symbol s_1_9[3] = { 'l', 'e', 's' }; static const symbol s_1_10[3] = { 'l', 'o', 's' }; static const symbol s_1_11[5] = { 's', 'e', 'l', 'o', 's' }; static const symbol s_1_12[3] = { 'n', 'o', 's' }; static const struct among a_1[13] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 4, s_1_1, 0, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 2, s_1_4, -1, -1, 0}, /* 5 */ { 2, s_1_5, -1, -1, 0}, /* 6 */ { 4, s_1_6, 5, -1, 0}, /* 7 */ { 3, s_1_7, -1, -1, 0}, /* 8 */ { 5, s_1_8, 7, -1, 0}, /* 9 */ { 3, s_1_9, -1, -1, 0}, /* 10 */ { 3, s_1_10, -1, -1, 0}, /* 11 */ { 5, s_1_11, 10, -1, 0}, /* 12 */ { 3, s_1_12, -1, -1, 0} }; static const symbol s_2_0[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_2_1[5] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_2_2[5] = { 'y', 'e', 'n', 'd', 'o' }; static const symbol s_2_3[4] = { 0xE1, 'n', 'd', 'o' }; static const symbol s_2_4[5] = { 'i', 0xE9, 'n', 'd', 'o' }; static const symbol 
s_2_5[2] = { 'a', 'r' }; static const symbol s_2_6[2] = { 'e', 'r' }; static const symbol s_2_7[2] = { 'i', 'r' }; static const symbol s_2_8[2] = { 0xE1, 'r' }; static const symbol s_2_9[2] = { 0xE9, 'r' }; static const symbol s_2_10[2] = { 0xED, 'r' }; static const struct among a_2[11] = { /* 0 */ { 4, s_2_0, -1, 6, 0}, /* 1 */ { 5, s_2_1, -1, 6, 0}, /* 2 */ { 5, s_2_2, -1, 7, 0}, /* 3 */ { 4, s_2_3, -1, 2, 0}, /* 4 */ { 5, s_2_4, -1, 1, 0}, /* 5 */ { 2, s_2_5, -1, 6, 0}, /* 6 */ { 2, s_2_6, -1, 6, 0}, /* 7 */ { 2, s_2_7, -1, 6, 0}, /* 8 */ { 2, s_2_8, -1, 3, 0}, /* 9 */ { 2, s_2_9, -1, 4, 0}, /* 10 */ { 2, s_2_10, -1, 5, 0} }; static const symbol s_3_0[2] = { 'i', 'c' }; static const symbol s_3_1[2] = { 'a', 'd' }; static const symbol s_3_2[2] = { 'o', 's' }; static const symbol s_3_3[2] = { 'i', 'v' }; static const struct among a_3[4] = { /* 0 */ { 2, s_3_0, -1, -1, 0}, /* 1 */ { 2, s_3_1, -1, -1, 0}, /* 2 */ { 2, s_3_2, -1, -1, 0}, /* 3 */ { 2, s_3_3, -1, 1, 0} }; static const symbol s_4_0[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_4_1[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_4_2[4] = { 'a', 'n', 't', 'e' }; static const struct among a_4[3] = { /* 0 */ { 4, s_4_0, -1, 1, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 4, s_4_2, -1, 1, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_5_2[2] = { 'i', 'v' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, -1, 1, 0} }; static const symbol s_6_0[3] = { 'i', 'c', 'a' }; static const symbol s_6_1[5] = { 'a', 'n', 'c', 'i', 'a' }; static const symbol s_6_2[5] = { 'e', 'n', 'c', 'i', 'a' }; static const symbol s_6_3[5] = { 'a', 'd', 'o', 'r', 'a' }; static const symbol s_6_4[3] = { 'o', 's', 'a' }; static const symbol s_6_5[4] = { 'i', 's', 't', 'a' }; static const symbol s_6_6[3] = { 'i', 'v', 'a' }; static const symbol s_6_7[4] = { 'a', 'n', 'z', 'a' }; 
static const symbol s_6_8[5] = { 'l', 'o', 'g', 0xED, 'a' }; static const symbol s_6_9[4] = { 'i', 'd', 'a', 'd' }; static const symbol s_6_10[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_6_11[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_6_12[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_13[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_14[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_15[5] = { 'a', 'c', 'i', 0xF3, 'n' }; static const symbol s_6_16[5] = { 'u', 'c', 'i', 0xF3, 'n' }; static const symbol s_6_17[3] = { 'i', 'c', 'o' }; static const symbol s_6_18[4] = { 'i', 's', 'm', 'o' }; static const symbol s_6_19[3] = { 'o', 's', 'o' }; static const symbol s_6_20[7] = { 'a', 'm', 'i', 'e', 'n', 't', 'o' }; static const symbol s_6_21[7] = { 'i', 'm', 'i', 'e', 'n', 't', 'o' }; static const symbol s_6_22[3] = { 'i', 'v', 'o' }; static const symbol s_6_23[4] = { 'a', 'd', 'o', 'r' }; static const symbol s_6_24[4] = { 'i', 'c', 'a', 's' }; static const symbol s_6_25[6] = { 'a', 'n', 'c', 'i', 'a', 's' }; static const symbol s_6_26[6] = { 'e', 'n', 'c', 'i', 'a', 's' }; static const symbol s_6_27[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; static const symbol s_6_28[4] = { 'o', 's', 'a', 's' }; static const symbol s_6_29[5] = { 'i', 's', 't', 'a', 's' }; static const symbol s_6_30[4] = { 'i', 'v', 'a', 's' }; static const symbol s_6_31[5] = { 'a', 'n', 'z', 'a', 's' }; static const symbol s_6_32[6] = { 'l', 'o', 'g', 0xED, 'a', 's' }; static const symbol s_6_33[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; static const symbol s_6_34[5] = { 'a', 'b', 'l', 'e', 's' }; static const symbol s_6_35[5] = { 'i', 'b', 'l', 'e', 's' }; static const symbol s_6_36[7] = { 'a', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_6_37[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_6_38[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; static const symbol s_6_39[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_6_40[4] = { 'i', 'c', 
'o', 's' }; static const symbol s_6_41[5] = { 'i', 's', 'm', 'o', 's' }; static const symbol s_6_42[4] = { 'o', 's', 'o', 's' }; static const symbol s_6_43[8] = { 'a', 'm', 'i', 'e', 'n', 't', 'o', 's' }; static const symbol s_6_44[8] = { 'i', 'm', 'i', 'e', 'n', 't', 'o', 's' }; static const symbol s_6_45[4] = { 'i', 'v', 'o', 's' }; static const struct among a_6[46] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 5, s_6_1, -1, 2, 0}, /* 2 */ { 5, s_6_2, -1, 5, 0}, /* 3 */ { 5, s_6_3, -1, 2, 0}, /* 4 */ { 3, s_6_4, -1, 1, 0}, /* 5 */ { 4, s_6_5, -1, 1, 0}, /* 6 */ { 3, s_6_6, -1, 9, 0}, /* 7 */ { 4, s_6_7, -1, 1, 0}, /* 8 */ { 5, s_6_8, -1, 3, 0}, /* 9 */ { 4, s_6_9, -1, 8, 0}, /* 10 */ { 4, s_6_10, -1, 1, 0}, /* 11 */ { 4, s_6_11, -1, 1, 0}, /* 12 */ { 4, s_6_12, -1, 2, 0}, /* 13 */ { 5, s_6_13, -1, 7, 0}, /* 14 */ { 6, s_6_14, 13, 6, 0}, /* 15 */ { 5, s_6_15, -1, 2, 0}, /* 16 */ { 5, s_6_16, -1, 4, 0}, /* 17 */ { 3, s_6_17, -1, 1, 0}, /* 18 */ { 4, s_6_18, -1, 1, 0}, /* 19 */ { 3, s_6_19, -1, 1, 0}, /* 20 */ { 7, s_6_20, -1, 1, 0}, /* 21 */ { 7, s_6_21, -1, 1, 0}, /* 22 */ { 3, s_6_22, -1, 9, 0}, /* 23 */ { 4, s_6_23, -1, 2, 0}, /* 24 */ { 4, s_6_24, -1, 1, 0}, /* 25 */ { 6, s_6_25, -1, 2, 0}, /* 26 */ { 6, s_6_26, -1, 5, 0}, /* 27 */ { 6, s_6_27, -1, 2, 0}, /* 28 */ { 4, s_6_28, -1, 1, 0}, /* 29 */ { 5, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, -1, 9, 0}, /* 31 */ { 5, s_6_31, -1, 1, 0}, /* 32 */ { 6, s_6_32, -1, 3, 0}, /* 33 */ { 6, s_6_33, -1, 8, 0}, /* 34 */ { 5, s_6_34, -1, 1, 0}, /* 35 */ { 5, s_6_35, -1, 1, 0}, /* 36 */ { 7, s_6_36, -1, 2, 0}, /* 37 */ { 7, s_6_37, -1, 4, 0}, /* 38 */ { 6, s_6_38, -1, 2, 0}, /* 39 */ { 5, s_6_39, -1, 2, 0}, /* 40 */ { 4, s_6_40, -1, 1, 0}, /* 41 */ { 5, s_6_41, -1, 1, 0}, /* 42 */ { 4, s_6_42, -1, 1, 0}, /* 43 */ { 8, s_6_43, -1, 1, 0}, /* 44 */ { 8, s_6_44, -1, 1, 0}, /* 45 */ { 4, s_6_45, -1, 9, 0} }; static const symbol s_7_0[2] = { 'y', 'a' }; static const symbol s_7_1[2] = { 'y', 'e' }; static const symbol s_7_2[3] = { 
'y', 'a', 'n' }; static const symbol s_7_3[3] = { 'y', 'e', 'n' }; static const symbol s_7_4[5] = { 'y', 'e', 'r', 'o', 'n' }; static const symbol s_7_5[5] = { 'y', 'e', 'n', 'd', 'o' }; static const symbol s_7_6[2] = { 'y', 'o' }; static const symbol s_7_7[3] = { 'y', 'a', 's' }; static const symbol s_7_8[3] = { 'y', 'e', 's' }; static const symbol s_7_9[4] = { 'y', 'a', 'i', 's' }; static const symbol s_7_10[5] = { 'y', 'a', 'm', 'o', 's' }; static const symbol s_7_11[2] = { 'y', 0xF3 }; static const struct among a_7[12] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 2, s_7_1, -1, 1, 0}, /* 2 */ { 3, s_7_2, -1, 1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 5, s_7_4, -1, 1, 0}, /* 5 */ { 5, s_7_5, -1, 1, 0}, /* 6 */ { 2, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 3, s_7_8, -1, 1, 0}, /* 9 */ { 4, s_7_9, -1, 1, 0}, /* 10 */ { 5, s_7_10, -1, 1, 0}, /* 11 */ { 2, s_7_11, -1, 1, 0} }; static const symbol s_8_0[3] = { 'a', 'b', 'a' }; static const symbol s_8_1[3] = { 'a', 'd', 'a' }; static const symbol s_8_2[3] = { 'i', 'd', 'a' }; static const symbol s_8_3[3] = { 'a', 'r', 'a' }; static const symbol s_8_4[4] = { 'i', 'e', 'r', 'a' }; static const symbol s_8_5[2] = { 0xED, 'a' }; static const symbol s_8_6[4] = { 'a', 'r', 0xED, 'a' }; static const symbol s_8_7[4] = { 'e', 'r', 0xED, 'a' }; static const symbol s_8_8[4] = { 'i', 'r', 0xED, 'a' }; static const symbol s_8_9[2] = { 'a', 'd' }; static const symbol s_8_10[2] = { 'e', 'd' }; static const symbol s_8_11[2] = { 'i', 'd' }; static const symbol s_8_12[3] = { 'a', 's', 'e' }; static const symbol s_8_13[4] = { 'i', 'e', 's', 'e' }; static const symbol s_8_14[4] = { 'a', 's', 't', 'e' }; static const symbol s_8_15[4] = { 'i', 's', 't', 'e' }; static const symbol s_8_16[2] = { 'a', 'n' }; static const symbol s_8_17[4] = { 'a', 'b', 'a', 'n' }; static const symbol s_8_18[4] = { 'a', 'r', 'a', 'n' }; static const symbol s_8_19[5] = { 'i', 'e', 'r', 'a', 'n' }; static const symbol s_8_20[3] = { 0xED, 'a', 
'n' }; static const symbol s_8_21[5] = { 'a', 'r', 0xED, 'a', 'n' }; static const symbol s_8_22[5] = { 'e', 'r', 0xED, 'a', 'n' }; static const symbol s_8_23[5] = { 'i', 'r', 0xED, 'a', 'n' }; static const symbol s_8_24[2] = { 'e', 'n' }; static const symbol s_8_25[4] = { 'a', 's', 'e', 'n' }; static const symbol s_8_26[5] = { 'i', 'e', 's', 'e', 'n' }; static const symbol s_8_27[4] = { 'a', 'r', 'o', 'n' }; static const symbol s_8_28[5] = { 'i', 'e', 'r', 'o', 'n' }; static const symbol s_8_29[4] = { 'a', 'r', 0xE1, 'n' }; static const symbol s_8_30[4] = { 'e', 'r', 0xE1, 'n' }; static const symbol s_8_31[4] = { 'i', 'r', 0xE1, 'n' }; static const symbol s_8_32[3] = { 'a', 'd', 'o' }; static const symbol s_8_33[3] = { 'i', 'd', 'o' }; static const symbol s_8_34[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_8_35[5] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_8_36[2] = { 'a', 'r' }; static const symbol s_8_37[2] = { 'e', 'r' }; static const symbol s_8_38[2] = { 'i', 'r' }; static const symbol s_8_39[2] = { 'a', 's' }; static const symbol s_8_40[4] = { 'a', 'b', 'a', 's' }; static const symbol s_8_41[4] = { 'a', 'd', 'a', 's' }; static const symbol s_8_42[4] = { 'i', 'd', 'a', 's' }; static const symbol s_8_43[4] = { 'a', 'r', 'a', 's' }; static const symbol s_8_44[5] = { 'i', 'e', 'r', 'a', 's' }; static const symbol s_8_45[3] = { 0xED, 'a', 's' }; static const symbol s_8_46[5] = { 'a', 'r', 0xED, 'a', 's' }; static const symbol s_8_47[5] = { 'e', 'r', 0xED, 'a', 's' }; static const symbol s_8_48[5] = { 'i', 'r', 0xED, 'a', 's' }; static const symbol s_8_49[2] = { 'e', 's' }; static const symbol s_8_50[4] = { 'a', 's', 'e', 's' }; static const symbol s_8_51[5] = { 'i', 'e', 's', 'e', 's' }; static const symbol s_8_52[5] = { 'a', 'b', 'a', 'i', 's' }; static const symbol s_8_53[5] = { 'a', 'r', 'a', 'i', 's' }; static const symbol s_8_54[6] = { 'i', 'e', 'r', 'a', 'i', 's' }; static const symbol s_8_55[4] = { 0xED, 'a', 'i', 's' }; static const symbol 
s_8_56[6] = { 'a', 'r', 0xED, 'a', 'i', 's' }; static const symbol s_8_57[6] = { 'e', 'r', 0xED, 'a', 'i', 's' }; static const symbol s_8_58[6] = { 'i', 'r', 0xED, 'a', 'i', 's' }; static const symbol s_8_59[5] = { 'a', 's', 'e', 'i', 's' }; static const symbol s_8_60[6] = { 'i', 'e', 's', 'e', 'i', 's' }; static const symbol s_8_61[6] = { 'a', 's', 't', 'e', 'i', 's' }; static const symbol s_8_62[6] = { 'i', 's', 't', 'e', 'i', 's' }; static const symbol s_8_63[3] = { 0xE1, 'i', 's' }; static const symbol s_8_64[3] = { 0xE9, 'i', 's' }; static const symbol s_8_65[5] = { 'a', 'r', 0xE9, 'i', 's' }; static const symbol s_8_66[5] = { 'e', 'r', 0xE9, 'i', 's' }; static const symbol s_8_67[5] = { 'i', 'r', 0xE9, 'i', 's' }; static const symbol s_8_68[4] = { 'a', 'd', 'o', 's' }; static const symbol s_8_69[4] = { 'i', 'd', 'o', 's' }; static const symbol s_8_70[4] = { 'a', 'm', 'o', 's' }; static const symbol s_8_71[6] = { 0xE1, 'b', 'a', 'm', 'o', 's' }; static const symbol s_8_72[6] = { 0xE1, 'r', 'a', 'm', 'o', 's' }; static const symbol s_8_73[7] = { 'i', 0xE9, 'r', 'a', 'm', 'o', 's' }; static const symbol s_8_74[5] = { 0xED, 'a', 'm', 'o', 's' }; static const symbol s_8_75[7] = { 'a', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_8_76[7] = { 'e', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_8_77[7] = { 'i', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_8_78[4] = { 'e', 'm', 'o', 's' }; static const symbol s_8_79[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_80[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_81[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_82[6] = { 0xE1, 's', 'e', 'm', 'o', 's' }; static const symbol s_8_83[7] = { 'i', 0xE9, 's', 'e', 'm', 'o', 's' }; static const symbol s_8_84[4] = { 'i', 'm', 'o', 's' }; static const symbol s_8_85[4] = { 'a', 'r', 0xE1, 's' }; static const symbol s_8_86[4] = { 'e', 'r', 0xE1, 's' }; static const symbol s_8_87[4] = { 'i', 'r', 0xE1, 's' 
}; static const symbol s_8_88[2] = { 0xED, 's' }; static const symbol s_8_89[3] = { 'a', 'r', 0xE1 }; static const symbol s_8_90[3] = { 'e', 'r', 0xE1 }; static const symbol s_8_91[3] = { 'i', 'r', 0xE1 }; static const symbol s_8_92[3] = { 'a', 'r', 0xE9 }; static const symbol s_8_93[3] = { 'e', 'r', 0xE9 }; static const symbol s_8_94[3] = { 'i', 'r', 0xE9 }; static const symbol s_8_95[2] = { 'i', 0xF3 }; static const struct among a_8[96] = { /* 0 */ { 3, s_8_0, -1, 2, 0}, /* 1 */ { 3, s_8_1, -1, 2, 0}, /* 2 */ { 3, s_8_2, -1, 2, 0}, /* 3 */ { 3, s_8_3, -1, 2, 0}, /* 4 */ { 4, s_8_4, -1, 2, 0}, /* 5 */ { 2, s_8_5, -1, 2, 0}, /* 6 */ { 4, s_8_6, 5, 2, 0}, /* 7 */ { 4, s_8_7, 5, 2, 0}, /* 8 */ { 4, s_8_8, 5, 2, 0}, /* 9 */ { 2, s_8_9, -1, 2, 0}, /* 10 */ { 2, s_8_10, -1, 2, 0}, /* 11 */ { 2, s_8_11, -1, 2, 0}, /* 12 */ { 3, s_8_12, -1, 2, 0}, /* 13 */ { 4, s_8_13, -1, 2, 0}, /* 14 */ { 4, s_8_14, -1, 2, 0}, /* 15 */ { 4, s_8_15, -1, 2, 0}, /* 16 */ { 2, s_8_16, -1, 2, 0}, /* 17 */ { 4, s_8_17, 16, 2, 0}, /* 18 */ { 4, s_8_18, 16, 2, 0}, /* 19 */ { 5, s_8_19, 16, 2, 0}, /* 20 */ { 3, s_8_20, 16, 2, 0}, /* 21 */ { 5, s_8_21, 20, 2, 0}, /* 22 */ { 5, s_8_22, 20, 2, 0}, /* 23 */ { 5, s_8_23, 20, 2, 0}, /* 24 */ { 2, s_8_24, -1, 1, 0}, /* 25 */ { 4, s_8_25, 24, 2, 0}, /* 26 */ { 5, s_8_26, 24, 2, 0}, /* 27 */ { 4, s_8_27, -1, 2, 0}, /* 28 */ { 5, s_8_28, -1, 2, 0}, /* 29 */ { 4, s_8_29, -1, 2, 0}, /* 30 */ { 4, s_8_30, -1, 2, 0}, /* 31 */ { 4, s_8_31, -1, 2, 0}, /* 32 */ { 3, s_8_32, -1, 2, 0}, /* 33 */ { 3, s_8_33, -1, 2, 0}, /* 34 */ { 4, s_8_34, -1, 2, 0}, /* 35 */ { 5, s_8_35, -1, 2, 0}, /* 36 */ { 2, s_8_36, -1, 2, 0}, /* 37 */ { 2, s_8_37, -1, 2, 0}, /* 38 */ { 2, s_8_38, -1, 2, 0}, /* 39 */ { 2, s_8_39, -1, 2, 0}, /* 40 */ { 4, s_8_40, 39, 2, 0}, /* 41 */ { 4, s_8_41, 39, 2, 0}, /* 42 */ { 4, s_8_42, 39, 2, 0}, /* 43 */ { 4, s_8_43, 39, 2, 0}, /* 44 */ { 5, s_8_44, 39, 2, 0}, /* 45 */ { 3, s_8_45, 39, 2, 0}, /* 46 */ { 5, s_8_46, 45, 2, 0}, /* 47 */ { 5, s_8_47, 
45, 2, 0}, /* 48 */ { 5, s_8_48, 45, 2, 0}, /* 49 */ { 2, s_8_49, -1, 1, 0}, /* 50 */ { 4, s_8_50, 49, 2, 0}, /* 51 */ { 5, s_8_51, 49, 2, 0}, /* 52 */ { 5, s_8_52, -1, 2, 0}, /* 53 */ { 5, s_8_53, -1, 2, 0}, /* 54 */ { 6, s_8_54, -1, 2, 0}, /* 55 */ { 4, s_8_55, -1, 2, 0}, /* 56 */ { 6, s_8_56, 55, 2, 0}, /* 57 */ { 6, s_8_57, 55, 2, 0}, /* 58 */ { 6, s_8_58, 55, 2, 0}, /* 59 */ { 5, s_8_59, -1, 2, 0}, /* 60 */ { 6, s_8_60, -1, 2, 0}, /* 61 */ { 6, s_8_61, -1, 2, 0}, /* 62 */ { 6, s_8_62, -1, 2, 0}, /* 63 */ { 3, s_8_63, -1, 2, 0}, /* 64 */ { 3, s_8_64, -1, 1, 0}, /* 65 */ { 5, s_8_65, 64, 2, 0}, /* 66 */ { 5, s_8_66, 64, 2, 0}, /* 67 */ { 5, s_8_67, 64, 2, 0}, /* 68 */ { 4, s_8_68, -1, 2, 0}, /* 69 */ { 4, s_8_69, -1, 2, 0}, /* 70 */ { 4, s_8_70, -1, 2, 0}, /* 71 */ { 6, s_8_71, 70, 2, 0}, /* 72 */ { 6, s_8_72, 70, 2, 0}, /* 73 */ { 7, s_8_73, 70, 2, 0}, /* 74 */ { 5, s_8_74, 70, 2, 0}, /* 75 */ { 7, s_8_75, 74, 2, 0}, /* 76 */ { 7, s_8_76, 74, 2, 0}, /* 77 */ { 7, s_8_77, 74, 2, 0}, /* 78 */ { 4, s_8_78, -1, 1, 0}, /* 79 */ { 6, s_8_79, 78, 2, 0}, /* 80 */ { 6, s_8_80, 78, 2, 0}, /* 81 */ { 6, s_8_81, 78, 2, 0}, /* 82 */ { 6, s_8_82, 78, 2, 0}, /* 83 */ { 7, s_8_83, 78, 2, 0}, /* 84 */ { 4, s_8_84, -1, 2, 0}, /* 85 */ { 4, s_8_85, -1, 2, 0}, /* 86 */ { 4, s_8_86, -1, 2, 0}, /* 87 */ { 4, s_8_87, -1, 2, 0}, /* 88 */ { 2, s_8_88, -1, 2, 0}, /* 89 */ { 3, s_8_89, -1, 2, 0}, /* 90 */ { 3, s_8_90, -1, 2, 0}, /* 91 */ { 3, s_8_91, -1, 2, 0}, /* 92 */ { 3, s_8_92, -1, 2, 0}, /* 93 */ { 3, s_8_93, -1, 2, 0}, /* 94 */ { 3, s_8_94, -1, 2, 0}, /* 95 */ { 2, s_8_95, -1, 2, 0} }; static const symbol s_9_0[1] = { 'a' }; static const symbol s_9_1[1] = { 'e' }; static const symbol s_9_2[1] = { 'o' }; static const symbol s_9_3[2] = { 'o', 's' }; static const symbol s_9_4[1] = { 0xE1 }; static const symbol s_9_5[1] = { 0xE9 }; static const symbol s_9_6[1] = { 0xED }; static const symbol s_9_7[1] = { 0xF3 }; static const struct among a_9[8] = { /* 0 */ { 1, s_9_0, -1, 1, 0}, /* 1 
*/ { 1, s_9_1, -1, 2, 0}, /* 2 */ { 1, s_9_2, -1, 1, 0}, /* 3 */ { 2, s_9_3, -1, 1, 0}, /* 4 */ { 1, s_9_4, -1, 1, 0}, /* 5 */ { 1, s_9_5, -1, 2, 0}, /* 6 */ { 1, s_9_6, -1, 1, 0}, /* 7 */ { 1, s_9_7, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'o' }; static const symbol s_4[] = { 'u' }; static const symbol s_5[] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_6[] = { 'a', 'n', 'd', 'o' }; static const symbol s_7[] = { 'a', 'r' }; static const symbol s_8[] = { 'e', 'r' }; static const symbol s_9[] = { 'i', 'r' }; static const symbol s_10[] = { 'u' }; static const symbol s_11[] = { 'i', 'c' }; static const symbol s_12[] = { 'l', 'o', 'g' }; static const symbol s_13[] = { 'u' }; static const symbol s_14[] = { 'e', 'n', 't', 'e' }; static const symbol s_15[] = { 'a', 't' }; static const symbol s_16[] = { 'a', 't' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'u' }; static const symbol s_19[] = { 'g' }; static const symbol s_20[] = { 'u' }; static const symbol s_21[] = { 'g' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 37 */ { int c2 = z->c; /* or, line 39 */ if (in_grouping(z, g_v, 97, 252, 0)) goto lab2; { int c3 = z->c; /* or, line 38 */ if (out_grouping(z, g_v, 97, 252, 0)) goto lab4; { /* gopast */ /* grouping v, line 38 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping(z, g_v, 97, 252, 0)) goto lab2; { /* gopast */ /* non v, line 38 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping(z, g_v, 97, 252, 0)) goto lab0; { int c4 = z->c; /* or, line 40 */ if (out_grouping(z, g_v, 97, 
252, 0)) goto lab6; { /* gopast */ /* grouping v, line 40 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping(z, g_v, 97, 252, 0)) goto lab0; if (z->c >= z->l) goto lab0; z->c++; /* next, line 40 */ } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 41 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 43 */ { /* gopast */ /* grouping v, line 44 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 44 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 44 */ { /* gopast */ /* grouping v, line 45 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 45 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 45 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 49 */ int c1 = z->c; z->bra = z->c; /* [, line 50 */ if (z->c >= z->l || z->p[z->c + 0] >> 5 != 7 || !((67641858 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 6; else among_var = find_among(z, a_0, 6); /* substring, line 50 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 50 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 51 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 52 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 53 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 54 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 55 */ if (ret < 0) return ret; } break; case 6: if (z->c >= z->l) goto lab0; z->c++; /* next, line 57 */ break; } continue; lab0: z->c = c1; break; } 
return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_attached_pronoun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 68 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((557090 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_1, 13))) return 0; /* substring, line 68 */ z->bra = z->c; /* ], line 68 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; among_var = find_among_b(z, a_2, 11); /* substring, line 72 */ if (!(among_var)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 72 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: z->bra = z->c; /* ], line 73 */ { int ret = slice_from_s(z, 5, s_5); /* <-, line 73 */ if (ret < 0) return ret; } break; case 2: z->bra = z->c; /* ], line 74 */ { int ret = slice_from_s(z, 4, s_6); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: z->bra = z->c; /* ], line 75 */ { int ret = slice_from_s(z, 2, s_7); /* <-, line 75 */ if (ret < 0) return ret; } break; case 4: z->bra = z->c; /* ], line 76 */ { int ret = slice_from_s(z, 2, s_8); /* <-, line 76 */ if (ret < 0) return ret; } break; case 5: z->bra = z->c; /* ], line 77 */ { int ret = slice_from_s(z, 2, s_9); /* <-, line 77 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } break; case 7: if (!(eq_s_b(z, 1, s_10))) return 0; { int ret = slice_del(z); /* delete, line 82 */ if (ret < 0) return ret; } break; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 87 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((835634 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_6, 46); /* 
substring, line 87 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 87 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 99 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 105 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 105 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 106 */ z->ket = z->c; /* [, line 106 */ if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 106 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 106 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 106 */ if (ret < 0) return ret; } lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 111 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_12); /* <-, line 111 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 115 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_13); /* <-, line 115 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 119 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_14); /* <-, line 119 */ if (ret < 0) return ret; } break; case 6: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 123 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 124 */ z->ket = z->c; /* [, line 125 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_3, 4); /* substring, line 125 */ if (!(among_var)) { z->c = z->l - 
m_keep; goto lab1; } z->bra = z->c; /* ], line 125 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 125 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: z->ket = z->c; /* [, line 126 */ if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 126 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 126 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 126 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 135 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 135 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 136 */ z->ket = z->c; /* [, line 137 */ if (z->c - 3 <= z->lb || z->p[z->c - 1] != 101) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_4, 3); /* substring, line 137 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 137 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 140 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 140 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 147 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 148 */ z->ket = z->c; /* [, line 149 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } among_var = find_among_b(z, a_5, 3); /* substring, line 149 */ if (!(among_var)) { z->c = 
z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 149 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab3; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 152 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } break; } lab3: ; } break; case 9: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 159 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 159 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 160 */ z->ket = z->c; /* [, line 161 */ if (!(eq_s_b(z, 2, s_16))) { z->c = z->l - m_keep; goto lab4; } z->bra = z->c; /* ], line 161 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 161 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 161 */ if (ret < 0) return ret; } lab4: ; } break; } return 1; } static int r_y_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 168 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 168 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 168 */ among_var = find_among_b(z, a_7, 12); /* substring, line 168 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 168 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (!(eq_s_b(z, 1, s_17))) return 0; { int ret = slice_del(z); /* delete, line 171 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 176 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 176 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 176 */ among_var = find_among_b(z, a_8, 96); /* substring, line 176 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = 
z->c; /* ], line 176 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 179 */ if (!(eq_s_b(z, 1, s_18))) { z->c = z->l - m_keep; goto lab0; } { int m_test = z->l - z->c; /* test, line 179 */ if (!(eq_s_b(z, 1, s_19))) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } lab0: ; } z->bra = z->c; /* ], line 179 */ { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 200 */ if (ret < 0) return ret; } break; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 205 */ among_var = find_among_b(z, a_9, 8); /* substring, line 205 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 205 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 208 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 208 */ if (ret < 0) return ret; } break; case 2: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 210 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 210 */ z->ket = z->c; /* [, line 210 */ if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 210 */ { int m_test = z->l - z->c; /* test, line 210 */ if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 210 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } lab0: ; } break; } return 1; } extern int spanish_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 216 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 216 */ if (ret < 0) return ret; } lab0: z->c = 
c1; } z->lb = z->c; z->c = z->l; /* backwards, line 217 */ { int m2 = z->l - z->c; (void)m2; /* do, line 218 */ { int ret = r_attached_pronoun(z); if (ret == 0) goto lab1; /* call attached_pronoun, line 218 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 219 */ { int m4 = z->l - z->c; (void)m4; /* or, line 219 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab4; /* call standard_suffix, line 219 */ if (ret < 0) return ret; } goto lab3; lab4: z->c = z->l - m4; { int ret = r_y_verb_suffix(z); if (ret == 0) goto lab5; /* call y_verb_suffix, line 220 */ if (ret < 0) return ret; } goto lab3; lab5: z->c = z->l - m4; { int ret = r_verb_suffix(z); if (ret == 0) goto lab2; /* call verb_suffix, line 221 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m5 = z->l - z->c; (void)m5; /* do, line 223 */ { int ret = r_residual_suffix(z); if (ret == 0) goto lab6; /* call residual_suffix, line 223 */ if (ret < 0) return ret; } lab6: z->c = z->l - m5; } z->c = z->lb; { int c6 = z->c; /* do, line 225 */ { int ret = r_postlude(z); if (ret == 0) goto lab7; /* call postlude, line 225 */ if (ret < 0) return ret; } lab7: z->c = c6; } return 1; } extern struct SN_env * spanish_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void spanish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h000066400000000000000000000005101217574114600314120ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * spanish_ISO_8859_1_create_env(void); extern void spanish_ISO_8859_1_close_env(struct SN_env * z); extern int spanish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c000066400000000000000000000245501217574114600314200ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int swedish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * swedish_ISO_8859_1_create_env(void); extern void swedish_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'a' }; static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; static const symbol s_0_5[2] = { 'a', 'd' }; static const symbol s_0_6[1] = { 'e' }; static const symbol s_0_7[3] = { 'a', 'd', 'e' }; static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; static const symbol s_0_10[3] = { 'a', 'r', 'e' }; static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; static const symbol s_0_12[2] = { 'e', 'n' }; static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; static const symbol s_0_16[3] = { 'e', 'r', 'n' }; static const symbol s_0_17[2] = { 'a', 'r' }; static const symbol s_0_18[2] = { 'e', 'r' }; static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; static const symbol s_0_20[2] = { 'o', 'r' }; static const symbol s_0_21[1] = { 's' }; static const symbol s_0_22[2] = { 'a', 's' }; static const symbol s_0_23[5] = { 
'a', 'r', 'n', 'a', 's' }; static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; static const symbol s_0_26[2] = { 'e', 's' }; static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; static const symbol s_0_29[3] = { 'e', 'n', 's' }; static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; static const symbol s_0_33[2] = { 'a', 't' }; static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; static const symbol s_0_35[3] = { 'h', 'e', 't' }; static const symbol s_0_36[3] = { 'a', 's', 't' }; static const struct among a_0[37] = { /* 0 */ { 1, s_0_0, -1, 1, 0}, /* 1 */ { 4, s_0_1, 0, 1, 0}, /* 2 */ { 4, s_0_2, 0, 1, 0}, /* 3 */ { 7, s_0_3, 2, 1, 0}, /* 4 */ { 4, s_0_4, 0, 1, 0}, /* 5 */ { 2, s_0_5, -1, 1, 0}, /* 6 */ { 1, s_0_6, -1, 1, 0}, /* 7 */ { 3, s_0_7, 6, 1, 0}, /* 8 */ { 4, s_0_8, 6, 1, 0}, /* 9 */ { 4, s_0_9, 6, 1, 0}, /* 10 */ { 3, s_0_10, 6, 1, 0}, /* 11 */ { 4, s_0_11, 6, 1, 0}, /* 12 */ { 2, s_0_12, -1, 1, 0}, /* 13 */ { 5, s_0_13, 12, 1, 0}, /* 14 */ { 4, s_0_14, 12, 1, 0}, /* 15 */ { 5, s_0_15, 12, 1, 0}, /* 16 */ { 3, s_0_16, -1, 1, 0}, /* 17 */ { 2, s_0_17, -1, 1, 0}, /* 18 */ { 2, s_0_18, -1, 1, 0}, /* 19 */ { 5, s_0_19, 18, 1, 0}, /* 20 */ { 2, s_0_20, -1, 1, 0}, /* 21 */ { 1, s_0_21, -1, 2, 0}, /* 22 */ { 2, s_0_22, 21, 1, 0}, /* 23 */ { 5, s_0_23, 22, 1, 0}, /* 24 */ { 5, s_0_24, 22, 1, 0}, /* 25 */ { 5, s_0_25, 22, 1, 0}, /* 26 */ { 2, s_0_26, 21, 1, 0}, /* 27 */ { 4, s_0_27, 26, 1, 0}, /* 28 */ { 5, s_0_28, 26, 1, 0}, /* 29 */ { 3, s_0_29, 21, 1, 0}, /* 30 */ { 5, s_0_30, 29, 1, 0}, /* 31 */ { 6, s_0_31, 29, 1, 0}, /* 32 */ { 4, s_0_32, 21, 1, 0}, /* 33 */ { 2, s_0_33, -1, 1, 0}, /* 34 */ { 5, s_0_34, -1, 1, 0}, /* 35 */ { 3, s_0_35, -1, 1, 0}, /* 36 */ { 3, s_0_36, -1, 1, 0} }; static const 
symbol s_1_0[2] = { 'd', 'd' }; static const symbol s_1_1[2] = { 'g', 'd' }; static const symbol s_1_2[2] = { 'n', 'n' }; static const symbol s_1_3[2] = { 'd', 't' }; static const symbol s_1_4[2] = { 'g', 't' }; static const symbol s_1_5[2] = { 'k', 't' }; static const symbol s_1_6[2] = { 't', 't' }; static const struct among a_1[7] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 2, s_1_4, -1, -1, 0}, /* 5 */ { 2, s_1_5, -1, -1, 0}, /* 6 */ { 2, s_1_6, -1, -1, 0} }; static const symbol s_2_0[2] = { 'i', 'g' }; static const symbol s_2_1[3] = { 'l', 'i', 'g' }; static const symbol s_2_2[3] = { 'e', 'l', 's' }; static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; static const symbol s_2_4[4] = { 'l', 0xF6, 's', 't' }; static const struct among a_2[5] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 3, s_2_2, -1, 1, 0}, /* 3 */ { 5, s_2_3, -1, 3, 0}, /* 4 */ { 4, s_2_4, -1, 2, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; static const unsigned char g_s_ending[] = { 119, 127, 149 }; static const symbol s_0[] = { 'l', 0xF6, 's' }; static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 29 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) return 0; z->c = ret; /* hop, line 29 */ } z->I[1] = z->c; /* setmark x, line 29 */ z->c = c_test; } if (out_grouping(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ { /* gopast */ /* non v, line 30 */ int ret = in_grouping(z, g_v, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 30 */ /* try, line 31 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 37 */ int m1 = z->l - z->c; 
(void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 37 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 37 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 37 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b(z, g_s_ending, 98, 121, 0)) return 0; { int ret = slice_del(z); /* delete, line 46 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int mlimit; /* setlimit, line 50 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 50 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* and, line 52 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ z->c = z->l - m2; z->ket = z->c; /* [, line 52 */ if (z->c <= z->lb) { z->lb = mlimit; return 0; } z->c--; /* next, line 52 */ z->bra = z->c; /* ], line 52 */ { int ret = slice_del(z); /* delete, line 52 */ if (ret < 0) return ret; } } z->lb = mlimit; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 55 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 55 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 56 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ if (!(among_var)) { z->lb = 
mlimit; return 0; } z->bra = z->c; /* ], line 56 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 57 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_0); /* <-, line 58 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } extern int swedish_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 66 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 66 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 67 */ { int m2 = z->l - z->c; (void)m2; /* do, line 68 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 68 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 70 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } z->c = z->lb; return 1; } extern struct SN_env * swedish_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } extern void swedish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h000066400000000000000000000005101217574114600314130ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * swedish_ISO_8859_1_create_env(void); extern void swedish_ISO_8859_1_close_env(struct SN_env * z); extern int swedish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c000066400000000000000000001100011217574114600315420ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int romanian_ISO_8859_2_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_vowel_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_combo_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_step_0(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * romanian_ISO_8859_2_create_env(void); extern void romanian_ISO_8859_2_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 'I' }; static const symbol s_0_2[1] = { 'U' }; static const struct among a_0[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 2, 0} }; static const symbol s_1_0[2] = { 'e', 'a' }; static const symbol s_1_1[4] = { 'a', 0xFE, 'i', 'a' }; static const symbol s_1_2[3] = { 'a', 'u', 'a' }; static const symbol s_1_3[3] = { 'i', 'u', 'a' }; static const symbol s_1_4[4] = { 'a', 0xFE, 'i', 'e' }; static const symbol s_1_5[3] = { 'e', 'l', 'e' }; static const symbol s_1_6[3] = { 'i', 'l', 'e' }; static const symbol s_1_7[4] = { 'i', 'i', 'l', 'e' }; static const symbol s_1_8[3] = { 'i', 'e', 'i' }; static const symbol s_1_9[4] = { 'a', 't', 'e', 'i' }; static const symbol s_1_10[2] = { 'i', 'i' }; static const symbol s_1_11[4] = { 'u', 'l', 'u', 'i' }; static const symbol s_1_12[2] = { 'u', 'l' }; static const symbol s_1_13[4] = { 'e', 'l', 'o', 'r' }; static const 
symbol s_1_14[4] = { 'i', 'l', 'o', 'r' }; static const symbol s_1_15[5] = { 'i', 'i', 'l', 'o', 'r' }; static const struct among a_1[16] = { /* 0 */ { 2, s_1_0, -1, 3, 0}, /* 1 */ { 4, s_1_1, -1, 7, 0}, /* 2 */ { 3, s_1_2, -1, 2, 0}, /* 3 */ { 3, s_1_3, -1, 4, 0}, /* 4 */ { 4, s_1_4, -1, 7, 0}, /* 5 */ { 3, s_1_5, -1, 3, 0}, /* 6 */ { 3, s_1_6, -1, 5, 0}, /* 7 */ { 4, s_1_7, 6, 4, 0}, /* 8 */ { 3, s_1_8, -1, 4, 0}, /* 9 */ { 4, s_1_9, -1, 6, 0}, /* 10 */ { 2, s_1_10, -1, 4, 0}, /* 11 */ { 4, s_1_11, -1, 1, 0}, /* 12 */ { 2, s_1_12, -1, 1, 0}, /* 13 */ { 4, s_1_13, -1, 3, 0}, /* 14 */ { 4, s_1_14, -1, 4, 0}, /* 15 */ { 5, s_1_15, 14, 4, 0} }; static const symbol s_2_0[5] = { 'i', 'c', 'a', 'l', 'a' }; static const symbol s_2_1[5] = { 'i', 'c', 'i', 'v', 'a' }; static const symbol s_2_2[5] = { 'a', 't', 'i', 'v', 'a' }; static const symbol s_2_3[5] = { 'i', 't', 'i', 'v', 'a' }; static const symbol s_2_4[5] = { 'i', 'c', 'a', 'l', 'e' }; static const symbol s_2_5[6] = { 'a', 0xFE, 'i', 'u', 'n', 'e' }; static const symbol s_2_6[6] = { 'i', 0xFE, 'i', 'u', 'n', 'e' }; static const symbol s_2_7[6] = { 'a', 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_8[6] = { 'i', 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_9[6] = { 0xE3, 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_10[7] = { 'i', 'c', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_11[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_12[9] = { 'i', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_13[7] = { 'i', 'v', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_14[5] = { 'i', 'c', 'i', 'v', 'e' }; static const symbol s_2_15[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_2_16[5] = { 'i', 't', 'i', 'v', 'e' }; static const symbol s_2_17[5] = { 'i', 'c', 'a', 'l', 'i' }; static const symbol s_2_18[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_2_19[7] = { 'i', 'c', 'a', 't', 'o', 'r', 'i' }; static const symbol s_2_20[5] = { 'i', 
't', 'o', 'r', 'i' }; static const symbol s_2_21[5] = { 0xE3, 't', 'o', 'r', 'i' }; static const symbol s_2_22[7] = { 'i', 'c', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_23[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_24[7] = { 'i', 'v', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_25[5] = { 'i', 'c', 'i', 'v', 'i' }; static const symbol s_2_26[5] = { 'a', 't', 'i', 'v', 'i' }; static const symbol s_2_27[5] = { 'i', 't', 'i', 'v', 'i' }; static const symbol s_2_28[6] = { 'i', 'c', 'i', 't', 0xE3, 'i' }; static const symbol s_2_29[8] = { 'a', 'b', 'i', 'l', 'i', 't', 0xE3, 'i' }; static const symbol s_2_30[6] = { 'i', 'v', 'i', 't', 0xE3, 'i' }; static const symbol s_2_31[7] = { 'i', 'c', 'i', 't', 0xE3, 0xFE, 'i' }; static const symbol s_2_32[9] = { 'a', 'b', 'i', 'l', 'i', 't', 0xE3, 0xFE, 'i' }; static const symbol s_2_33[7] = { 'i', 'v', 'i', 't', 0xE3, 0xFE, 'i' }; static const symbol s_2_34[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_2_35[4] = { 'a', 't', 'o', 'r' }; static const symbol s_2_36[6] = { 'i', 'c', 'a', 't', 'o', 'r' }; static const symbol s_2_37[4] = { 'i', 't', 'o', 'r' }; static const symbol s_2_38[4] = { 0xE3, 't', 'o', 'r' }; static const symbol s_2_39[4] = { 'i', 'c', 'i', 'v' }; static const symbol s_2_40[4] = { 'a', 't', 'i', 'v' }; static const symbol s_2_41[4] = { 'i', 't', 'i', 'v' }; static const symbol s_2_42[5] = { 'i', 'c', 'a', 'l', 0xE3 }; static const symbol s_2_43[5] = { 'i', 'c', 'i', 'v', 0xE3 }; static const symbol s_2_44[5] = { 'a', 't', 'i', 'v', 0xE3 }; static const symbol s_2_45[5] = { 'i', 't', 'i', 'v', 0xE3 }; static const struct among a_2[46] = { /* 0 */ { 5, s_2_0, -1, 4, 0}, /* 1 */ { 5, s_2_1, -1, 4, 0}, /* 2 */ { 5, s_2_2, -1, 5, 0}, /* 3 */ { 5, s_2_3, -1, 6, 0}, /* 4 */ { 5, s_2_4, -1, 4, 0}, /* 5 */ { 6, s_2_5, -1, 5, 0}, /* 6 */ { 6, s_2_6, -1, 6, 0}, /* 7 */ { 6, s_2_7, -1, 5, 0}, /* 8 */ { 6, s_2_8, -1, 6, 0}, /* 9 */ { 6, s_2_9, -1, 5, 0}, /* 10 */ { 7, 
s_2_10, -1, 4, 0}, /* 11 */ { 9, s_2_11, -1, 1, 0}, /* 12 */ { 9, s_2_12, -1, 2, 0}, /* 13 */ { 7, s_2_13, -1, 3, 0}, /* 14 */ { 5, s_2_14, -1, 4, 0}, /* 15 */ { 5, s_2_15, -1, 5, 0}, /* 16 */ { 5, s_2_16, -1, 6, 0}, /* 17 */ { 5, s_2_17, -1, 4, 0}, /* 18 */ { 5, s_2_18, -1, 5, 0}, /* 19 */ { 7, s_2_19, 18, 4, 0}, /* 20 */ { 5, s_2_20, -1, 6, 0}, /* 21 */ { 5, s_2_21, -1, 5, 0}, /* 22 */ { 7, s_2_22, -1, 4, 0}, /* 23 */ { 9, s_2_23, -1, 1, 0}, /* 24 */ { 7, s_2_24, -1, 3, 0}, /* 25 */ { 5, s_2_25, -1, 4, 0}, /* 26 */ { 5, s_2_26, -1, 5, 0}, /* 27 */ { 5, s_2_27, -1, 6, 0}, /* 28 */ { 6, s_2_28, -1, 4, 0}, /* 29 */ { 8, s_2_29, -1, 1, 0}, /* 30 */ { 6, s_2_30, -1, 3, 0}, /* 31 */ { 7, s_2_31, -1, 4, 0}, /* 32 */ { 9, s_2_32, -1, 1, 0}, /* 33 */ { 7, s_2_33, -1, 3, 0}, /* 34 */ { 4, s_2_34, -1, 4, 0}, /* 35 */ { 4, s_2_35, -1, 5, 0}, /* 36 */ { 6, s_2_36, 35, 4, 0}, /* 37 */ { 4, s_2_37, -1, 6, 0}, /* 38 */ { 4, s_2_38, -1, 5, 0}, /* 39 */ { 4, s_2_39, -1, 4, 0}, /* 40 */ { 4, s_2_40, -1, 5, 0}, /* 41 */ { 4, s_2_41, -1, 6, 0}, /* 42 */ { 5, s_2_42, -1, 4, 0}, /* 43 */ { 5, s_2_43, -1, 4, 0}, /* 44 */ { 5, s_2_44, -1, 5, 0}, /* 45 */ { 5, s_2_45, -1, 6, 0} }; static const symbol s_3_0[3] = { 'i', 'c', 'a' }; static const symbol s_3_1[5] = { 'a', 'b', 'i', 'l', 'a' }; static const symbol s_3_2[5] = { 'i', 'b', 'i', 'l', 'a' }; static const symbol s_3_3[4] = { 'o', 'a', 's', 'a' }; static const symbol s_3_4[3] = { 'a', 't', 'a' }; static const symbol s_3_5[3] = { 'i', 't', 'a' }; static const symbol s_3_6[4] = { 'a', 'n', 't', 'a' }; static const symbol s_3_7[4] = { 'i', 's', 't', 'a' }; static const symbol s_3_8[3] = { 'u', 't', 'a' }; static const symbol s_3_9[3] = { 'i', 'v', 'a' }; static const symbol s_3_10[2] = { 'i', 'c' }; static const symbol s_3_11[3] = { 'i', 'c', 'e' }; static const symbol s_3_12[5] = { 'a', 'b', 'i', 'l', 'e' }; static const symbol s_3_13[5] = { 'i', 'b', 'i', 'l', 'e' }; static const symbol s_3_14[4] = { 'i', 's', 'm', 'e' }; static const 
symbol s_3_15[4] = { 'i', 'u', 'n', 'e' }; static const symbol s_3_16[4] = { 'o', 'a', 's', 'e' }; static const symbol s_3_17[3] = { 'a', 't', 'e' }; static const symbol s_3_18[5] = { 'i', 't', 'a', 't', 'e' }; static const symbol s_3_19[3] = { 'i', 't', 'e' }; static const symbol s_3_20[4] = { 'a', 'n', 't', 'e' }; static const symbol s_3_21[4] = { 'i', 's', 't', 'e' }; static const symbol s_3_22[3] = { 'u', 't', 'e' }; static const symbol s_3_23[3] = { 'i', 'v', 'e' }; static const symbol s_3_24[3] = { 'i', 'c', 'i' }; static const symbol s_3_25[5] = { 'a', 'b', 'i', 'l', 'i' }; static const symbol s_3_26[5] = { 'i', 'b', 'i', 'l', 'i' }; static const symbol s_3_27[4] = { 'i', 'u', 'n', 'i' }; static const symbol s_3_28[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_3_29[3] = { 'o', 's', 'i' }; static const symbol s_3_30[3] = { 'a', 't', 'i' }; static const symbol s_3_31[5] = { 'i', 't', 'a', 't', 'i' }; static const symbol s_3_32[3] = { 'i', 't', 'i' }; static const symbol s_3_33[4] = { 'a', 'n', 't', 'i' }; static const symbol s_3_34[4] = { 'i', 's', 't', 'i' }; static const symbol s_3_35[3] = { 'u', 't', 'i' }; static const symbol s_3_36[4] = { 'i', 0xBA, 't', 'i' }; static const symbol s_3_37[3] = { 'i', 'v', 'i' }; static const symbol s_3_38[3] = { 'o', 0xBA, 'i' }; static const symbol s_3_39[4] = { 'i', 't', 0xE3, 'i' }; static const symbol s_3_40[5] = { 'i', 't', 0xE3, 0xFE, 'i' }; static const symbol s_3_41[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_3_42[4] = { 'i', 'b', 'i', 'l' }; static const symbol s_3_43[3] = { 'i', 's', 'm' }; static const symbol s_3_44[4] = { 'a', 't', 'o', 'r' }; static const symbol s_3_45[2] = { 'o', 's' }; static const symbol s_3_46[2] = { 'a', 't' }; static const symbol s_3_47[2] = { 'i', 't' }; static const symbol s_3_48[3] = { 'a', 'n', 't' }; static const symbol s_3_49[3] = { 'i', 's', 't' }; static const symbol s_3_50[2] = { 'u', 't' }; static const symbol s_3_51[2] = { 'i', 'v' }; static const symbol 
s_3_52[3] = { 'i', 'c', 0xE3 }; static const symbol s_3_53[5] = { 'a', 'b', 'i', 'l', 0xE3 }; static const symbol s_3_54[5] = { 'i', 'b', 'i', 'l', 0xE3 }; static const symbol s_3_55[4] = { 'o', 'a', 's', 0xE3 }; static const symbol s_3_56[3] = { 'a', 't', 0xE3 }; static const symbol s_3_57[3] = { 'i', 't', 0xE3 }; static const symbol s_3_58[4] = { 'a', 'n', 't', 0xE3 }; static const symbol s_3_59[4] = { 'i', 's', 't', 0xE3 }; static const symbol s_3_60[3] = { 'u', 't', 0xE3 }; static const symbol s_3_61[3] = { 'i', 'v', 0xE3 }; static const struct among a_3[62] = { /* 0 */ { 3, s_3_0, -1, 1, 0}, /* 1 */ { 5, s_3_1, -1, 1, 0}, /* 2 */ { 5, s_3_2, -1, 1, 0}, /* 3 */ { 4, s_3_3, -1, 1, 0}, /* 4 */ { 3, s_3_4, -1, 1, 0}, /* 5 */ { 3, s_3_5, -1, 1, 0}, /* 6 */ { 4, s_3_6, -1, 1, 0}, /* 7 */ { 4, s_3_7, -1, 3, 0}, /* 8 */ { 3, s_3_8, -1, 1, 0}, /* 9 */ { 3, s_3_9, -1, 1, 0}, /* 10 */ { 2, s_3_10, -1, 1, 0}, /* 11 */ { 3, s_3_11, -1, 1, 0}, /* 12 */ { 5, s_3_12, -1, 1, 0}, /* 13 */ { 5, s_3_13, -1, 1, 0}, /* 14 */ { 4, s_3_14, -1, 3, 0}, /* 15 */ { 4, s_3_15, -1, 2, 0}, /* 16 */ { 4, s_3_16, -1, 1, 0}, /* 17 */ { 3, s_3_17, -1, 1, 0}, /* 18 */ { 5, s_3_18, 17, 1, 0}, /* 19 */ { 3, s_3_19, -1, 1, 0}, /* 20 */ { 4, s_3_20, -1, 1, 0}, /* 21 */ { 4, s_3_21, -1, 3, 0}, /* 22 */ { 3, s_3_22, -1, 1, 0}, /* 23 */ { 3, s_3_23, -1, 1, 0}, /* 24 */ { 3, s_3_24, -1, 1, 0}, /* 25 */ { 5, s_3_25, -1, 1, 0}, /* 26 */ { 5, s_3_26, -1, 1, 0}, /* 27 */ { 4, s_3_27, -1, 2, 0}, /* 28 */ { 5, s_3_28, -1, 1, 0}, /* 29 */ { 3, s_3_29, -1, 1, 0}, /* 30 */ { 3, s_3_30, -1, 1, 0}, /* 31 */ { 5, s_3_31, 30, 1, 0}, /* 32 */ { 3, s_3_32, -1, 1, 0}, /* 33 */ { 4, s_3_33, -1, 1, 0}, /* 34 */ { 4, s_3_34, -1, 3, 0}, /* 35 */ { 3, s_3_35, -1, 1, 0}, /* 36 */ { 4, s_3_36, -1, 3, 0}, /* 37 */ { 3, s_3_37, -1, 1, 0}, /* 38 */ { 3, s_3_38, -1, 1, 0}, /* 39 */ { 4, s_3_39, -1, 1, 0}, /* 40 */ { 5, s_3_40, -1, 1, 0}, /* 41 */ { 4, s_3_41, -1, 1, 0}, /* 42 */ { 4, s_3_42, -1, 1, 0}, /* 43 */ { 3, s_3_43, -1, 
3, 0}, /* 44 */ { 4, s_3_44, -1, 1, 0}, /* 45 */ { 2, s_3_45, -1, 1, 0}, /* 46 */ { 2, s_3_46, -1, 1, 0}, /* 47 */ { 2, s_3_47, -1, 1, 0}, /* 48 */ { 3, s_3_48, -1, 1, 0}, /* 49 */ { 3, s_3_49, -1, 3, 0}, /* 50 */ { 2, s_3_50, -1, 1, 0}, /* 51 */ { 2, s_3_51, -1, 1, 0}, /* 52 */ { 3, s_3_52, -1, 1, 0}, /* 53 */ { 5, s_3_53, -1, 1, 0}, /* 54 */ { 5, s_3_54, -1, 1, 0}, /* 55 */ { 4, s_3_55, -1, 1, 0}, /* 56 */ { 3, s_3_56, -1, 1, 0}, /* 57 */ { 3, s_3_57, -1, 1, 0}, /* 58 */ { 4, s_3_58, -1, 1, 0}, /* 59 */ { 4, s_3_59, -1, 3, 0}, /* 60 */ { 3, s_3_60, -1, 1, 0}, /* 61 */ { 3, s_3_61, -1, 1, 0} }; static const symbol s_4_0[2] = { 'e', 'a' }; static const symbol s_4_1[2] = { 'i', 'a' }; static const symbol s_4_2[3] = { 'e', 's', 'c' }; static const symbol s_4_3[3] = { 0xE3, 's', 'c' }; static const symbol s_4_4[3] = { 'i', 'n', 'd' }; static const symbol s_4_5[3] = { 0xE2, 'n', 'd' }; static const symbol s_4_6[3] = { 'a', 'r', 'e' }; static const symbol s_4_7[3] = { 'e', 'r', 'e' }; static const symbol s_4_8[3] = { 'i', 'r', 'e' }; static const symbol s_4_9[3] = { 0xE2, 'r', 'e' }; static const symbol s_4_10[2] = { 's', 'e' }; static const symbol s_4_11[3] = { 'a', 's', 'e' }; static const symbol s_4_12[4] = { 's', 'e', 's', 'e' }; static const symbol s_4_13[3] = { 'i', 's', 'e' }; static const symbol s_4_14[3] = { 'u', 's', 'e' }; static const symbol s_4_15[3] = { 0xE2, 's', 'e' }; static const symbol s_4_16[4] = { 'e', 0xBA, 't', 'e' }; static const symbol s_4_17[4] = { 0xE3, 0xBA, 't', 'e' }; static const symbol s_4_18[3] = { 'e', 'z', 'e' }; static const symbol s_4_19[2] = { 'a', 'i' }; static const symbol s_4_20[3] = { 'e', 'a', 'i' }; static const symbol s_4_21[3] = { 'i', 'a', 'i' }; static const symbol s_4_22[3] = { 's', 'e', 'i' }; static const symbol s_4_23[4] = { 'e', 0xBA, 't', 'i' }; static const symbol s_4_24[4] = { 0xE3, 0xBA, 't', 'i' }; static const symbol s_4_25[2] = { 'u', 'i' }; static const symbol s_4_26[3] = { 'e', 'z', 'i' }; static const symbol 
s_4_27[3] = { 'a', 0xBA, 'i' }; static const symbol s_4_28[4] = { 's', 'e', 0xBA, 'i' }; static const symbol s_4_29[5] = { 'a', 's', 'e', 0xBA, 'i' }; static const symbol s_4_30[6] = { 's', 'e', 's', 'e', 0xBA, 'i' }; static const symbol s_4_31[5] = { 'i', 's', 'e', 0xBA, 'i' }; static const symbol s_4_32[5] = { 'u', 's', 'e', 0xBA, 'i' }; static const symbol s_4_33[5] = { 0xE2, 's', 'e', 0xBA, 'i' }; static const symbol s_4_34[3] = { 'i', 0xBA, 'i' }; static const symbol s_4_35[3] = { 'u', 0xBA, 'i' }; static const symbol s_4_36[3] = { 0xE2, 0xBA, 'i' }; static const symbol s_4_37[2] = { 0xE2, 'i' }; static const symbol s_4_38[3] = { 'a', 0xFE, 'i' }; static const symbol s_4_39[4] = { 'e', 'a', 0xFE, 'i' }; static const symbol s_4_40[4] = { 'i', 'a', 0xFE, 'i' }; static const symbol s_4_41[3] = { 'e', 0xFE, 'i' }; static const symbol s_4_42[3] = { 'i', 0xFE, 'i' }; static const symbol s_4_43[3] = { 0xE2, 0xFE, 'i' }; static const symbol s_4_44[5] = { 'a', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_45[6] = { 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_46[7] = { 'a', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_47[8] = { 's', 'e', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_48[7] = { 'i', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_49[7] = { 'u', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_50[7] = { 0xE2, 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_51[5] = { 'i', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_52[5] = { 'u', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_53[5] = { 0xE2, 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_54[2] = { 'a', 'm' }; static const symbol s_4_55[3] = { 'e', 'a', 'm' }; static const symbol s_4_56[3] = { 'i', 'a', 'm' }; static const symbol s_4_57[2] = { 'e', 'm' }; static const symbol s_4_58[4] = { 'a', 's', 'e', 'm' }; static const symbol s_4_59[5] = { 's', 'e', 's', 'e', 'm' }; static const symbol s_4_60[4] = { 'i', 's', 'e', 'm' }; static 
const symbol s_4_61[4] = { 'u', 's', 'e', 'm' }; static const symbol s_4_62[4] = { 0xE2, 's', 'e', 'm' }; static const symbol s_4_63[2] = { 'i', 'm' }; static const symbol s_4_64[2] = { 0xE2, 'm' }; static const symbol s_4_65[2] = { 0xE3, 'm' }; static const symbol s_4_66[4] = { 'a', 'r', 0xE3, 'm' }; static const symbol s_4_67[5] = { 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_68[6] = { 'a', 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_69[7] = { 's', 'e', 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_70[6] = { 'i', 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_71[6] = { 'u', 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_72[6] = { 0xE2, 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_73[4] = { 'i', 'r', 0xE3, 'm' }; static const symbol s_4_74[4] = { 'u', 'r', 0xE3, 'm' }; static const symbol s_4_75[4] = { 0xE2, 'r', 0xE3, 'm' }; static const symbol s_4_76[2] = { 'a', 'u' }; static const symbol s_4_77[3] = { 'e', 'a', 'u' }; static const symbol s_4_78[3] = { 'i', 'a', 'u' }; static const symbol s_4_79[4] = { 'i', 'n', 'd', 'u' }; static const symbol s_4_80[4] = { 0xE2, 'n', 'd', 'u' }; static const symbol s_4_81[2] = { 'e', 'z' }; static const symbol s_4_82[5] = { 'e', 'a', 's', 'c', 0xE3 }; static const symbol s_4_83[3] = { 'a', 'r', 0xE3 }; static const symbol s_4_84[4] = { 's', 'e', 'r', 0xE3 }; static const symbol s_4_85[5] = { 'a', 's', 'e', 'r', 0xE3 }; static const symbol s_4_86[6] = { 's', 'e', 's', 'e', 'r', 0xE3 }; static const symbol s_4_87[5] = { 'i', 's', 'e', 'r', 0xE3 }; static const symbol s_4_88[5] = { 'u', 's', 'e', 'r', 0xE3 }; static const symbol s_4_89[5] = { 0xE2, 's', 'e', 'r', 0xE3 }; static const symbol s_4_90[3] = { 'i', 'r', 0xE3 }; static const symbol s_4_91[3] = { 'u', 'r', 0xE3 }; static const symbol s_4_92[3] = { 0xE2, 'r', 0xE3 }; static const symbol s_4_93[4] = { 'e', 'a', 'z', 0xE3 }; static const struct among a_4[94] = { /* 0 */ { 2, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 1, 0}, /* 2 */ { 3, 
s_4_2, -1, 1, 0}, /* 3 */ { 3, s_4_3, -1, 1, 0}, /* 4 */ { 3, s_4_4, -1, 1, 0}, /* 5 */ { 3, s_4_5, -1, 1, 0}, /* 6 */ { 3, s_4_6, -1, 1, 0}, /* 7 */ { 3, s_4_7, -1, 1, 0}, /* 8 */ { 3, s_4_8, -1, 1, 0}, /* 9 */ { 3, s_4_9, -1, 1, 0}, /* 10 */ { 2, s_4_10, -1, 2, 0}, /* 11 */ { 3, s_4_11, 10, 1, 0}, /* 12 */ { 4, s_4_12, 10, 2, 0}, /* 13 */ { 3, s_4_13, 10, 1, 0}, /* 14 */ { 3, s_4_14, 10, 1, 0}, /* 15 */ { 3, s_4_15, 10, 1, 0}, /* 16 */ { 4, s_4_16, -1, 1, 0}, /* 17 */ { 4, s_4_17, -1, 1, 0}, /* 18 */ { 3, s_4_18, -1, 1, 0}, /* 19 */ { 2, s_4_19, -1, 1, 0}, /* 20 */ { 3, s_4_20, 19, 1, 0}, /* 21 */ { 3, s_4_21, 19, 1, 0}, /* 22 */ { 3, s_4_22, -1, 2, 0}, /* 23 */ { 4, s_4_23, -1, 1, 0}, /* 24 */ { 4, s_4_24, -1, 1, 0}, /* 25 */ { 2, s_4_25, -1, 1, 0}, /* 26 */ { 3, s_4_26, -1, 1, 0}, /* 27 */ { 3, s_4_27, -1, 1, 0}, /* 28 */ { 4, s_4_28, -1, 2, 0}, /* 29 */ { 5, s_4_29, 28, 1, 0}, /* 30 */ { 6, s_4_30, 28, 2, 0}, /* 31 */ { 5, s_4_31, 28, 1, 0}, /* 32 */ { 5, s_4_32, 28, 1, 0}, /* 33 */ { 5, s_4_33, 28, 1, 0}, /* 34 */ { 3, s_4_34, -1, 1, 0}, /* 35 */ { 3, s_4_35, -1, 1, 0}, /* 36 */ { 3, s_4_36, -1, 1, 0}, /* 37 */ { 2, s_4_37, -1, 1, 0}, /* 38 */ { 3, s_4_38, -1, 2, 0}, /* 39 */ { 4, s_4_39, 38, 1, 0}, /* 40 */ { 4, s_4_40, 38, 1, 0}, /* 41 */ { 3, s_4_41, -1, 2, 0}, /* 42 */ { 3, s_4_42, -1, 2, 0}, /* 43 */ { 3, s_4_43, -1, 2, 0}, /* 44 */ { 5, s_4_44, -1, 1, 0}, /* 45 */ { 6, s_4_45, -1, 2, 0}, /* 46 */ { 7, s_4_46, 45, 1, 0}, /* 47 */ { 8, s_4_47, 45, 2, 0}, /* 48 */ { 7, s_4_48, 45, 1, 0}, /* 49 */ { 7, s_4_49, 45, 1, 0}, /* 50 */ { 7, s_4_50, 45, 1, 0}, /* 51 */ { 5, s_4_51, -1, 1, 0}, /* 52 */ { 5, s_4_52, -1, 1, 0}, /* 53 */ { 5, s_4_53, -1, 1, 0}, /* 54 */ { 2, s_4_54, -1, 1, 0}, /* 55 */ { 3, s_4_55, 54, 1, 0}, /* 56 */ { 3, s_4_56, 54, 1, 0}, /* 57 */ { 2, s_4_57, -1, 2, 0}, /* 58 */ { 4, s_4_58, 57, 1, 0}, /* 59 */ { 5, s_4_59, 57, 2, 0}, /* 60 */ { 4, s_4_60, 57, 1, 0}, /* 61 */ { 4, s_4_61, 57, 1, 0}, /* 62 */ { 4, s_4_62, 57, 1, 0}, /* 63 */ { 2, 
s_4_63, -1, 2, 0}, /* 64 */ { 2, s_4_64, -1, 2, 0}, /* 65 */ { 2, s_4_65, -1, 2, 0}, /* 66 */ { 4, s_4_66, 65, 1, 0}, /* 67 */ { 5, s_4_67, 65, 2, 0}, /* 68 */ { 6, s_4_68, 67, 1, 0}, /* 69 */ { 7, s_4_69, 67, 2, 0}, /* 70 */ { 6, s_4_70, 67, 1, 0}, /* 71 */ { 6, s_4_71, 67, 1, 0}, /* 72 */ { 6, s_4_72, 67, 1, 0}, /* 73 */ { 4, s_4_73, 65, 1, 0}, /* 74 */ { 4, s_4_74, 65, 1, 0}, /* 75 */ { 4, s_4_75, 65, 1, 0}, /* 76 */ { 2, s_4_76, -1, 1, 0}, /* 77 */ { 3, s_4_77, 76, 1, 0}, /* 78 */ { 3, s_4_78, 76, 1, 0}, /* 79 */ { 4, s_4_79, -1, 1, 0}, /* 80 */ { 4, s_4_80, -1, 1, 0}, /* 81 */ { 2, s_4_81, -1, 1, 0}, /* 82 */ { 5, s_4_82, -1, 1, 0}, /* 83 */ { 3, s_4_83, -1, 1, 0}, /* 84 */ { 4, s_4_84, -1, 2, 0}, /* 85 */ { 5, s_4_85, 84, 1, 0}, /* 86 */ { 6, s_4_86, 84, 2, 0}, /* 87 */ { 5, s_4_87, 84, 1, 0}, /* 88 */ { 5, s_4_88, 84, 1, 0}, /* 89 */ { 5, s_4_89, 84, 1, 0}, /* 90 */ { 3, s_4_90, -1, 1, 0}, /* 91 */ { 3, s_4_91, -1, 1, 0}, /* 92 */ { 3, s_4_92, -1, 1, 0}, /* 93 */ { 4, s_4_93, -1, 1, 0} }; static const symbol s_5_0[1] = { 'a' }; static const symbol s_5_1[1] = { 'e' }; static const symbol s_5_2[2] = { 'i', 'e' }; static const symbol s_5_3[1] = { 'i' }; static const symbol s_5_4[1] = { 0xE3 }; static const struct among a_5[5] = { /* 0 */ { 1, s_5_0, -1, 1, 0}, /* 1 */ { 1, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, 1, 1, 0}, /* 3 */ { 1, s_5_3, -1, 1, 0}, /* 4 */ { 1, s_5_4, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 32 }; static const symbol s_0[] = { 'u' }; static const symbol s_1[] = { 'U' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'I' }; static const symbol s_4[] = { 'i' }; static const symbol s_5[] = { 'u' }; static const symbol s_6[] = { 'a' }; static const symbol s_7[] = { 'e' }; static const symbol s_8[] = { 'i' }; static const symbol s_9[] = { 'a', 'b' }; static const symbol s_10[] = { 'i' }; static const symbol s_11[] = { 'a', 't' }; static const symbol s_12[] = { 'a', 
0xFE, 'i' }; static const symbol s_13[] = { 'a', 'b', 'i', 'l' }; static const symbol s_14[] = { 'i', 'b', 'i', 'l' }; static const symbol s_15[] = { 'i', 'v' }; static const symbol s_16[] = { 'i', 'c' }; static const symbol s_17[] = { 'a', 't' }; static const symbol s_18[] = { 'i', 't' }; static const symbol s_19[] = { 0xFE }; static const symbol s_20[] = { 't' }; static const symbol s_21[] = { 'i', 's', 't' }; static const symbol s_22[] = { 'u' }; static int r_prelude(struct SN_env * z) { while(1) { /* repeat, line 32 */ int c1 = z->c; while(1) { /* goto, line 32 */ int c2 = z->c; if (in_grouping(z, g_v, 97, 238, 0)) goto lab1; z->bra = z->c; /* [, line 33 */ { int c3 = z->c; /* or, line 33 */ if (!(eq_s(z, 1, s_0))) goto lab3; z->ket = z->c; /* ], line 33 */ if (in_grouping(z, g_v, 97, 238, 0)) goto lab3; { int ret = slice_from_s(z, 1, s_1); /* <-, line 33 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = c3; if (!(eq_s(z, 1, s_2))) goto lab1; z->ket = z->c; /* ], line 34 */ if (in_grouping(z, g_v, 97, 238, 0)) goto lab1; { int ret = slice_from_s(z, 1, s_3); /* <-, line 34 */ if (ret < 0) return ret; } } lab2: z->c = c2; break; lab1: z->c = c2; if (z->c >= z->l) goto lab0; z->c++; /* goto, line 32 */ } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 44 */ { int c2 = z->c; /* or, line 46 */ if (in_grouping(z, g_v, 97, 238, 0)) goto lab2; { int c3 = z->c; /* or, line 45 */ if (out_grouping(z, g_v, 97, 238, 0)) goto lab4; { /* gopast */ /* grouping v, line 45 */ int ret = out_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping(z, g_v, 97, 238, 0)) goto lab2; { /* gopast */ /* non v, line 45 */ int ret = in_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping(z, g_v, 97, 238, 0)) goto lab0; { int c4 = z->c; /* 
or, line 47 */ if (out_grouping(z, g_v, 97, 238, 0)) goto lab6; { /* gopast */ /* grouping v, line 47 */ int ret = out_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping(z, g_v, 97, 238, 0)) goto lab0; if (z->c >= z->l) goto lab0; z->c++; /* next, line 47 */ } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 48 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 50 */ { /* gopast */ /* grouping v, line 51 */ int ret = out_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 51 */ int ret = in_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 51 */ { /* gopast */ /* grouping v, line 52 */ int ret = out_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 52 */ int ret = in_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 52 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 56 */ int c1 = z->c; z->bra = z->c; /* [, line 58 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else among_var = find_among(z, a_0, 3); /* substring, line 58 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 58 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_4); /* <-, line 59 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_5); /* <-, line 60 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 61 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int 
r_step_0(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 73 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((266786 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_1, 16); /* substring, line 73 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 73 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 73 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 75 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_6); /* <-, line 77 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_7); /* <-, line 79 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_8); /* <-, line 81 */ if (ret < 0) return ret; } break; case 5: { int m1 = z->l - z->c; (void)m1; /* not, line 83 */ if (!(eq_s_b(z, 2, s_9))) goto lab0; return 0; lab0: z->c = z->l - m1; } { int ret = slice_from_s(z, 1, s_10); /* <-, line 83 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_11); /* <-, line 85 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_12); /* <-, line 87 */ if (ret < 0) return ret; } break; } return 1; } static int r_combo_suffix(struct SN_env * z) { int among_var; { int m_test = z->l - z->c; /* test, line 91 */ z->ket = z->c; /* [, line 92 */ among_var = find_among_b(z, a_2, 46); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 92 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_13); /* <-, line 101 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_14); /* <-, line 104 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_15); /* <-, line 107 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_16); /* 
<-, line 113 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 2, s_17); /* <-, line 118 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_18); /* <-, line 122 */ if (ret < 0) return ret; } break; } z->B[0] = 1; /* set standard_suffix_removed, line 125 */ z->c = z->l - m_test; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->B[0] = 0; /* unset standard_suffix_removed, line 130 */ while(1) { /* repeat, line 131 */ int m1 = z->l - z->c; (void)m1; { int ret = r_combo_suffix(z); if (ret == 0) goto lab0; /* call combo_suffix, line 131 */ if (ret < 0) return ret; } continue; lab0: z->c = z->l - m1; break; } z->ket = z->c; /* [, line 132 */ among_var = find_among_b(z, a_3, 62); /* substring, line 132 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 132 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 132 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 149 */ if (ret < 0) return ret; } break; case 2: if (!(eq_s_b(z, 1, s_19))) return 0; z->bra = z->c; /* ], line 152 */ { int ret = slice_from_s(z, 1, s_20); /* <-, line 152 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 3, s_21); /* <-, line 156 */ if (ret < 0) return ret; } break; } z->B[0] = 1; /* set standard_suffix_removed, line 160 */ return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 164 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 164 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 165 */ among_var = find_among_b(z, a_4, 94); /* substring, line 165 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 165 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int m2 = z->l - z->c; (void)m2; /* or, line 200 */ if (out_grouping_b(z, g_v, 97, 238, 
0)) goto lab1; goto lab0; lab1: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_22))) { z->lb = mlimit; return 0; } } lab0: { int ret = slice_del(z); /* delete, line 200 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 214 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_vowel_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 219 */ among_var = find_among_b(z, a_5, 5); /* substring, line 219 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 219 */ { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 219 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 220 */ if (ret < 0) return ret; } break; } return 1; } extern int romanian_ISO_8859_2_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 226 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 226 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 227 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 227 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 228 */ { int m3 = z->l - z->c; (void)m3; /* do, line 229 */ { int ret = r_step_0(z); if (ret == 0) goto lab2; /* call step_0, line 229 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 230 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab3; /* call standard_suffix, line 230 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 231 */ { int m6 = z->l - z->c; (void)m6; /* or, line 231 */ if (!(z->B[0])) goto lab6; /* Boolean test standard_suffix_removed, line 231 */ goto lab5; lab6: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ if (ret < 0) return ret; } } lab5: lab4: z->c = z->l - m5; } { int m7 = z->l - 
z->c; (void)m7; /* do, line 232 */ { int ret = r_vowel_suffix(z); if (ret == 0) goto lab7; /* call vowel_suffix, line 232 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } z->c = z->lb; { int c8 = z->c; /* do, line 234 */ { int ret = r_postlude(z); if (ret == 0) goto lab8; /* call postlude, line 234 */ if (ret < 0) return ret; } lab8: z->c = c8; } return 1; } extern struct SN_env * romanian_ISO_8859_2_create_env(void) { return SN_create_env(0, 3, 1); } extern void romanian_ISO_8859_2_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h000066400000000000000000000005131217574114600315550ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * romanian_ISO_8859_2_create_env(void); extern void romanian_ISO_8859_2_close_env(struct SN_env * z); extern int romanian_ISO_8859_2_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_KOI8_R_russian.c000066400000000000000000000572021217574114600310620ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int russian_KOI8_R_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_tidy_up(struct SN_env * z); static int r_derivational(struct SN_env * z); static int r_noun(struct SN_env * z); static int r_verb(struct SN_env * z); static int r_reflexive(struct SN_env * z); static int r_adjectival(struct SN_env * z); static int r_adjective(struct SN_env * z); static int r_perfective_gerund(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * russian_KOI8_R_create_env(void); extern void 
russian_KOI8_R_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 0xD7, 0xDB, 0xC9 }; static const symbol s_0_1[4] = { 0xC9, 0xD7, 0xDB, 0xC9 }; static const symbol s_0_2[4] = { 0xD9, 0xD7, 0xDB, 0xC9 }; static const symbol s_0_3[1] = { 0xD7 }; static const symbol s_0_4[2] = { 0xC9, 0xD7 }; static const symbol s_0_5[2] = { 0xD9, 0xD7 }; static const symbol s_0_6[5] = { 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; static const symbol s_0_7[6] = { 0xC9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; static const symbol s_0_8[6] = { 0xD9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; static const struct among a_0[9] = { /* 0 */ { 3, s_0_0, -1, 1, 0}, /* 1 */ { 4, s_0_1, 0, 2, 0}, /* 2 */ { 4, s_0_2, 0, 2, 0}, /* 3 */ { 1, s_0_3, -1, 1, 0}, /* 4 */ { 2, s_0_4, 3, 2, 0}, /* 5 */ { 2, s_0_5, 3, 2, 0}, /* 6 */ { 5, s_0_6, -1, 1, 0}, /* 7 */ { 6, s_0_7, 6, 2, 0}, /* 8 */ { 6, s_0_8, 6, 2, 0} }; static const symbol s_1_0[2] = { 0xC0, 0xC0 }; static const symbol s_1_1[2] = { 0xC5, 0xC0 }; static const symbol s_1_2[2] = { 0xCF, 0xC0 }; static const symbol s_1_3[2] = { 0xD5, 0xC0 }; static const symbol s_1_4[2] = { 0xC5, 0xC5 }; static const symbol s_1_5[2] = { 0xC9, 0xC5 }; static const symbol s_1_6[2] = { 0xCF, 0xC5 }; static const symbol s_1_7[2] = { 0xD9, 0xC5 }; static const symbol s_1_8[2] = { 0xC9, 0xC8 }; static const symbol s_1_9[2] = { 0xD9, 0xC8 }; static const symbol s_1_10[3] = { 0xC9, 0xCD, 0xC9 }; static const symbol s_1_11[3] = { 0xD9, 0xCD, 0xC9 }; static const symbol s_1_12[2] = { 0xC5, 0xCA }; static const symbol s_1_13[2] = { 0xC9, 0xCA }; static const symbol s_1_14[2] = { 0xCF, 0xCA }; static const symbol s_1_15[2] = { 0xD9, 0xCA }; static const symbol s_1_16[2] = { 0xC5, 0xCD }; static const symbol s_1_17[2] = { 0xC9, 0xCD }; static const symbol s_1_18[2] = { 0xCF, 0xCD }; static const symbol s_1_19[2] = { 0xD9, 0xCD }; static const symbol s_1_20[3] = { 0xC5, 0xC7, 0xCF }; static const symbol s_1_21[3] = { 0xCF, 0xC7, 0xCF }; static const symbol s_1_22[2] = 
{ 0xC1, 0xD1 }; static const symbol s_1_23[2] = { 0xD1, 0xD1 }; static const symbol s_1_24[3] = { 0xC5, 0xCD, 0xD5 }; static const symbol s_1_25[3] = { 0xCF, 0xCD, 0xD5 }; static const struct among a_1[26] = { /* 0 */ { 2, s_1_0, -1, 1, 0}, /* 1 */ { 2, s_1_1, -1, 1, 0}, /* 2 */ { 2, s_1_2, -1, 1, 0}, /* 3 */ { 2, s_1_3, -1, 1, 0}, /* 4 */ { 2, s_1_4, -1, 1, 0}, /* 5 */ { 2, s_1_5, -1, 1, 0}, /* 6 */ { 2, s_1_6, -1, 1, 0}, /* 7 */ { 2, s_1_7, -1, 1, 0}, /* 8 */ { 2, s_1_8, -1, 1, 0}, /* 9 */ { 2, s_1_9, -1, 1, 0}, /* 10 */ { 3, s_1_10, -1, 1, 0}, /* 11 */ { 3, s_1_11, -1, 1, 0}, /* 12 */ { 2, s_1_12, -1, 1, 0}, /* 13 */ { 2, s_1_13, -1, 1, 0}, /* 14 */ { 2, s_1_14, -1, 1, 0}, /* 15 */ { 2, s_1_15, -1, 1, 0}, /* 16 */ { 2, s_1_16, -1, 1, 0}, /* 17 */ { 2, s_1_17, -1, 1, 0}, /* 18 */ { 2, s_1_18, -1, 1, 0}, /* 19 */ { 2, s_1_19, -1, 1, 0}, /* 20 */ { 3, s_1_20, -1, 1, 0}, /* 21 */ { 3, s_1_21, -1, 1, 0}, /* 22 */ { 2, s_1_22, -1, 1, 0}, /* 23 */ { 2, s_1_23, -1, 1, 0}, /* 24 */ { 3, s_1_24, -1, 1, 0}, /* 25 */ { 3, s_1_25, -1, 1, 0} }; static const symbol s_2_0[2] = { 0xC5, 0xCD }; static const symbol s_2_1[2] = { 0xCE, 0xCE }; static const symbol s_2_2[2] = { 0xD7, 0xDB }; static const symbol s_2_3[3] = { 0xC9, 0xD7, 0xDB }; static const symbol s_2_4[3] = { 0xD9, 0xD7, 0xDB }; static const symbol s_2_5[1] = { 0xDD }; static const symbol s_2_6[2] = { 0xC0, 0xDD }; static const symbol s_2_7[3] = { 0xD5, 0xC0, 0xDD }; static const struct among a_2[8] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 2, s_2_1, -1, 1, 0}, /* 2 */ { 2, s_2_2, -1, 1, 0}, /* 3 */ { 3, s_2_3, 2, 2, 0}, /* 4 */ { 3, s_2_4, 2, 2, 0}, /* 5 */ { 1, s_2_5, -1, 1, 0}, /* 6 */ { 2, s_2_6, 5, 1, 0}, /* 7 */ { 3, s_2_7, 6, 2, 0} }; static const symbol s_3_0[2] = { 0xD3, 0xD1 }; static const symbol s_3_1[2] = { 0xD3, 0xD8 }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 2, s_3_1, -1, 1, 0} }; static const symbol s_4_0[1] = { 0xC0 }; static const symbol s_4_1[2] = { 0xD5, 
0xC0 }; static const symbol s_4_2[2] = { 0xCC, 0xC1 }; static const symbol s_4_3[3] = { 0xC9, 0xCC, 0xC1 }; static const symbol s_4_4[3] = { 0xD9, 0xCC, 0xC1 }; static const symbol s_4_5[2] = { 0xCE, 0xC1 }; static const symbol s_4_6[3] = { 0xC5, 0xCE, 0xC1 }; static const symbol s_4_7[3] = { 0xC5, 0xD4, 0xC5 }; static const symbol s_4_8[3] = { 0xC9, 0xD4, 0xC5 }; static const symbol s_4_9[3] = { 0xCA, 0xD4, 0xC5 }; static const symbol s_4_10[4] = { 0xC5, 0xCA, 0xD4, 0xC5 }; static const symbol s_4_11[4] = { 0xD5, 0xCA, 0xD4, 0xC5 }; static const symbol s_4_12[2] = { 0xCC, 0xC9 }; static const symbol s_4_13[3] = { 0xC9, 0xCC, 0xC9 }; static const symbol s_4_14[3] = { 0xD9, 0xCC, 0xC9 }; static const symbol s_4_15[1] = { 0xCA }; static const symbol s_4_16[2] = { 0xC5, 0xCA }; static const symbol s_4_17[2] = { 0xD5, 0xCA }; static const symbol s_4_18[1] = { 0xCC }; static const symbol s_4_19[2] = { 0xC9, 0xCC }; static const symbol s_4_20[2] = { 0xD9, 0xCC }; static const symbol s_4_21[2] = { 0xC5, 0xCD }; static const symbol s_4_22[2] = { 0xC9, 0xCD }; static const symbol s_4_23[2] = { 0xD9, 0xCD }; static const symbol s_4_24[1] = { 0xCE }; static const symbol s_4_25[2] = { 0xC5, 0xCE }; static const symbol s_4_26[2] = { 0xCC, 0xCF }; static const symbol s_4_27[3] = { 0xC9, 0xCC, 0xCF }; static const symbol s_4_28[3] = { 0xD9, 0xCC, 0xCF }; static const symbol s_4_29[2] = { 0xCE, 0xCF }; static const symbol s_4_30[3] = { 0xC5, 0xCE, 0xCF }; static const symbol s_4_31[3] = { 0xCE, 0xCE, 0xCF }; static const symbol s_4_32[2] = { 0xC0, 0xD4 }; static const symbol s_4_33[3] = { 0xD5, 0xC0, 0xD4 }; static const symbol s_4_34[2] = { 0xC5, 0xD4 }; static const symbol s_4_35[3] = { 0xD5, 0xC5, 0xD4 }; static const symbol s_4_36[2] = { 0xC9, 0xD4 }; static const symbol s_4_37[2] = { 0xD1, 0xD4 }; static const symbol s_4_38[2] = { 0xD9, 0xD4 }; static const symbol s_4_39[2] = { 0xD4, 0xD8 }; static const symbol s_4_40[3] = { 0xC9, 0xD4, 0xD8 }; static const symbol s_4_41[3] = 
{ 0xD9, 0xD4, 0xD8 }; static const symbol s_4_42[3] = { 0xC5, 0xDB, 0xD8 }; static const symbol s_4_43[3] = { 0xC9, 0xDB, 0xD8 }; static const symbol s_4_44[2] = { 0xCE, 0xD9 }; static const symbol s_4_45[3] = { 0xC5, 0xCE, 0xD9 }; static const struct among a_4[46] = { /* 0 */ { 1, s_4_0, -1, 2, 0}, /* 1 */ { 2, s_4_1, 0, 2, 0}, /* 2 */ { 2, s_4_2, -1, 1, 0}, /* 3 */ { 3, s_4_3, 2, 2, 0}, /* 4 */ { 3, s_4_4, 2, 2, 0}, /* 5 */ { 2, s_4_5, -1, 1, 0}, /* 6 */ { 3, s_4_6, 5, 2, 0}, /* 7 */ { 3, s_4_7, -1, 1, 0}, /* 8 */ { 3, s_4_8, -1, 2, 0}, /* 9 */ { 3, s_4_9, -1, 1, 0}, /* 10 */ { 4, s_4_10, 9, 2, 0}, /* 11 */ { 4, s_4_11, 9, 2, 0}, /* 12 */ { 2, s_4_12, -1, 1, 0}, /* 13 */ { 3, s_4_13, 12, 2, 0}, /* 14 */ { 3, s_4_14, 12, 2, 0}, /* 15 */ { 1, s_4_15, -1, 1, 0}, /* 16 */ { 2, s_4_16, 15, 2, 0}, /* 17 */ { 2, s_4_17, 15, 2, 0}, /* 18 */ { 1, s_4_18, -1, 1, 0}, /* 19 */ { 2, s_4_19, 18, 2, 0}, /* 20 */ { 2, s_4_20, 18, 2, 0}, /* 21 */ { 2, s_4_21, -1, 1, 0}, /* 22 */ { 2, s_4_22, -1, 2, 0}, /* 23 */ { 2, s_4_23, -1, 2, 0}, /* 24 */ { 1, s_4_24, -1, 1, 0}, /* 25 */ { 2, s_4_25, 24, 2, 0}, /* 26 */ { 2, s_4_26, -1, 1, 0}, /* 27 */ { 3, s_4_27, 26, 2, 0}, /* 28 */ { 3, s_4_28, 26, 2, 0}, /* 29 */ { 2, s_4_29, -1, 1, 0}, /* 30 */ { 3, s_4_30, 29, 2, 0}, /* 31 */ { 3, s_4_31, 29, 1, 0}, /* 32 */ { 2, s_4_32, -1, 1, 0}, /* 33 */ { 3, s_4_33, 32, 2, 0}, /* 34 */ { 2, s_4_34, -1, 1, 0}, /* 35 */ { 3, s_4_35, 34, 2, 0}, /* 36 */ { 2, s_4_36, -1, 2, 0}, /* 37 */ { 2, s_4_37, -1, 2, 0}, /* 38 */ { 2, s_4_38, -1, 2, 0}, /* 39 */ { 2, s_4_39, -1, 1, 0}, /* 40 */ { 3, s_4_40, 39, 2, 0}, /* 41 */ { 3, s_4_41, 39, 2, 0}, /* 42 */ { 3, s_4_42, -1, 1, 0}, /* 43 */ { 3, s_4_43, -1, 2, 0}, /* 44 */ { 2, s_4_44, -1, 1, 0}, /* 45 */ { 3, s_4_45, 44, 2, 0} }; static const symbol s_5_0[1] = { 0xC0 }; static const symbol s_5_1[2] = { 0xC9, 0xC0 }; static const symbol s_5_2[2] = { 0xD8, 0xC0 }; static const symbol s_5_3[1] = { 0xC1 }; static const symbol s_5_4[1] = { 0xC5 }; static const 
symbol s_5_5[2] = { 0xC9, 0xC5 }; static const symbol s_5_6[2] = { 0xD8, 0xC5 }; static const symbol s_5_7[2] = { 0xC1, 0xC8 }; static const symbol s_5_8[2] = { 0xD1, 0xC8 }; static const symbol s_5_9[3] = { 0xC9, 0xD1, 0xC8 }; static const symbol s_5_10[1] = { 0xC9 }; static const symbol s_5_11[2] = { 0xC5, 0xC9 }; static const symbol s_5_12[2] = { 0xC9, 0xC9 }; static const symbol s_5_13[3] = { 0xC1, 0xCD, 0xC9 }; static const symbol s_5_14[3] = { 0xD1, 0xCD, 0xC9 }; static const symbol s_5_15[4] = { 0xC9, 0xD1, 0xCD, 0xC9 }; static const symbol s_5_16[1] = { 0xCA }; static const symbol s_5_17[2] = { 0xC5, 0xCA }; static const symbol s_5_18[3] = { 0xC9, 0xC5, 0xCA }; static const symbol s_5_19[2] = { 0xC9, 0xCA }; static const symbol s_5_20[2] = { 0xCF, 0xCA }; static const symbol s_5_21[2] = { 0xC1, 0xCD }; static const symbol s_5_22[2] = { 0xC5, 0xCD }; static const symbol s_5_23[3] = { 0xC9, 0xC5, 0xCD }; static const symbol s_5_24[2] = { 0xCF, 0xCD }; static const symbol s_5_25[2] = { 0xD1, 0xCD }; static const symbol s_5_26[3] = { 0xC9, 0xD1, 0xCD }; static const symbol s_5_27[1] = { 0xCF }; static const symbol s_5_28[1] = { 0xD1 }; static const symbol s_5_29[2] = { 0xC9, 0xD1 }; static const symbol s_5_30[2] = { 0xD8, 0xD1 }; static const symbol s_5_31[1] = { 0xD5 }; static const symbol s_5_32[2] = { 0xC5, 0xD7 }; static const symbol s_5_33[2] = { 0xCF, 0xD7 }; static const symbol s_5_34[1] = { 0xD8 }; static const symbol s_5_35[1] = { 0xD9 }; static const struct among a_5[36] = { /* 0 */ { 1, s_5_0, -1, 1, 0}, /* 1 */ { 2, s_5_1, 0, 1, 0}, /* 2 */ { 2, s_5_2, 0, 1, 0}, /* 3 */ { 1, s_5_3, -1, 1, 0}, /* 4 */ { 1, s_5_4, -1, 1, 0}, /* 5 */ { 2, s_5_5, 4, 1, 0}, /* 6 */ { 2, s_5_6, 4, 1, 0}, /* 7 */ { 2, s_5_7, -1, 1, 0}, /* 8 */ { 2, s_5_8, -1, 1, 0}, /* 9 */ { 3, s_5_9, 8, 1, 0}, /* 10 */ { 1, s_5_10, -1, 1, 0}, /* 11 */ { 2, s_5_11, 10, 1, 0}, /* 12 */ { 2, s_5_12, 10, 1, 0}, /* 13 */ { 3, s_5_13, 10, 1, 0}, /* 14 */ { 3, s_5_14, 10, 1, 0}, /* 15 */ { 4, 
s_5_15, 14, 1, 0}, /* 16 */ { 1, s_5_16, -1, 1, 0}, /* 17 */ { 2, s_5_17, 16, 1, 0}, /* 18 */ { 3, s_5_18, 17, 1, 0}, /* 19 */ { 2, s_5_19, 16, 1, 0}, /* 20 */ { 2, s_5_20, 16, 1, 0}, /* 21 */ { 2, s_5_21, -1, 1, 0}, /* 22 */ { 2, s_5_22, -1, 1, 0}, /* 23 */ { 3, s_5_23, 22, 1, 0}, /* 24 */ { 2, s_5_24, -1, 1, 0}, /* 25 */ { 2, s_5_25, -1, 1, 0}, /* 26 */ { 3, s_5_26, 25, 1, 0}, /* 27 */ { 1, s_5_27, -1, 1, 0}, /* 28 */ { 1, s_5_28, -1, 1, 0}, /* 29 */ { 2, s_5_29, 28, 1, 0}, /* 30 */ { 2, s_5_30, 28, 1, 0}, /* 31 */ { 1, s_5_31, -1, 1, 0}, /* 32 */ { 2, s_5_32, -1, 1, 0}, /* 33 */ { 2, s_5_33, -1, 1, 0}, /* 34 */ { 1, s_5_34, -1, 1, 0}, /* 35 */ { 1, s_5_35, -1, 1, 0} }; static const symbol s_6_0[3] = { 0xCF, 0xD3, 0xD4 }; static const symbol s_6_1[4] = { 0xCF, 0xD3, 0xD4, 0xD8 }; static const struct among a_6[2] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 4, s_6_1, -1, 1, 0} }; static const symbol s_7_0[4] = { 0xC5, 0xCA, 0xDB, 0xC5 }; static const symbol s_7_1[1] = { 0xCE }; static const symbol s_7_2[1] = { 0xD8 }; static const symbol s_7_3[3] = { 0xC5, 0xCA, 0xDB }; static const struct among a_7[4] = { /* 0 */ { 4, s_7_0, -1, 1, 0}, /* 1 */ { 1, s_7_1, -1, 2, 0}, /* 2 */ { 1, s_7_2, -1, 3, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0} }; static const unsigned char g_v[] = { 35, 130, 34, 18 }; static const symbol s_0[] = { 0xC1 }; static const symbol s_1[] = { 0xD1 }; static const symbol s_2[] = { 0xC1 }; static const symbol s_3[] = { 0xD1 }; static const symbol s_4[] = { 0xC1 }; static const symbol s_5[] = { 0xD1 }; static const symbol s_6[] = { 0xCE }; static const symbol s_7[] = { 0xCE }; static const symbol s_8[] = { 0xCE }; static const symbol s_9[] = { 0xC9 }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c1 = z->c; /* do, line 63 */ { /* gopast */ /* grouping v, line 64 */ int ret = out_grouping(z, g_v, 192, 220, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[0] = z->c; /* setmark pV, line 64 */ { /* gopast */ /* non v, line 
64 */ int ret = in_grouping(z, g_v, 192, 220, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* grouping v, line 65 */ int ret = out_grouping(z, g_v, 192, 220, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 65 */ int ret = in_grouping(z, g_v, 192, 220, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 65 */ lab0: z->c = c1; } return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_perfective_gerund(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 74 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((25166336 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_0, 9); /* substring, line 74 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 74 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 78 */ if (!(eq_s_b(z, 1, s_0))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_1))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 85 */ if (ret < 0) return ret; } break; } return 1; } static int r_adjective(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 90 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((2271009 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_1, 26); /* substring, line 90 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 90 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; } return 1; } static int r_adjectival(struct SN_env * z) { int among_var; { int ret = r_adjective(z); if (ret == 0) return 0; /* call adjective, line 104 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ z->ket = z->c; /* [, line 112 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || 
!((671113216 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_2, 8); /* substring, line 112 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 112 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 117 */ if (!(eq_s_b(z, 1, s_2))) goto lab2; goto lab1; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; } } lab1: { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 124 */ if (ret < 0) return ret; } break; } lab0: ; } return 1; } static int r_reflexive(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 131 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 209 && z->p[z->c - 1] != 216)) return 0; among_var = find_among_b(z, a_3, 2); /* substring, line 131 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 131 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 139 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((51443235 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 46); /* substring, line 139 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 139 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ if (!(eq_s_b(z, 1, s_4))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_5))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 153 */ if (ret < 0) return ret; } break; } return 1; } static int r_noun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 162 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 
|| !((60991267 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 36); /* substring, line 162 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 162 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 169 */ if (ret < 0) return ret; } break; } return 1; } static int r_derivational(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 178 */ if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 212 && z->p[z->c - 1] != 216)) return 0; among_var = find_among_b(z, a_6, 2); /* substring, line 178 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 178 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 178 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 181 */ if (ret < 0) return ret; } break; } return 1; } static int r_tidy_up(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 186 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((151011360 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_7, 4); /* substring, line 186 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 186 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 190 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 191 */ if (!(eq_s_b(z, 1, s_6))) return 0; z->bra = z->c; /* ], line 191 */ if (!(eq_s_b(z, 1, s_7))) return 0; { int ret = slice_del(z); /* delete, line 191 */ if (ret < 0) return ret; } break; case 2: if (!(eq_s_b(z, 1, s_8))) return 0; { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 196 */ if (ret < 0) return ret; } break; } return 1; } extern int russian_KOI8_R_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 203 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 203 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = 
z->c; z->c = z->l; /* backwards, line 204 */ { int mlimit; /* setlimit, line 204 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 204 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ { int m4 = z->l - z->c; (void)m4; /* or, line 206 */ { int ret = r_perfective_gerund(z); if (ret == 0) goto lab3; /* call perfective_gerund, line 206 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = z->l - m4; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 207 */ { int ret = r_reflexive(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call reflexive, line 207 */ if (ret < 0) return ret; } lab4: ; } { int m5 = z->l - z->c; (void)m5; /* or, line 208 */ { int ret = r_adjectival(z); if (ret == 0) goto lab6; /* call adjectival, line 208 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m5; { int ret = r_verb(z); if (ret == 0) goto lab7; /* call verb, line 208 */ if (ret < 0) return ret; } goto lab5; lab7: z->c = z->l - m5; { int ret = r_noun(z); if (ret == 0) goto lab1; /* call noun, line 208 */ if (ret < 0) return ret; } } lab5: ; } lab2: lab1: z->c = z->l - m3; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 211 */ z->ket = z->c; /* [, line 211 */ if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m_keep; goto lab8; } z->bra = z->c; /* ], line 211 */ { int ret = slice_del(z); /* delete, line 211 */ if (ret < 0) return ret; } lab8: ; } { int m6 = z->l - z->c; (void)m6; /* do, line 214 */ { int ret = r_derivational(z); if (ret == 0) goto lab9; /* call derivational, line 214 */ if (ret < 0) return ret; } lab9: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 215 */ { int ret = r_tidy_up(z); if (ret == 0) goto lab10; /* call tidy_up, line 215 */ if (ret < 0) return ret; } lab10: z->c = z->l - m7; } z->lb = mlimit; } z->c = z->lb; return 1; } extern struct SN_env * russian_KOI8_R_create_env(void) { return SN_create_env(0, 2, 0); } 
extern void russian_KOI8_R_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_KOI8_R_russian.h000066400000000000000000000004741217574114600310660ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * russian_KOI8_R_create_env(void); extern void russian_KOI8_R_close_env(struct SN_env * z); extern int russian_KOI8_R_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_danish.c000066400000000000000000000265061217574114600305410ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int danish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_undouble(struct SN_env * z); static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * danish_UTF_8_create_env(void); extern void danish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 'h', 'e', 'd' }; static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; static const symbol s_0_3[1] = { 'e' }; static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; static const symbol s_0_7[3] = { 'e', 'n', 'e' }; static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; static const symbol s_0_9[3] = { 'e', 'r', 'e' }; static const symbol s_0_10[2] = { 'e', 'n' }; static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; 
static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; static const symbol s_0_13[2] = { 'e', 'r' }; static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; static const symbol s_0_16[1] = { 's' }; static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; static const symbol s_0_18[2] = { 'e', 's' }; static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; static const symbol s_0_24[3] = { 'e', 'n', 's' }; static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; static const symbol s_0_27[3] = { 'e', 'r', 's' }; static const symbol s_0_28[3] = { 'e', 't', 's' }; static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; static const symbol s_0_30[2] = { 'e', 't' }; static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; static const struct among a_0[32] = { /* 0 */ { 3, s_0_0, -1, 1, 0}, /* 1 */ { 5, s_0_1, 0, 1, 0}, /* 2 */ { 4, s_0_2, -1, 1, 0}, /* 3 */ { 1, s_0_3, -1, 1, 0}, /* 4 */ { 5, s_0_4, 3, 1, 0}, /* 5 */ { 4, s_0_5, 3, 1, 0}, /* 6 */ { 6, s_0_6, 5, 1, 0}, /* 7 */ { 3, s_0_7, 3, 1, 0}, /* 8 */ { 4, s_0_8, 3, 1, 0}, /* 9 */ { 3, s_0_9, 3, 1, 0}, /* 10 */ { 2, s_0_10, -1, 1, 0}, /* 11 */ { 5, s_0_11, 10, 1, 0}, /* 12 */ { 4, s_0_12, 10, 1, 0}, /* 13 */ { 2, s_0_13, -1, 1, 0}, /* 14 */ { 5, s_0_14, 13, 1, 0}, /* 15 */ { 4, s_0_15, 13, 1, 0}, /* 16 */ { 1, s_0_16, -1, 2, 0}, /* 17 */ { 4, s_0_17, 16, 1, 0}, /* 18 */ { 2, s_0_18, 16, 1, 0}, /* 19 */ { 5, s_0_19, 18, 1, 0}, /* 20 */ { 7, s_0_20, 19, 1, 0}, /* 21 */ { 4, s_0_21, 18, 1, 0}, /* 22 */ { 5, s_0_22, 18, 1, 0}, /* 23 */ { 4, s_0_23, 18, 1, 0}, /* 24 */ { 3, s_0_24, 16, 1, 0}, /* 25 */ { 6, s_0_25, 24, 1, 0}, /* 26 */ { 5, 
s_0_26, 24, 1, 0}, /* 27 */ { 3, s_0_27, 16, 1, 0}, /* 28 */ { 3, s_0_28, 16, 1, 0}, /* 29 */ { 5, s_0_29, 28, 1, 0}, /* 30 */ { 2, s_0_30, -1, 1, 0}, /* 31 */ { 4, s_0_31, 30, 1, 0} }; static const symbol s_1_0[2] = { 'g', 'd' }; static const symbol s_1_1[2] = { 'd', 't' }; static const symbol s_1_2[2] = { 'g', 't' }; static const symbol s_1_3[2] = { 'k', 't' }; static const struct among a_1[4] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0} }; static const symbol s_2_0[2] = { 'i', 'g' }; static const symbol s_2_1[3] = { 'l', 'i', 'g' }; static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; static const symbol s_2_3[3] = { 'e', 'l', 's' }; static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' }; static const struct among a_2[5] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 4, s_2_2, 1, 1, 0}, /* 3 */ { 3, s_2_3, -1, 1, 0}, /* 4 */ { 5, s_2_4, -1, 2, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; static const symbol s_0[] = { 's', 't' }; static const symbol s_1[] = { 'i', 'g' }; static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 33 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) return 0; z->c = ret; /* hop, line 33 */ } z->I[1] = z->c; /* setmark x, line 33 */ z->c = c_test; } if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ { /* gopast */ /* non v, line 34 */ int ret = in_grouping_U(z, g_v, 97, 248, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 34 */ /* try, line 35 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int 
among_var; { int mlimit; /* setlimit, line 41 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 41 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 41 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 41 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 48 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; { int ret = slice_del(z); /* delete, line 50 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 55 */ { int mlimit; /* setlimit, line 56 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 56 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 56 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ z->bra = z->c; /* ], line 56 */ z->lb = mlimit; } z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 62 */ } z->bra = z->c; /* ], line 62 */ { int ret = slice_del(z); /* delete, line 62 */ if (ret < 0) return ret; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ z->ket = z->c; /* [, line 66 */ if (!(eq_s_b(z, 2, s_0))) goto lab0; z->bra = z->c; /* ], line 66 */ if (!(eq_s_b(z, 2, s_1))) goto lab0; { int ret = slice_del(z); /* delete, line 66 */ if (ret < 0) return ret; } lab0: z->c = z->l - m1; } { int mlimit; /* setlimit, line 67 
*/ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 67 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; z->ket = z->c; /* [, line 67 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 67 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 70 */ if (ret < 0) return ret; } { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ if (ret < 0) return ret; } lab1: z->c = z->l - m3; } break; case 2: { int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */ if (ret < 0) return ret; } break; } return 1; } static int r_undouble(struct SN_env * z) { { int mlimit; /* setlimit, line 76 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 76 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 76 */ if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 76 */ z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */ if (z->S[0] == 0) return -1; /* -> ch, line 76 */ z->lb = mlimit; } if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } return 1; } extern int danish_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 84 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 84 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 85 */ { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 86 */ if (ret < 0) return ret; } lab1: z->c = z->l - 
m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 88 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ { int ret = r_undouble(z); if (ret == 0) goto lab4; /* call undouble, line 89 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } z->c = z->lb; return 1; } extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2, 0); } extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_danish.h000066400000000000000000000004661217574114600305430ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * danish_UTF_8_create_env(void); extern void danish_UTF_8_close_env(struct SN_env * z); extern int danish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_dutch.c000066400000000000000000000514251217574114600304000ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int dutch_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_standard_suffix(struct SN_env * z); static int r_undouble(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_en_ending(struct SN_env * z); static int r_e_ending(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct 
SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * dutch_UTF_8_create_env(void); extern void dutch_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 0xC3, 0xA1 }; static const symbol s_0_2[2] = { 0xC3, 0xA4 }; static const symbol s_0_3[2] = { 0xC3, 0xA9 }; static const symbol s_0_4[2] = { 0xC3, 0xAB }; static const symbol s_0_5[2] = { 0xC3, 0xAD }; static const symbol s_0_6[2] = { 0xC3, 0xAF }; static const symbol s_0_7[2] = { 0xC3, 0xB3 }; static const symbol s_0_8[2] = { 0xC3, 0xB6 }; static const symbol s_0_9[2] = { 0xC3, 0xBA }; static const symbol s_0_10[2] = { 0xC3, 0xBC }; static const struct among a_0[11] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 2, s_0_1, 0, 1, 0}, /* 2 */ { 2, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, 2, 0}, /* 4 */ { 2, s_0_4, 0, 2, 0}, /* 5 */ { 2, s_0_5, 0, 3, 0}, /* 6 */ { 2, s_0_6, 0, 3, 0}, /* 7 */ { 2, s_0_7, 0, 4, 0}, /* 8 */ { 2, s_0_8, 0, 4, 0}, /* 9 */ { 2, s_0_9, 0, 5, 0}, /* 10 */ { 2, s_0_10, 0, 5, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'Y' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_1_1, 0, 2, 0}, /* 2 */ { 1, s_1_2, 0, 1, 0} }; static const symbol s_2_0[2] = { 'd', 'd' }; static const symbol s_2_1[2] = { 'k', 'k' }; static const symbol s_2_2[2] = { 't', 't' }; static const struct among a_2[3] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0} }; static const symbol s_3_0[3] = { 'e', 'n', 'e' }; static const symbol s_3_1[2] = { 's', 'e' }; static const symbol s_3_2[2] = { 'e', 'n' }; static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' }; static const symbol s_3_4[1] = { 's' }; static const struct among a_3[5] = { /* 0 */ { 3, s_3_0, -1, 2, 0}, /* 1 */ { 2, s_3_1, -1, 3, 0}, /* 2 */ { 2, s_3_2, -1, 2, 0}, /* 3 */ { 5, s_3_3, 2, 1, 0}, /* 4 */ { 1, s_3_4, -1, 3, 0} }; static const symbol s_4_0[3] = { 'e', 'n', 'd' }; static const 
symbol s_4_1[2] = { 'i', 'g' }; static const symbol s_4_2[3] = { 'i', 'n', 'g' }; static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' }; static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' }; static const symbol s_4_5[3] = { 'b', 'a', 'r' }; static const struct among a_4[6] = { /* 0 */ { 3, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 2, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 3, 0}, /* 4 */ { 4, s_4_4, -1, 4, 0}, /* 5 */ { 3, s_4_5, -1, 5, 0} }; static const symbol s_5_0[2] = { 'a', 'a' }; static const symbol s_5_1[2] = { 'e', 'e' }; static const symbol s_5_2[2] = { 'o', 'o' }; static const symbol s_5_3[2] = { 'u', 'u' }; static const struct among a_5[4] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0}, /* 2 */ { 2, s_5_2, -1, -1, 0}, /* 3 */ { 2, s_5_3, -1, -1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'o' }; static const symbol s_4[] = { 'u' }; static const symbol s_5[] = { 'y' }; static const symbol s_6[] = { 'Y' }; static const symbol s_7[] = { 'i' }; static const symbol s_8[] = { 'I' }; static const symbol s_9[] = { 'y' }; static const symbol s_10[] = { 'Y' }; static const symbol s_11[] = { 'y' }; static const symbol s_12[] = { 'i' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'g', 'e', 'm' }; static const symbol s_15[] = { 'h', 'e', 'i', 'd' }; static const symbol s_16[] = { 'h', 'e', 'i', 'd' }; static const symbol s_17[] = { 'c' }; static const symbol s_18[] = { 'e', 'n' }; static const symbol s_19[] = { 'i', 'g' }; static const symbol s_20[] = { 'e' }; static const symbol s_21[] = { 'e' }; static int 
r_prelude(struct SN_env * z) { int among_var; { int c_test = z->c; /* test, line 42 */ while(1) { /* repeat, line 42 */ int c1 = z->c; z->bra = z->c; /* [, line 43 */ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((340306450 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else among_var = find_among(z, a_0, 11); /* substring, line 43 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 43 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */ if (ret < 0) return ret; } break; case 6: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 54 */ } break; } continue; lab0: z->c = c1; break; } z->c = c_test; } { int c_keep = z->c; /* try, line 57 */ z->bra = z->c; /* [, line 57 */ if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; } z->ket = z->c; /* ], line 57 */ { int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */ if (ret < 0) return ret; } lab1: ; } while(1) { /* repeat, line 58 */ int c2 = z->c; while(1) { /* goto, line 58 */ int c3 = z->c; if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab3; z->bra = z->c; /* [, line 59 */ { int c4 = z->c; /* or, line 59 */ if (!(eq_s(z, 1, s_7))) goto lab5; z->ket = z->c; /* ], line 59 */ if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab5; { int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = c4; if (!(eq_s(z, 1, s_9))) goto lab3; z->ket = z->c; /* ], line 60 */ { int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */ if (ret < 0) return ret; } } lab4: z->c = c3; break; lab3: z->c = c3; { int ret = 
skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab2; z->c = ret; /* goto, line 58 */ } } continue; lab2: z->c = c2; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { /* gopast */ /* grouping v, line 69 */ int ret = out_grouping_U(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 69 */ int ret = in_grouping_U(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 69 */ /* try, line 70 */ if (!(z->I[0] < 3)) goto lab0; z->I[0] = 3; lab0: { /* gopast */ /* grouping v, line 71 */ int ret = out_grouping_U(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 71 */ int ret = in_grouping_U(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 71 */ return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 75 */ int c1 = z->c; z->bra = z->c; /* [, line 77 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 77 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 77 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 80 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_undouble(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 91 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if 
(!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */ z->c = z->l - m_test; } z->ket = z->c; /* [, line 91 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 91 */ } z->bra = z->c; /* ], line 91 */ { int ret = slice_del(z); /* delete, line 91 */ if (ret < 0) return ret; } return 1; } static int r_e_ending(struct SN_env * z) { z->B[0] = 0; /* unset e_found, line 95 */ z->ket = z->c; /* [, line 96 */ if (!(eq_s_b(z, 1, s_13))) return 0; z->bra = z->c; /* ], line 96 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 96 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 96 */ if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0; z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } z->B[0] = 1; /* set e_found, line 97 */ { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 98 */ if (ret < 0) return ret; } return 1; } static int r_en_ending(struct SN_env * z) { { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 102 */ if (ret < 0) return ret; } { int m1 = z->l - z->c; (void)m1; /* and, line 102 */ if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ if (!(eq_s_b(z, 3, s_14))) goto lab0; return 0; lab0: z->c = z->l - m2; } } { int ret = slice_del(z); /* delete, line 102 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 103 */ if (ret < 0) return ret; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 107 */ z->ket = z->c; /* [, line 108 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; among_var = find_among_b(z, a_3, 5); /* substring, line 108 */ if (!(among_var)) goto lab0; z->bra = z->c; /* ], line 108 */ switch(among_var) { case 0: goto lab0; case 1: { 
int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 110 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */ if (ret < 0) return ret; } break; case 2: { int ret = r_en_ending(z); if (ret == 0) goto lab0; /* call en_ending, line 113 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 116 */ if (ret < 0) return ret; } if (out_grouping_b_U(z, g_v_j, 97, 232, 0)) goto lab0; { int ret = slice_del(z); /* delete, line 116 */ if (ret < 0) return ret; } break; } lab0: z->c = z->l - m1; } { int m2 = z->l - z->c; (void)m2; /* do, line 120 */ { int ret = r_e_ending(z); if (ret == 0) goto lab1; /* call e_ending, line 120 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 122 */ z->ket = z->c; /* [, line 122 */ if (!(eq_s_b(z, 4, s_16))) goto lab2; z->bra = z->c; /* ], line 122 */ { int ret = r_R2(z); if (ret == 0) goto lab2; /* call R2, line 122 */ if (ret < 0) return ret; } { int m4 = z->l - z->c; (void)m4; /* not, line 122 */ if (!(eq_s_b(z, 1, s_17))) goto lab3; goto lab2; lab3: z->c = z->l - m4; } { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 123 */ if (!(eq_s_b(z, 2, s_18))) goto lab2; z->bra = z->c; /* ], line 123 */ { int ret = r_en_ending(z); if (ret == 0) goto lab2; /* call en_ending, line 123 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m5 = z->l - z->c; (void)m5; /* do, line 126 */ z->ket = z->c; /* [, line 127 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4; among_var = find_among_b(z, a_4, 6); /* substring, line 127 */ if (!(among_var)) goto lab4; z->bra = z->c; /* ], line 127 */ switch(among_var) { case 0: goto lab4; case 1: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 129 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 129 */ if (ret < 0) 
return ret; } { int m6 = z->l - z->c; (void)m6; /* or, line 130 */ z->ket = z->c; /* [, line 130 */ if (!(eq_s_b(z, 2, s_19))) goto lab6; z->bra = z->c; /* ], line 130 */ { int ret = r_R2(z); if (ret == 0) goto lab6; /* call R2, line 130 */ if (ret < 0) return ret; } { int m7 = z->l - z->c; (void)m7; /* not, line 130 */ if (!(eq_s_b(z, 1, s_20))) goto lab7; goto lab6; lab7: z->c = z->l - m7; } { int ret = slice_del(z); /* delete, line 130 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_undouble(z); if (ret == 0) goto lab4; /* call undouble, line 130 */ if (ret < 0) return ret; } } lab5: break; case 2: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 133 */ if (ret < 0) return ret; } { int m8 = z->l - z->c; (void)m8; /* not, line 133 */ if (!(eq_s_b(z, 1, s_21))) goto lab8; goto lab4; lab8: z->c = z->l - m8; } { int ret = slice_del(z); /* delete, line 133 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 136 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 136 */ if (ret < 0) return ret; } { int ret = r_e_ending(z); if (ret == 0) goto lab4; /* call e_ending, line 136 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 139 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 139 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 142 */ if (ret < 0) return ret; } if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */ { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } break; } lab4: z->c = z->l - m5; } { int m9 = z->l - z->c; (void)m9; /* do, line 146 */ if (out_grouping_b_U(z, g_v_I, 73, 232, 0)) goto lab9; { int m_test = z->l - z->c; /* test, line 148 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9; if 
(!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */ if (out_grouping_b_U(z, g_v, 97, 232, 0)) goto lab9; z->c = z->l - m_test; } z->ket = z->c; /* [, line 152 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab9; z->c = ret; /* next, line 152 */ } z->bra = z->c; /* ], line 152 */ { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } lab9: z->c = z->l - m9; } return 1; } extern int dutch_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 159 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 159 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 160 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 160 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 161 */ { int m3 = z->l - z->c; (void)m3; /* do, line 162 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab2; /* call standard_suffix, line 162 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; { int c4 = z->c; /* do, line 163 */ { int ret = r_postlude(z); if (ret == 0) goto lab3; /* call postlude, line 163 */ if (ret < 0) return ret; } lab3: z->c = c4; } return 1; } extern struct SN_env * dutch_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } extern void dutch_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_dutch.h000066400000000000000000000004631217574114600304010ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * dutch_UTF_8_create_env(void); extern void dutch_UTF_8_close_env(struct SN_env * z); extern int dutch_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_english.c000066400000000000000000001142601217574114600307170ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int english_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_exception2(struct SN_env * z); static int r_exception1(struct SN_env * z); static int r_Step_5(struct SN_env * z); static int r_Step_4(struct SN_env * z); static int r_Step_3(struct SN_env * z); static int r_Step_2(struct SN_env * z); static int r_Step_1c(struct SN_env * z); static int r_Step_1b(struct SN_env * z); static int r_Step_1a(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_shortv(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * english_UTF_8_create_env(void); extern void english_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[5] = { 'a', 'r', 's', 'e', 'n' }; static const symbol s_0_1[6] = { 'c', 'o', 'm', 'm', 'u', 'n' }; static const symbol s_0_2[5] = { 'g', 'e', 'n', 'e', 'r' }; static const struct among a_0[3] = { /* 0 */ { 5, s_0_0, -1, -1, 0}, /* 1 */ { 6, s_0_1, -1, -1, 0}, /* 2 */ { 5, s_0_2, -1, -1, 0} }; static const symbol s_1_0[1] = { '\'' }; static const symbol s_1_1[3] = { '\'', 's', '\'' }; static const symbol s_1_2[2] = { '\'', 's' }; static const struct among a_1[3] = { /* 0 */ { 1, s_1_0, -1, 1, 0}, /* 1 */ { 3, s_1_1, 0, 1, 0}, /* 2 */ { 2, s_1_2, -1, 1, 0} }; static const symbol s_2_0[3] = { 'i', 'e', 'd' }; static const symbol s_2_1[1] = { 's' }; static const symbol s_2_2[3] = { 'i', 'e', 's' }; static const symbol s_2_3[4] = { 's', 's', 'e', 's' }; static const symbol s_2_4[2] = 
{ 's', 's' }; static const symbol s_2_5[2] = { 'u', 's' }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, 2, 0}, /* 1 */ { 1, s_2_1, -1, 3, 0}, /* 2 */ { 3, s_2_2, 1, 2, 0}, /* 3 */ { 4, s_2_3, 1, 1, 0}, /* 4 */ { 2, s_2_4, 1, -1, 0}, /* 5 */ { 2, s_2_5, 1, -1, 0} }; static const symbol s_3_1[2] = { 'b', 'b' }; static const symbol s_3_2[2] = { 'd', 'd' }; static const symbol s_3_3[2] = { 'f', 'f' }; static const symbol s_3_4[2] = { 'g', 'g' }; static const symbol s_3_5[2] = { 'b', 'l' }; static const symbol s_3_6[2] = { 'm', 'm' }; static const symbol s_3_7[2] = { 'n', 'n' }; static const symbol s_3_8[2] = { 'p', 'p' }; static const symbol s_3_9[2] = { 'r', 'r' }; static const symbol s_3_10[2] = { 'a', 't' }; static const symbol s_3_11[2] = { 't', 't' }; static const symbol s_3_12[2] = { 'i', 'z' }; static const struct among a_3[13] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_3_1, 0, 2, 0}, /* 2 */ { 2, s_3_2, 0, 2, 0}, /* 3 */ { 2, s_3_3, 0, 2, 0}, /* 4 */ { 2, s_3_4, 0, 2, 0}, /* 5 */ { 2, s_3_5, 0, 1, 0}, /* 6 */ { 2, s_3_6, 0, 2, 0}, /* 7 */ { 2, s_3_7, 0, 2, 0}, /* 8 */ { 2, s_3_8, 0, 2, 0}, /* 9 */ { 2, s_3_9, 0, 2, 0}, /* 10 */ { 2, s_3_10, 0, 1, 0}, /* 11 */ { 2, s_3_11, 0, 2, 0}, /* 12 */ { 2, s_3_12, 0, 1, 0} }; static const symbol s_4_0[2] = { 'e', 'd' }; static const symbol s_4_1[3] = { 'e', 'e', 'd' }; static const symbol s_4_2[3] = { 'i', 'n', 'g' }; static const symbol s_4_3[4] = { 'e', 'd', 'l', 'y' }; static const symbol s_4_4[5] = { 'e', 'e', 'd', 'l', 'y' }; static const symbol s_4_5[5] = { 'i', 'n', 'g', 'l', 'y' }; static const struct among a_4[6] = { /* 0 */ { 2, s_4_0, -1, 2, 0}, /* 1 */ { 3, s_4_1, 0, 1, 0}, /* 2 */ { 3, s_4_2, -1, 2, 0}, /* 3 */ { 4, s_4_3, -1, 2, 0}, /* 4 */ { 5, s_4_4, 3, 1, 0}, /* 5 */ { 5, s_4_5, -1, 2, 0} }; static const symbol s_5_0[4] = { 'a', 'n', 'c', 'i' }; static const symbol s_5_1[4] = { 'e', 'n', 'c', 'i' }; static const symbol s_5_2[3] = { 'o', 'g', 'i' }; static const symbol s_5_3[2] = { 'l', 
'i' }; static const symbol s_5_4[3] = { 'b', 'l', 'i' }; static const symbol s_5_5[4] = { 'a', 'b', 'l', 'i' }; static const symbol s_5_6[4] = { 'a', 'l', 'l', 'i' }; static const symbol s_5_7[5] = { 'f', 'u', 'l', 'l', 'i' }; static const symbol s_5_8[6] = { 'l', 'e', 's', 's', 'l', 'i' }; static const symbol s_5_9[5] = { 'o', 'u', 's', 'l', 'i' }; static const symbol s_5_10[5] = { 'e', 'n', 't', 'l', 'i' }; static const symbol s_5_11[5] = { 'a', 'l', 'i', 't', 'i' }; static const symbol s_5_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; static const symbol s_5_13[5] = { 'i', 'v', 'i', 't', 'i' }; static const symbol s_5_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_5_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_5_16[5] = { 'a', 'l', 'i', 's', 'm' }; static const symbol s_5_17[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_5_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; static const symbol s_5_19[4] = { 'i', 'z', 'e', 'r' }; static const symbol s_5_20[4] = { 'a', 't', 'o', 'r' }; static const symbol s_5_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; static const symbol s_5_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; static const symbol s_5_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; static const struct among a_5[24] = { /* 0 */ { 4, s_5_0, -1, 3, 0}, /* 1 */ { 4, s_5_1, -1, 2, 0}, /* 2 */ { 3, s_5_2, -1, 13, 0}, /* 3 */ { 2, s_5_3, -1, 16, 0}, /* 4 */ { 3, s_5_4, 3, 12, 0}, /* 5 */ { 4, s_5_5, 4, 4, 0}, /* 6 */ { 4, s_5_6, 3, 8, 0}, /* 7 */ { 5, s_5_7, 3, 14, 0}, /* 8 */ { 6, s_5_8, 3, 15, 0}, /* 9 */ { 5, s_5_9, 3, 10, 0}, /* 10 */ { 5, s_5_10, 3, 5, 0}, /* 11 */ { 5, s_5_11, -1, 8, 0}, /* 12 */ { 6, s_5_12, -1, 12, 0}, /* 13 */ { 5, s_5_13, -1, 11, 0}, /* 14 */ { 6, s_5_14, -1, 1, 0}, /* 15 */ { 7, s_5_15, 14, 7, 0}, /* 16 */ { 5, s_5_16, -1, 8, 0}, /* 17 */ { 5, s_5_17, -1, 7, 0}, /* 18 */ { 7, s_5_18, 17, 6, 0}, /* 19 */ { 4, s_5_19, -1, 6, 0}, /* 20 */ { 4, s_5_20, -1, 7, 0}, /* 21 */ { 7, s_5_21, -1, 11, 
0}, /* 22 */ { 7, s_5_22, -1, 9, 0}, /* 23 */ { 7, s_5_23, -1, 10, 0} }; static const symbol s_6_0[5] = { 'i', 'c', 'a', 't', 'e' }; static const symbol s_6_1[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_6_2[5] = { 'a', 'l', 'i', 'z', 'e' }; static const symbol s_6_3[5] = { 'i', 'c', 'i', 't', 'i' }; static const symbol s_6_4[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_6_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_6_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_6_7[3] = { 'f', 'u', 'l' }; static const symbol s_6_8[4] = { 'n', 'e', 's', 's' }; static const struct among a_6[9] = { /* 0 */ { 5, s_6_0, -1, 4, 0}, /* 1 */ { 5, s_6_1, -1, 6, 0}, /* 2 */ { 5, s_6_2, -1, 3, 0}, /* 3 */ { 5, s_6_3, -1, 4, 0}, /* 4 */ { 4, s_6_4, -1, 4, 0}, /* 5 */ { 6, s_6_5, -1, 1, 0}, /* 6 */ { 7, s_6_6, 5, 2, 0}, /* 7 */ { 3, s_6_7, -1, 5, 0}, /* 8 */ { 4, s_6_8, -1, 5, 0} }; static const symbol s_7_0[2] = { 'i', 'c' }; static const symbol s_7_1[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_7_2[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_7_3[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_7_4[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_7_5[3] = { 'a', 't', 'e' }; static const symbol s_7_6[3] = { 'i', 'v', 'e' }; static const symbol s_7_7[3] = { 'i', 'z', 'e' }; static const symbol s_7_8[3] = { 'i', 't', 'i' }; static const symbol s_7_9[2] = { 'a', 'l' }; static const symbol s_7_10[3] = { 'i', 's', 'm' }; static const symbol s_7_11[3] = { 'i', 'o', 'n' }; static const symbol s_7_12[2] = { 'e', 'r' }; static const symbol s_7_13[3] = { 'o', 'u', 's' }; static const symbol s_7_14[3] = { 'a', 'n', 't' }; static const symbol s_7_15[3] = { 'e', 'n', 't' }; static const symbol s_7_16[4] = { 'm', 'e', 'n', 't' }; static const symbol s_7_17[5] = { 'e', 'm', 'e', 'n', 't' }; static const struct among a_7[18] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 4, s_7_1, -1, 1, 0}, /* 2 */ { 4, s_7_2, -1, 1, 0}, /* 3 */ { 4, 
s_7_3, -1, 1, 0}, /* 4 */ { 4, s_7_4, -1, 1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 3, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 3, s_7_8, -1, 1, 0}, /* 9 */ { 2, s_7_9, -1, 1, 0}, /* 10 */ { 3, s_7_10, -1, 1, 0}, /* 11 */ { 3, s_7_11, -1, 2, 0}, /* 12 */ { 2, s_7_12, -1, 1, 0}, /* 13 */ { 3, s_7_13, -1, 1, 0}, /* 14 */ { 3, s_7_14, -1, 1, 0}, /* 15 */ { 3, s_7_15, -1, 1, 0}, /* 16 */ { 4, s_7_16, 15, 1, 0}, /* 17 */ { 5, s_7_17, 16, 1, 0} }; static const symbol s_8_0[1] = { 'e' }; static const symbol s_8_1[1] = { 'l' }; static const struct among a_8[2] = { /* 0 */ { 1, s_8_0, -1, 1, 0}, /* 1 */ { 1, s_8_1, -1, 2, 0} }; static const symbol s_9_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; static const symbol s_9_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' }; static const symbol s_9_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' }; static const symbol s_9_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' }; static const symbol s_9_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' }; static const symbol s_9_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' }; static const symbol s_9_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' }; static const symbol s_9_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' }; static const struct among a_9[8] = { /* 0 */ { 7, s_9_0, -1, -1, 0}, /* 1 */ { 7, s_9_1, -1, -1, 0}, /* 2 */ { 6, s_9_2, -1, -1, 0}, /* 3 */ { 7, s_9_3, -1, -1, 0}, /* 4 */ { 6, s_9_4, -1, -1, 0}, /* 5 */ { 7, s_9_5, -1, -1, 0}, /* 6 */ { 7, s_9_6, -1, -1, 0}, /* 7 */ { 6, s_9_7, -1, -1, 0} }; static const symbol s_10_0[5] = { 'a', 'n', 'd', 'e', 's' }; static const symbol s_10_1[5] = { 'a', 't', 'l', 'a', 's' }; static const symbol s_10_2[4] = { 'b', 'i', 'a', 's' }; static const symbol s_10_3[6] = { 'c', 'o', 's', 'm', 'o', 's' }; static const symbol s_10_4[5] = { 'd', 'y', 'i', 'n', 'g' }; static const symbol s_10_5[5] = { 'e', 'a', 'r', 'l', 'y' }; static const symbol s_10_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' }; static const symbol s_10_7[4] = { 'h', 'o', 'w', 'e' }; static const symbol 
s_10_8[4] = { 'i', 'd', 'l', 'y' }; static const symbol s_10_9[5] = { 'l', 'y', 'i', 'n', 'g' }; static const symbol s_10_10[4] = { 'n', 'e', 'w', 's' }; static const symbol s_10_11[4] = { 'o', 'n', 'l', 'y' }; static const symbol s_10_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' }; static const symbol s_10_13[5] = { 's', 'k', 'i', 'e', 's' }; static const symbol s_10_14[4] = { 's', 'k', 'i', 's' }; static const symbol s_10_15[3] = { 's', 'k', 'y' }; static const symbol s_10_16[5] = { 't', 'y', 'i', 'n', 'g' }; static const symbol s_10_17[4] = { 'u', 'g', 'l', 'y' }; static const struct among a_10[18] = { /* 0 */ { 5, s_10_0, -1, -1, 0}, /* 1 */ { 5, s_10_1, -1, -1, 0}, /* 2 */ { 4, s_10_2, -1, -1, 0}, /* 3 */ { 6, s_10_3, -1, -1, 0}, /* 4 */ { 5, s_10_4, -1, 3, 0}, /* 5 */ { 5, s_10_5, -1, 9, 0}, /* 6 */ { 6, s_10_6, -1, 7, 0}, /* 7 */ { 4, s_10_7, -1, -1, 0}, /* 8 */ { 4, s_10_8, -1, 6, 0}, /* 9 */ { 5, s_10_9, -1, 4, 0}, /* 10 */ { 4, s_10_10, -1, -1, 0}, /* 11 */ { 4, s_10_11, -1, 10, 0}, /* 12 */ { 6, s_10_12, -1, 11, 0}, /* 13 */ { 5, s_10_13, -1, 2, 0}, /* 14 */ { 4, s_10_14, -1, 1, 0}, /* 15 */ { 3, s_10_15, -1, -1, 0}, /* 16 */ { 5, s_10_16, -1, 5, 0}, /* 17 */ { 4, s_10_17, -1, 8, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1 }; static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; static const unsigned char g_valid_LI[] = { 55, 141, 2 }; static const symbol s_0[] = { '\'' }; static const symbol s_1[] = { 'y' }; static const symbol s_2[] = { 'Y' }; static const symbol s_3[] = { 'y' }; static const symbol s_4[] = { 'Y' }; static const symbol s_5[] = { 's', 's' }; static const symbol s_6[] = { 'i' }; static const symbol s_7[] = { 'i', 'e' }; static const symbol s_8[] = { 'e', 'e' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'e' }; static const symbol s_11[] = { 'y' }; static const symbol s_12[] = { 'Y' }; static const symbol s_13[] = { 'i' }; static const symbol s_14[] = { 't', 'i', 'o', 'n' }; static const symbol 
s_15[] = { 'e', 'n', 'c', 'e' }; static const symbol s_16[] = { 'a', 'n', 'c', 'e' }; static const symbol s_17[] = { 'a', 'b', 'l', 'e' }; static const symbol s_18[] = { 'e', 'n', 't' }; static const symbol s_19[] = { 'i', 'z', 'e' }; static const symbol s_20[] = { 'a', 't', 'e' }; static const symbol s_21[] = { 'a', 'l' }; static const symbol s_22[] = { 'f', 'u', 'l' }; static const symbol s_23[] = { 'o', 'u', 's' }; static const symbol s_24[] = { 'i', 'v', 'e' }; static const symbol s_25[] = { 'b', 'l', 'e' }; static const symbol s_26[] = { 'l' }; static const symbol s_27[] = { 'o', 'g' }; static const symbol s_28[] = { 'f', 'u', 'l' }; static const symbol s_29[] = { 'l', 'e', 's', 's' }; static const symbol s_30[] = { 't', 'i', 'o', 'n' }; static const symbol s_31[] = { 'a', 't', 'e' }; static const symbol s_32[] = { 'a', 'l' }; static const symbol s_33[] = { 'i', 'c' }; static const symbol s_34[] = { 's' }; static const symbol s_35[] = { 't' }; static const symbol s_36[] = { 'l' }; static const symbol s_37[] = { 's', 'k', 'i' }; static const symbol s_38[] = { 's', 'k', 'y' }; static const symbol s_39[] = { 'd', 'i', 'e' }; static const symbol s_40[] = { 'l', 'i', 'e' }; static const symbol s_41[] = { 't', 'i', 'e' }; static const symbol s_42[] = { 'i', 'd', 'l' }; static const symbol s_43[] = { 'g', 'e', 'n', 't', 'l' }; static const symbol s_44[] = { 'u', 'g', 'l', 'i' }; static const symbol s_45[] = { 'e', 'a', 'r', 'l', 'i' }; static const symbol s_46[] = { 'o', 'n', 'l', 'i' }; static const symbol s_47[] = { 's', 'i', 'n', 'g', 'l' }; static const symbol s_48[] = { 'Y' }; static const symbol s_49[] = { 'y' }; static int r_prelude(struct SN_env * z) { z->B[0] = 0; /* unset Y_found, line 26 */ { int c1 = z->c; /* do, line 27 */ z->bra = z->c; /* [, line 27 */ if (!(eq_s(z, 1, s_0))) goto lab0; z->ket = z->c; /* ], line 27 */ { int ret = slice_del(z); /* delete, line 27 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 28 */ z->bra 
= z->c; /* [, line 28 */ if (!(eq_s(z, 1, s_1))) goto lab1; z->ket = z->c; /* ], line 28 */ { int ret = slice_from_s(z, 1, s_2); /* <-, line 28 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 28 */ lab1: z->c = c2; } { int c3 = z->c; /* do, line 29 */ while(1) { /* repeat, line 29 */ int c4 = z->c; while(1) { /* goto, line 29 */ int c5 = z->c; if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab4; z->bra = z->c; /* [, line 29 */ if (!(eq_s(z, 1, s_3))) goto lab4; z->ket = z->c; /* ], line 29 */ z->c = c5; break; lab4: z->c = c5; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab3; z->c = ret; /* goto, line 29 */ } } { int ret = slice_from_s(z, 1, s_4); /* <-, line 29 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 29 */ continue; lab3: z->c = c4; break; } z->c = c3; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c1 = z->c; /* do, line 35 */ { int c2 = z->c; /* or, line 41 */ if (z->c + 4 >= z->l || z->p[z->c + 4] >> 5 != 3 || !((2375680 >> (z->p[z->c + 4] & 0x1f)) & 1)) goto lab2; if (!(find_among(z, a_0, 3))) goto lab2; /* among, line 36 */ goto lab1; lab2: z->c = c2; { /* gopast */ /* grouping v, line 41 */ int ret = out_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 41 */ int ret = in_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } } lab1: z->I[0] = z->c; /* setmark p1, line 42 */ { /* gopast */ /* grouping v, line 43 */ int ret = out_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 43 */ int ret = in_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 43 */ lab0: z->c = c1; } return 1; } static int r_shortv(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 51 */ if (out_grouping_b_U(z, g_v_WXY, 89, 121, 0)) goto lab1; if (in_grouping_b_U(z, g_v, 97, 121, 0)) goto lab1; 
if (out_grouping_b_U(z, g_v, 97, 121, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; if (in_grouping_b_U(z, g_v, 97, 121, 0)) return 0; if (z->c > z->lb) return 0; /* atlimit, line 52 */ } lab0: return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_Step_1a(struct SN_env * z) { int among_var; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 59 */ z->ket = z->c; /* [, line 60 */ if (z->c <= z->lb || (z->p[z->c - 1] != 39 && z->p[z->c - 1] != 115)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_1, 3); /* substring, line 60 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 60 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: { int ret = slice_del(z); /* delete, line 62 */ if (ret < 0) return ret; } break; } lab0: ; } z->ket = z->c; /* [, line 65 */ if (z->c <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 115)) return 0; among_var = find_among_b(z, a_2, 6); /* substring, line 65 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 65 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_5); /* <-, line 66 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 68 */ { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 2); if (ret < 0) goto lab2; z->c = ret; /* hop, line 68 */ } { int ret = slice_from_s(z, 1, s_6); /* <-, line 68 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m1; { int ret = slice_from_s(z, 2, s_7); /* <-, line 68 */ if (ret < 0) return ret; } } lab1: break; case 3: { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 69 */ } { /* gopast */ /* grouping v, line 69 */ int ret = out_grouping_b_U(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; 
} { int ret = slice_del(z); /* delete, line 69 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_1b(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 75 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33554576 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 6); /* substring, line 75 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 75 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 77 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_8); /* <-, line 77 */ if (ret < 0) return ret; } break; case 2: { int m_test = z->l - z->c; /* test, line 80 */ { /* gopast */ /* grouping v, line 80 */ int ret = out_grouping_b_U(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 80 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 81 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else among_var = find_among_b(z, a_3, 13); /* substring, line 81 */ if (!(among_var)) return 0; z->c = z->l - m_test; } switch(among_var) { case 0: return 0; case 1: { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_9); /* <+, line 83 */ z->c = c_keep; if (ret < 0) return ret; } break; case 2: z->ket = z->c; /* [, line 86 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 86 */ } z->bra = z->c; /* ], line 86 */ { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } break; case 3: if (z->c != z->I[0]) return 0; /* atmark, line 87 */ { int m_test = z->l - z->c; /* test, line 87 */ { int ret = r_shortv(z); if (ret == 0) return 0; /* call shortv, line 87 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_10); /* <+, line 87 */ z->c = c_keep; if (ret < 0) 
return ret; } break; } break; } return 1; } static int r_Step_1c(struct SN_env * z) { z->ket = z->c; /* [, line 94 */ { int m1 = z->l - z->c; (void)m1; /* or, line 94 */ if (!(eq_s_b(z, 1, s_11))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_12))) return 0; } lab0: z->bra = z->c; /* ], line 94 */ if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; { int m2 = z->l - z->c; (void)m2; /* not, line 95 */ if (z->c > z->lb) goto lab2; /* atlimit, line 95 */ return 0; lab2: z->c = z->l - m2; } { int ret = slice_from_s(z, 1, s_13); /* <-, line 96 */ if (ret < 0) return ret; } return 1; } static int r_Step_2(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 100 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 24); /* substring, line 100 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 100 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 100 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_14); /* <-, line 101 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_15); /* <-, line 102 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_16); /* <-, line 103 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 4, s_17); /* <-, line 104 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_18); /* <-, line 105 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 3, s_19); /* <-, line 107 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_20); /* <-, line 109 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 2, s_21); /* <-, line 111 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 3, s_22); /* <-, line 112 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 3, 
s_23); /* <-, line 114 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 3, s_24); /* <-, line 116 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 3, s_25); /* <-, line 118 */ if (ret < 0) return ret; } break; case 13: if (!(eq_s_b(z, 1, s_26))) return 0; { int ret = slice_from_s(z, 2, s_27); /* <-, line 119 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 3, s_28); /* <-, line 120 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_from_s(z, 4, s_29); /* <-, line 121 */ if (ret < 0) return ret; } break; case 16: if (in_grouping_b_U(z, g_valid_LI, 99, 116, 0)) return 0; { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_3(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 127 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_6, 9); /* substring, line 127 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 127 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 127 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_30); /* <-, line 128 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_31); /* <-, line 129 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_32); /* <-, line 130 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_33); /* <-, line 132 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } break; case 6: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 136 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 136 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_4(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 141 */ if 
(z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1864232 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_7, 18); /* substring, line 141 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 141 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 141 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 144 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ if (!(eq_s_b(z, 1, s_34))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_35))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_5(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 150 */ if (z->c <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) return 0; among_var = find_among_b(z, a_8, 2); /* substring, line 150 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 150 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 151 */ { int ret = r_R2(z); if (ret == 0) goto lab1; /* call R2, line 151 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 151 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* not, line 151 */ { int ret = r_shortv(z); if (ret == 0) goto lab2; /* call shortv, line 151 */ if (ret < 0) return ret; } return 0; lab2: z->c = z->l - m2; } } lab0: { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 152 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_36))) return 0; { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } break; } return 1; } static int r_exception2(struct SN_env * z) { z->ket = z->c; /* [, line 158 */ if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 
100 && z->p[z->c - 1] != 103)) return 0; if (!(find_among_b(z, a_9, 8))) return 0; /* substring, line 158 */ z->bra = z->c; /* ], line 158 */ if (z->c > z->lb) return 0; /* atlimit, line 158 */ return 1; } static int r_exception1(struct SN_env * z) { int among_var; z->bra = z->c; /* [, line 170 */ if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((42750482 >> (z->p[z->c + 2] & 0x1f)) & 1)) return 0; among_var = find_among(z, a_10, 18); /* substring, line 170 */ if (!(among_var)) return 0; z->ket = z->c; /* ], line 170 */ if (z->c < z->l) return 0; /* atlimit, line 170 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 3, s_37); /* <-, line 174 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_38); /* <-, line 175 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 3, s_39); /* <-, line 176 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 3, s_40); /* <-, line 177 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_41); /* <-, line 178 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 3, s_42); /* <-, line 182 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 5, s_43); /* <-, line 183 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 4, s_44); /* <-, line 184 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 5, s_45); /* <-, line 185 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 4, s_46); /* <-, line 186 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 5, s_47); /* <-, line 187 */ if (ret < 0) return ret; } break; } return 1; } static int r_postlude(struct SN_env * z) { if (!(z->B[0])) return 0; /* Boolean test Y_found, line 203 */ while(1) { /* repeat, line 203 */ int c1 = z->c; while(1) { /* goto, line 203 */ int c2 = z->c; z->bra = z->c; /* [, line 203 */ if (!(eq_s(z, 1, s_48))) goto lab1; 
z->ket = z->c; /* ], line 203 */ z->c = c2; break; lab1: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* goto, line 203 */ } } { int ret = slice_from_s(z, 1, s_49); /* <-, line 203 */ if (ret < 0) return ret; } continue; lab0: z->c = c1; break; } return 1; } extern int english_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* or, line 207 */ { int ret = r_exception1(z); if (ret == 0) goto lab1; /* call exception1, line 207 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = c1; { int c2 = z->c; /* not, line 208 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) goto lab3; z->c = ret; /* hop, line 208 */ } goto lab2; lab3: z->c = c2; } goto lab0; lab2: z->c = c1; { int c3 = z->c; /* do, line 209 */ { int ret = r_prelude(z); if (ret == 0) goto lab4; /* call prelude, line 209 */ if (ret < 0) return ret; } lab4: z->c = c3; } { int c4 = z->c; /* do, line 210 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab5; /* call mark_regions, line 210 */ if (ret < 0) return ret; } lab5: z->c = c4; } z->lb = z->c; z->c = z->l; /* backwards, line 211 */ { int m5 = z->l - z->c; (void)m5; /* do, line 213 */ { int ret = r_Step_1a(z); if (ret == 0) goto lab6; /* call Step_1a, line 213 */ if (ret < 0) return ret; } lab6: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* or, line 215 */ { int ret = r_exception2(z); if (ret == 0) goto lab8; /* call exception2, line 215 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m6; { int m7 = z->l - z->c; (void)m7; /* do, line 217 */ { int ret = r_Step_1b(z); if (ret == 0) goto lab9; /* call Step_1b, line 217 */ if (ret < 0) return ret; } lab9: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 218 */ { int ret = r_Step_1c(z); if (ret == 0) goto lab10; /* call Step_1c, line 218 */ if (ret < 0) return ret; } lab10: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 220 */ { int ret = r_Step_2(z); if (ret == 0) goto lab11; /* call 
Step_2, line 220 */ if (ret < 0) return ret; } lab11: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 221 */ { int ret = r_Step_3(z); if (ret == 0) goto lab12; /* call Step_3, line 221 */ if (ret < 0) return ret; } lab12: z->c = z->l - m10; } { int m11 = z->l - z->c; (void)m11; /* do, line 222 */ { int ret = r_Step_4(z); if (ret == 0) goto lab13; /* call Step_4, line 222 */ if (ret < 0) return ret; } lab13: z->c = z->l - m11; } { int m12 = z->l - z->c; (void)m12; /* do, line 224 */ { int ret = r_Step_5(z); if (ret == 0) goto lab14; /* call Step_5, line 224 */ if (ret < 0) return ret; } lab14: z->c = z->l - m12; } } lab7: z->c = z->lb; { int c13 = z->c; /* do, line 227 */ { int ret = r_postlude(z); if (ret == 0) goto lab15; /* call postlude, line 227 */ if (ret < 0) return ret; } lab15: z->c = c13; } } lab0: return 1; } extern struct SN_env * english_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } extern void english_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_english.h000066400000000000000000000004711217574114600307220ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * english_UTF_8_create_env(void); extern void english_UTF_8_close_env(struct SN_env * z); extern int english_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_finnish.c000066400000000000000000000632731217574114600307330ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int finnish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_tidy(struct SN_env * z); static int r_other_endings(struct SN_env * z); static int r_t_plural(struct 
SN_env * z); static int r_i_plural(struct SN_env * z); static int r_case_ending(struct SN_env * z); static int r_VI(struct SN_env * z); static int r_LONG(struct SN_env * z); static int r_possessive(struct SN_env * z); static int r_particle_etc(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * finnish_UTF_8_create_env(void); extern void finnish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[2] = { 'p', 'a' }; static const symbol s_0_1[3] = { 's', 't', 'i' }; static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' }; static const symbol s_0_3[3] = { 'h', 'a', 'n' }; static const symbol s_0_4[3] = { 'k', 'i', 'n' }; static const symbol s_0_5[4] = { 'h', 0xC3, 0xA4, 'n' }; static const symbol s_0_6[6] = { 'k', 0xC3, 0xA4, 0xC3, 0xA4, 'n' }; static const symbol s_0_7[2] = { 'k', 'o' }; static const symbol s_0_8[3] = { 'p', 0xC3, 0xA4 }; static const symbol s_0_9[3] = { 'k', 0xC3, 0xB6 }; static const struct among a_0[10] = { /* 0 */ { 2, s_0_0, -1, 1, 0}, /* 1 */ { 3, s_0_1, -1, 2, 0}, /* 2 */ { 4, s_0_2, -1, 1, 0}, /* 3 */ { 3, s_0_3, -1, 1, 0}, /* 4 */ { 3, s_0_4, -1, 1, 0}, /* 5 */ { 4, s_0_5, -1, 1, 0}, /* 6 */ { 6, s_0_6, -1, 1, 0}, /* 7 */ { 2, s_0_7, -1, 1, 0}, /* 8 */ { 3, s_0_8, -1, 1, 0}, /* 9 */ { 3, s_0_9, -1, 1, 0} }; static const symbol s_1_0[3] = { 'l', 'l', 'a' }; static const symbol s_1_1[2] = { 'n', 'a' }; static const symbol s_1_2[3] = { 's', 's', 'a' }; static const symbol s_1_3[2] = { 't', 'a' }; static const symbol s_1_4[3] = { 'l', 't', 'a' }; static const symbol s_1_5[3] = { 's', 't', 'a' }; static const struct among a_1[6] = { /* 0 */ { 3, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 3, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 3, s_1_4, 3, -1, 0}, /* 5 */ { 3, s_1_5, 3, -1, 0} }; static const symbol s_2_0[4] = { 'l', 'l', 0xC3, 0xA4 }; static const symbol 
s_2_1[3] = { 'n', 0xC3, 0xA4 }; static const symbol s_2_2[4] = { 's', 's', 0xC3, 0xA4 }; static const symbol s_2_3[3] = { 't', 0xC3, 0xA4 }; static const symbol s_2_4[4] = { 'l', 't', 0xC3, 0xA4 }; static const symbol s_2_5[4] = { 's', 't', 0xC3, 0xA4 }; static const struct among a_2[6] = { /* 0 */ { 4, s_2_0, -1, -1, 0}, /* 1 */ { 3, s_2_1, -1, -1, 0}, /* 2 */ { 4, s_2_2, -1, -1, 0}, /* 3 */ { 3, s_2_3, -1, -1, 0}, /* 4 */ { 4, s_2_4, 3, -1, 0}, /* 5 */ { 4, s_2_5, 3, -1, 0} }; static const symbol s_3_0[3] = { 'l', 'l', 'e' }; static const symbol s_3_1[3] = { 'i', 'n', 'e' }; static const struct among a_3[2] = { /* 0 */ { 3, s_3_0, -1, -1, 0}, /* 1 */ { 3, s_3_1, -1, -1, 0} }; static const symbol s_4_0[3] = { 'n', 's', 'a' }; static const symbol s_4_1[3] = { 'm', 'm', 'e' }; static const symbol s_4_2[3] = { 'n', 'n', 'e' }; static const symbol s_4_3[2] = { 'n', 'i' }; static const symbol s_4_4[2] = { 's', 'i' }; static const symbol s_4_5[2] = { 'a', 'n' }; static const symbol s_4_6[2] = { 'e', 'n' }; static const symbol s_4_7[3] = { 0xC3, 0xA4, 'n' }; static const symbol s_4_8[4] = { 'n', 's', 0xC3, 0xA4 }; static const struct among a_4[9] = { /* 0 */ { 3, s_4_0, -1, 3, 0}, /* 1 */ { 3, s_4_1, -1, 3, 0}, /* 2 */ { 3, s_4_2, -1, 3, 0}, /* 3 */ { 2, s_4_3, -1, 2, 0}, /* 4 */ { 2, s_4_4, -1, 1, 0}, /* 5 */ { 2, s_4_5, -1, 4, 0}, /* 6 */ { 2, s_4_6, -1, 6, 0}, /* 7 */ { 3, s_4_7, -1, 5, 0}, /* 8 */ { 4, s_4_8, -1, 3, 0} }; static const symbol s_5_0[2] = { 'a', 'a' }; static const symbol s_5_1[2] = { 'e', 'e' }; static const symbol s_5_2[2] = { 'i', 'i' }; static const symbol s_5_3[2] = { 'o', 'o' }; static const symbol s_5_4[2] = { 'u', 'u' }; static const symbol s_5_5[4] = { 0xC3, 0xA4, 0xC3, 0xA4 }; static const symbol s_5_6[4] = { 0xC3, 0xB6, 0xC3, 0xB6 }; static const struct among a_5[7] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0}, /* 2 */ { 2, s_5_2, -1, -1, 0}, /* 3 */ { 2, s_5_3, -1, -1, 0}, /* 4 */ { 2, s_5_4, -1, -1, 0}, /* 5 */ { 4, 
s_5_5, -1, -1, 0}, /* 6 */ { 4, s_5_6, -1, -1, 0} }; static const symbol s_6_0[1] = { 'a' }; static const symbol s_6_1[3] = { 'l', 'l', 'a' }; static const symbol s_6_2[2] = { 'n', 'a' }; static const symbol s_6_3[3] = { 's', 's', 'a' }; static const symbol s_6_4[2] = { 't', 'a' }; static const symbol s_6_5[3] = { 'l', 't', 'a' }; static const symbol s_6_6[3] = { 's', 't', 'a' }; static const symbol s_6_7[3] = { 't', 't', 'a' }; static const symbol s_6_8[3] = { 'l', 'l', 'e' }; static const symbol s_6_9[3] = { 'i', 'n', 'e' }; static const symbol s_6_10[3] = { 'k', 's', 'i' }; static const symbol s_6_11[1] = { 'n' }; static const symbol s_6_12[3] = { 'h', 'a', 'n' }; static const symbol s_6_13[3] = { 'd', 'e', 'n' }; static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' }; static const symbol s_6_15[3] = { 'h', 'e', 'n' }; static const symbol s_6_16[4] = { 't', 't', 'e', 'n' }; static const symbol s_6_17[3] = { 'h', 'i', 'n' }; static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' }; static const symbol s_6_19[3] = { 'h', 'o', 'n' }; static const symbol s_6_20[4] = { 'h', 0xC3, 0xA4, 'n' }; static const symbol s_6_21[4] = { 'h', 0xC3, 0xB6, 'n' }; static const symbol s_6_22[2] = { 0xC3, 0xA4 }; static const symbol s_6_23[4] = { 'l', 'l', 0xC3, 0xA4 }; static const symbol s_6_24[3] = { 'n', 0xC3, 0xA4 }; static const symbol s_6_25[4] = { 's', 's', 0xC3, 0xA4 }; static const symbol s_6_26[3] = { 't', 0xC3, 0xA4 }; static const symbol s_6_27[4] = { 'l', 't', 0xC3, 0xA4 }; static const symbol s_6_28[4] = { 's', 't', 0xC3, 0xA4 }; static const symbol s_6_29[4] = { 't', 't', 0xC3, 0xA4 }; static const struct among a_6[30] = { /* 0 */ { 1, s_6_0, -1, 8, 0}, /* 1 */ { 3, s_6_1, 0, -1, 0}, /* 2 */ { 2, s_6_2, 0, -1, 0}, /* 3 */ { 3, s_6_3, 0, -1, 0}, /* 4 */ { 2, s_6_4, 0, -1, 0}, /* 5 */ { 3, s_6_5, 4, -1, 0}, /* 6 */ { 3, s_6_6, 4, -1, 0}, /* 7 */ { 3, s_6_7, 4, 9, 0}, /* 8 */ { 3, s_6_8, -1, -1, 0}, /* 9 */ { 3, s_6_9, -1, -1, 0}, /* 10 */ { 3, s_6_10, -1, -1, 0}, /* 11 */ 
{ 1, s_6_11, -1, 7, 0}, /* 12 */ { 3, s_6_12, 11, 1, 0}, /* 13 */ { 3, s_6_13, 11, -1, r_VI}, /* 14 */ { 4, s_6_14, 11, -1, r_LONG}, /* 15 */ { 3, s_6_15, 11, 2, 0}, /* 16 */ { 4, s_6_16, 11, -1, r_VI}, /* 17 */ { 3, s_6_17, 11, 3, 0}, /* 18 */ { 4, s_6_18, 11, -1, r_VI}, /* 19 */ { 3, s_6_19, 11, 4, 0}, /* 20 */ { 4, s_6_20, 11, 5, 0}, /* 21 */ { 4, s_6_21, 11, 6, 0}, /* 22 */ { 2, s_6_22, -1, 8, 0}, /* 23 */ { 4, s_6_23, 22, -1, 0}, /* 24 */ { 3, s_6_24, 22, -1, 0}, /* 25 */ { 4, s_6_25, 22, -1, 0}, /* 26 */ { 3, s_6_26, 22, -1, 0}, /* 27 */ { 4, s_6_27, 26, -1, 0}, /* 28 */ { 4, s_6_28, 26, -1, 0}, /* 29 */ { 4, s_6_29, 26, 9, 0} }; static const symbol s_7_0[3] = { 'e', 'j', 'a' }; static const symbol s_7_1[3] = { 'm', 'm', 'a' }; static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' }; static const symbol s_7_3[3] = { 'm', 'p', 'a' }; static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' }; static const symbol s_7_5[3] = { 'm', 'm', 'i' }; static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' }; static const symbol s_7_7[3] = { 'm', 'p', 'i' }; static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' }; static const symbol s_7_9[4] = { 'e', 'j', 0xC3, 0xA4 }; static const symbol s_7_10[4] = { 'm', 'm', 0xC3, 0xA4 }; static const symbol s_7_11[5] = { 'i', 'm', 'm', 0xC3, 0xA4 }; static const symbol s_7_12[4] = { 'm', 'p', 0xC3, 0xA4 }; static const symbol s_7_13[5] = { 'i', 'm', 'p', 0xC3, 0xA4 }; static const struct among a_7[14] = { /* 0 */ { 3, s_7_0, -1, -1, 0}, /* 1 */ { 3, s_7_1, -1, 1, 0}, /* 2 */ { 4, s_7_2, 1, -1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 4, s_7_4, 3, -1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 4, s_7_6, 5, -1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 4, s_7_8, 7, -1, 0}, /* 9 */ { 4, s_7_9, -1, -1, 0}, /* 10 */ { 4, s_7_10, -1, 1, 0}, /* 11 */ { 5, s_7_11, 10, -1, 0}, /* 12 */ { 4, s_7_12, -1, 1, 0}, /* 13 */ { 5, s_7_13, 12, -1, 0} }; static const symbol s_8_0[1] = { 'i' }; static const symbol s_8_1[1] = { 'j' }; static const struct 
among a_8[2] = { /* 0 */ { 1, s_8_0, -1, -1, 0}, /* 1 */ { 1, s_8_1, -1, -1, 0} }; static const symbol s_9_0[3] = { 'm', 'm', 'a' }; static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' }; static const struct among a_9[2] = { /* 0 */ { 3, s_9_0, -1, 1, 0}, /* 1 */ { 4, s_9_1, 0, -1, 0} }; static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const symbol s_0[] = { 'k' }; static const symbol s_1[] = { 'k', 's', 'e' }; static const symbol s_2[] = { 'k', 's', 'i' }; static const symbol s_3[] = { 'i' }; static const symbol s_4[] = { 'a' }; static const symbol s_5[] = { 'e' }; static const symbol s_6[] = { 'i' }; static const symbol s_7[] = { 'o' }; static const symbol s_8[] = { 0xC3, 0xA4 }; static const symbol s_9[] = { 0xC3, 0xB6 }; static const symbol s_10[] = { 'i', 'e' }; static const symbol s_11[] = { 'e' }; static const symbol s_12[] = { 'p', 'o' }; static const symbol s_13[] = { 't' }; static const symbol s_14[] = { 'p', 'o' }; static const symbol s_15[] = { 'j' }; static const symbol s_16[] = { 'o' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'o' }; static const symbol s_19[] = { 'j' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; if (out_grouping_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 46 */ { /* gopast */ /* non V1, line 46 */ int ret = in_grouping_U(z, g_V1, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 46 */ if (out_grouping_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 47 */ { /* gopast */ /* non V1, line 47 */ int ret = in_grouping_U(z, g_V1, 97, 
246, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 47 */ return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_particle_etc(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 55 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 55 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 55 */ among_var = find_among_b(z, a_0, 10); /* substring, line 55 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 55 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (in_grouping_b_U(z, g_particle_end, 97, 246, 0)) return 0; break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 64 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 66 */ if (ret < 0) return ret; } return 1; } static int r_possessive(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 69 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 69 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 69 */ among_var = find_among_b(z, a_4, 9); /* substring, line 69 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 69 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m2 = z->l - z->c; (void)m2; /* not, line 72 */ if (!(eq_s_b(z, 1, s_0))) goto lab0; return 0; lab0: z->c = z->l - m2; } { int ret = slice_del(z); /* delete, line 72 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 74 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 74 */ if (!(eq_s_b(z, 3, s_1))) return 0; z->bra = z->c; /* ], line 74 */ { int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } break; case 
4: if (z->c - 1 <= z->lb || z->p[z->c - 1] != 97) return 0; if (!(find_among_b(z, a_1, 6))) return 0; /* among, line 81 */ { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } break; case 5: if (z->c - 2 <= z->lb || z->p[z->c - 1] != 164) return 0; if (!(find_among_b(z, a_2, 6))) return 0; /* among, line 83 */ { int ret = slice_del(z); /* delete, line 84 */ if (ret < 0) return ret; } break; case 6: if (z->c - 2 <= z->lb || z->p[z->c - 1] != 101) return 0; if (!(find_among_b(z, a_3, 2))) return 0; /* among, line 86 */ { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } break; } return 1; } static int r_LONG(struct SN_env * z) { if (!(find_among_b(z, a_5, 7))) return 0; /* among, line 91 */ return 1; } static int r_VI(struct SN_env * z) { if (!(eq_s_b(z, 1, s_3))) return 0; if (in_grouping_b_U(z, g_V2, 97, 246, 0)) return 0; return 1; } static int r_case_ending(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 96 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 96 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 96 */ among_var = find_among_b(z, a_6, 30); /* substring, line 96 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 96 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (!(eq_s_b(z, 1, s_4))) return 0; break; case 2: if (!(eq_s_b(z, 1, s_5))) return 0; break; case 3: if (!(eq_s_b(z, 1, s_6))) return 0; break; case 4: if (!(eq_s_b(z, 1, s_7))) return 0; break; case 5: if (!(eq_s_b(z, 2, s_8))) return 0; break; case 6: if (!(eq_s_b(z, 2, s_9))) return 0; break; case 7: { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ { int m2 = z->l - z->c; (void)m2; /* and, line 113 */ { int m3 = z->l - z->c; (void)m3; /* or, line 112 */ { int ret = r_LONG(z); if (ret == 0) goto lab2; /* call LONG, line 111 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m3; if 
(!(eq_s_b(z, 2, s_10))) { z->c = z->l - m_keep; goto lab0; } } lab1: z->c = z->l - m2; { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) { z->c = z->l - m_keep; goto lab0; } z->c = ret; /* next, line 113 */ } } z->bra = z->c; /* ], line 113 */ lab0: ; } break; case 8: if (in_grouping_b_U(z, g_V1, 97, 246, 0)) return 0; if (out_grouping_b_U(z, g_V1, 97, 246, 0)) return 0; break; case 9: if (!(eq_s_b(z, 1, s_11))) return 0; break; } { int ret = slice_del(z); /* delete, line 138 */ if (ret < 0) return ret; } z->B[0] = 1; /* set ending_removed, line 139 */ return 1; } static int r_other_endings(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 142 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[1]) return 0; z->c = z->I[1]; /* tomark, line 142 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 142 */ among_var = find_among_b(z, a_7, 14); /* substring, line 142 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 142 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ if (!(eq_s_b(z, 2, s_12))) goto lab0; return 0; lab0: z->c = z->l - m2; } break; } { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } return 1; } static int r_i_plural(struct SN_env * z) { { int mlimit; /* setlimit, line 154 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 154 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 154 */ if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 106)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_8, 2))) { z->lb = mlimit; return 0; } /* substring, line 154 */ z->bra = z->c; /* ], line 154 */ z->lb = mlimit; } { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } return 1; } static int r_t_plural(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 161 */ int m1 = 
z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 161 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 162 */ if (!(eq_s_b(z, 1, s_13))) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 162 */ { int m_test = z->l - z->c; /* test, line 162 */ if (in_grouping_b_U(z, g_V1, 97, 246, 0)) { z->lb = mlimit; return 0; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } z->lb = mlimit; } { int mlimit; /* setlimit, line 165 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[1]) return 0; z->c = z->I[1]; /* tomark, line 165 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; z->ket = z->c; /* [, line 165 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_9, 2); /* substring, line 165 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 165 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m3 = z->l - z->c; (void)m3; /* not, line 167 */ if (!(eq_s_b(z, 2, s_14))) goto lab0; return 0; lab0: z->c = z->l - m3; } break; } { int ret = slice_del(z); /* delete, line 170 */ if (ret < 0) return ret; } return 1; } static int r_tidy(struct SN_env * z) { { int mlimit; /* setlimit, line 173 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 173 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* do, line 174 */ { int m3 = z->l - z->c; (void)m3; /* and, line 174 */ { int ret = r_LONG(z); if (ret == 0) goto lab0; /* call LONG, line 174 */ if (ret < 0) return ret; } z->c = z->l - m3; z->ket = z->c; /* [, line 174 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab0; z->c = ret; /* next, line 174 */ } z->bra = z->c; /* ], line 174 */ { int ret = slice_del(z); /* delete, line 174 */ if (ret < 0) return ret; } } lab0: z->c = z->l - m2; } { int m4 = z->l - z->c; (void)m4; 
/* do, line 175 */ z->ket = z->c; /* [, line 175 */ if (in_grouping_b_U(z, g_AEI, 97, 228, 0)) goto lab1; z->bra = z->c; /* ], line 175 */ if (out_grouping_b_U(z, g_V1, 97, 246, 0)) goto lab1; { int ret = slice_del(z); /* delete, line 175 */ if (ret < 0) return ret; } lab1: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 176 */ z->ket = z->c; /* [, line 176 */ if (!(eq_s_b(z, 1, s_15))) goto lab2; z->bra = z->c; /* ], line 176 */ { int m6 = z->l - z->c; (void)m6; /* or, line 176 */ if (!(eq_s_b(z, 1, s_16))) goto lab4; goto lab3; lab4: z->c = z->l - m6; if (!(eq_s_b(z, 1, s_17))) goto lab2; } lab3: { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } lab2: z->c = z->l - m5; } { int m7 = z->l - z->c; (void)m7; /* do, line 177 */ z->ket = z->c; /* [, line 177 */ if (!(eq_s_b(z, 1, s_18))) goto lab5; z->bra = z->c; /* ], line 177 */ if (!(eq_s_b(z, 1, s_19))) goto lab5; { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } lab5: z->c = z->l - m7; } z->lb = mlimit; } if (in_grouping_b_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* non V1, line 179 */ z->ket = z->c; /* [, line 179 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 179 */ } z->bra = z->c; /* ], line 179 */ z->S[0] = slice_to(z, z->S[0]); /* -> x, line 179 */ if (z->S[0] == 0) return -1; /* -> x, line 179 */ if (!(eq_v_b(z, z->S[0]))) return 0; /* name x, line 179 */ { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } return 1; } extern int finnish_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 185 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 185 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->B[0] = 0; /* unset ending_removed, line 186 */ z->lb = z->c; z->c = z->l; /* backwards, line 187 */ { int m2 = z->l - z->c; (void)m2; /* do, line 188 */ { int ret = r_particle_etc(z); if (ret == 0) goto lab1; /* call 
particle_etc, line 188 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 189 */ { int ret = r_possessive(z); if (ret == 0) goto lab2; /* call possessive, line 189 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 190 */ { int ret = r_case_ending(z); if (ret == 0) goto lab3; /* call case_ending, line 190 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 191 */ { int ret = r_other_endings(z); if (ret == 0) goto lab4; /* call other_endings, line 191 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* or, line 192 */ if (!(z->B[0])) goto lab6; /* Boolean test ending_removed, line 192 */ { int m7 = z->l - z->c; (void)m7; /* do, line 192 */ { int ret = r_i_plural(z); if (ret == 0) goto lab7; /* call i_plural, line 192 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } goto lab5; lab6: z->c = z->l - m6; { int m8 = z->l - z->c; (void)m8; /* do, line 192 */ { int ret = r_t_plural(z); if (ret == 0) goto lab8; /* call t_plural, line 192 */ if (ret < 0) return ret; } lab8: z->c = z->l - m8; } } lab5: { int m9 = z->l - z->c; (void)m9; /* do, line 193 */ { int ret = r_tidy(z); if (ret == 0) goto lab9; /* call tidy, line 193 */ if (ret < 0) return ret; } lab9: z->c = z->l - m9; } z->c = z->lb; return 1; } extern struct SN_env * finnish_UTF_8_create_env(void) { return SN_create_env(1, 2, 1); } extern void finnish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_finnish.h000066400000000000000000000004711217574114600307270ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * finnish_UTF_8_create_env(void); extern void finnish_UTF_8_close_env(struct SN_env * z); extern int 
finnish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_french.c000066400000000000000000001356041217574114600305400ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int french_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_un_accent(struct SN_env * z); static int r_un_double(struct SN_env * z); static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_i_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * french_UTF_8_create_env(void); extern void french_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 'c', 'o', 'l' }; static const symbol s_0_1[3] = { 'p', 'a', 'r' }; static const symbol s_0_2[3] = { 't', 'a', 'p' }; static const struct among a_0[3] = { /* 0 */ { 3, s_0_0, -1, -1, 0}, /* 1 */ { 3, s_0_1, -1, -1, 0}, /* 2 */ { 3, s_0_2, -1, -1, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'U' }; static const symbol s_1_3[1] = { 'Y' }; static const struct among a_1[4] = { /* 0 */ { 0, 0, -1, 4, 0}, /* 1 */ { 1, s_1_1, 0, 1, 0}, /* 2 */ { 1, s_1_2, 0, 2, 0}, /* 3 */ { 1, s_1_3, 0, 3, 0} }; static const symbol s_2_0[3] = { 'i', 'q', 'U' }; static const symbol s_2_1[3] = { 'a', 'b', 'l' }; static const symbol s_2_2[4] = { 'I', 0xC3, 0xA8, 'r' }; static const symbol s_2_3[4] = { 'i', 0xC3, 0xA8, 'r' }; static const symbol s_2_4[3] = { 'e', 'u', 's' }; static const symbol 
s_2_5[2] = { 'i', 'v' }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, 3, 0}, /* 1 */ { 3, s_2_1, -1, 3, 0}, /* 2 */ { 4, s_2_2, -1, 4, 0}, /* 3 */ { 4, s_2_3, -1, 4, 0}, /* 4 */ { 3, s_2_4, -1, 2, 0}, /* 5 */ { 2, s_2_5, -1, 1, 0} }; static const symbol s_3_0[2] = { 'i', 'c' }; static const symbol s_3_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_3_2[2] = { 'i', 'v' }; static const struct among a_3[3] = { /* 0 */ { 2, s_3_0, -1, 2, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 2, s_3_2, -1, 3, 0} }; static const symbol s_4_0[4] = { 'i', 'q', 'U', 'e' }; static const symbol s_4_1[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; static const symbol s_4_2[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_4_3[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_4_4[5] = { 'l', 'o', 'g', 'i', 'e' }; static const symbol s_4_5[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_4_6[4] = { 'i', 's', 'm', 'e' }; static const symbol s_4_7[4] = { 'e', 'u', 's', 'e' }; static const symbol s_4_8[4] = { 'i', 's', 't', 'e' }; static const symbol s_4_9[3] = { 'i', 'v', 'e' }; static const symbol s_4_10[2] = { 'i', 'f' }; static const symbol s_4_11[5] = { 'u', 's', 'i', 'o', 'n' }; static const symbol s_4_12[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_4_13[5] = { 'u', 't', 'i', 'o', 'n' }; static const symbol s_4_14[5] = { 'a', 't', 'e', 'u', 'r' }; static const symbol s_4_15[5] = { 'i', 'q', 'U', 'e', 's' }; static const symbol s_4_16[7] = { 'a', 't', 'r', 'i', 'c', 'e', 's' }; static const symbol s_4_17[5] = { 'a', 'n', 'c', 'e', 's' }; static const symbol s_4_18[5] = { 'e', 'n', 'c', 'e', 's' }; static const symbol s_4_19[6] = { 'l', 'o', 'g', 'i', 'e', 's' }; static const symbol s_4_20[5] = { 'a', 'b', 'l', 'e', 's' }; static const symbol s_4_21[5] = { 'i', 's', 'm', 'e', 's' }; static const symbol s_4_22[5] = { 'e', 'u', 's', 'e', 's' }; static const symbol s_4_23[5] = { 'i', 's', 't', 'e', 's' }; static const symbol s_4_24[4] = { 'i', 'v', 'e', 's' }; 
static const symbol s_4_25[3] = { 'i', 'f', 's' }; static const symbol s_4_26[6] = { 'u', 's', 'i', 'o', 'n', 's' }; static const symbol s_4_27[6] = { 'a', 't', 'i', 'o', 'n', 's' }; static const symbol s_4_28[6] = { 'u', 't', 'i', 'o', 'n', 's' }; static const symbol s_4_29[6] = { 'a', 't', 'e', 'u', 'r', 's' }; static const symbol s_4_30[5] = { 'm', 'e', 'n', 't', 's' }; static const symbol s_4_31[6] = { 'e', 'm', 'e', 'n', 't', 's' }; static const symbol s_4_32[9] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't', 's' }; static const symbol s_4_33[5] = { 'i', 't', 0xC3, 0xA9, 's' }; static const symbol s_4_34[4] = { 'm', 'e', 'n', 't' }; static const symbol s_4_35[5] = { 'e', 'm', 'e', 'n', 't' }; static const symbol s_4_36[8] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't' }; static const symbol s_4_37[6] = { 'a', 'm', 'm', 'e', 'n', 't' }; static const symbol s_4_38[6] = { 'e', 'm', 'm', 'e', 'n', 't' }; static const symbol s_4_39[3] = { 'a', 'u', 'x' }; static const symbol s_4_40[4] = { 'e', 'a', 'u', 'x' }; static const symbol s_4_41[3] = { 'e', 'u', 'x' }; static const symbol s_4_42[4] = { 'i', 't', 0xC3, 0xA9 }; static const struct among a_4[43] = { /* 0 */ { 4, s_4_0, -1, 1, 0}, /* 1 */ { 6, s_4_1, -1, 2, 0}, /* 2 */ { 4, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 5, 0}, /* 4 */ { 5, s_4_4, -1, 3, 0}, /* 5 */ { 4, s_4_5, -1, 1, 0}, /* 6 */ { 4, s_4_6, -1, 1, 0}, /* 7 */ { 4, s_4_7, -1, 11, 0}, /* 8 */ { 4, s_4_8, -1, 1, 0}, /* 9 */ { 3, s_4_9, -1, 8, 0}, /* 10 */ { 2, s_4_10, -1, 8, 0}, /* 11 */ { 5, s_4_11, -1, 4, 0}, /* 12 */ { 5, s_4_12, -1, 2, 0}, /* 13 */ { 5, s_4_13, -1, 4, 0}, /* 14 */ { 5, s_4_14, -1, 2, 0}, /* 15 */ { 5, s_4_15, -1, 1, 0}, /* 16 */ { 7, s_4_16, -1, 2, 0}, /* 17 */ { 5, s_4_17, -1, 1, 0}, /* 18 */ { 5, s_4_18, -1, 5, 0}, /* 19 */ { 6, s_4_19, -1, 3, 0}, /* 20 */ { 5, s_4_20, -1, 1, 0}, /* 21 */ { 5, s_4_21, -1, 1, 0}, /* 22 */ { 5, s_4_22, -1, 11, 0}, /* 23 */ { 5, s_4_23, -1, 1, 0}, /* 24 */ { 4, s_4_24, -1, 8, 0}, /* 25 */ { 3, s_4_25, -1, 
8, 0}, /* 26 */ { 6, s_4_26, -1, 4, 0}, /* 27 */ { 6, s_4_27, -1, 2, 0}, /* 28 */ { 6, s_4_28, -1, 4, 0}, /* 29 */ { 6, s_4_29, -1, 2, 0}, /* 30 */ { 5, s_4_30, -1, 15, 0}, /* 31 */ { 6, s_4_31, 30, 6, 0}, /* 32 */ { 9, s_4_32, 31, 12, 0}, /* 33 */ { 5, s_4_33, -1, 7, 0}, /* 34 */ { 4, s_4_34, -1, 15, 0}, /* 35 */ { 5, s_4_35, 34, 6, 0}, /* 36 */ { 8, s_4_36, 35, 12, 0}, /* 37 */ { 6, s_4_37, 34, 13, 0}, /* 38 */ { 6, s_4_38, 34, 14, 0}, /* 39 */ { 3, s_4_39, -1, 10, 0}, /* 40 */ { 4, s_4_40, 39, 9, 0}, /* 41 */ { 3, s_4_41, -1, 1, 0}, /* 42 */ { 4, s_4_42, -1, 7, 0} }; static const symbol s_5_0[3] = { 'i', 'r', 'a' }; static const symbol s_5_1[2] = { 'i', 'e' }; static const symbol s_5_2[4] = { 'i', 's', 's', 'e' }; static const symbol s_5_3[7] = { 'i', 's', 's', 'a', 'n', 't', 'e' }; static const symbol s_5_4[1] = { 'i' }; static const symbol s_5_5[4] = { 'i', 'r', 'a', 'i' }; static const symbol s_5_6[2] = { 'i', 'r' }; static const symbol s_5_7[4] = { 'i', 'r', 'a', 's' }; static const symbol s_5_8[3] = { 'i', 'e', 's' }; static const symbol s_5_9[5] = { 0xC3, 0xAE, 'm', 'e', 's' }; static const symbol s_5_10[5] = { 'i', 's', 's', 'e', 's' }; static const symbol s_5_11[8] = { 'i', 's', 's', 'a', 'n', 't', 'e', 's' }; static const symbol s_5_12[5] = { 0xC3, 0xAE, 't', 'e', 's' }; static const symbol s_5_13[2] = { 'i', 's' }; static const symbol s_5_14[5] = { 'i', 'r', 'a', 'i', 's' }; static const symbol s_5_15[6] = { 'i', 's', 's', 'a', 'i', 's' }; static const symbol s_5_16[6] = { 'i', 'r', 'i', 'o', 'n', 's' }; static const symbol s_5_17[7] = { 'i', 's', 's', 'i', 'o', 'n', 's' }; static const symbol s_5_18[5] = { 'i', 'r', 'o', 'n', 's' }; static const symbol s_5_19[6] = { 'i', 's', 's', 'o', 'n', 's' }; static const symbol s_5_20[7] = { 'i', 's', 's', 'a', 'n', 't', 's' }; static const symbol s_5_21[2] = { 'i', 't' }; static const symbol s_5_22[5] = { 'i', 'r', 'a', 'i', 't' }; static const symbol s_5_23[6] = { 'i', 's', 's', 'a', 'i', 't' }; static const 
symbol s_5_24[6] = { 'i', 's', 's', 'a', 'n', 't' }; static const symbol s_5_25[7] = { 'i', 'r', 'a', 'I', 'e', 'n', 't' }; static const symbol s_5_26[8] = { 'i', 's', 's', 'a', 'I', 'e', 'n', 't' }; static const symbol s_5_27[5] = { 'i', 'r', 'e', 'n', 't' }; static const symbol s_5_28[6] = { 'i', 's', 's', 'e', 'n', 't' }; static const symbol s_5_29[5] = { 'i', 'r', 'o', 'n', 't' }; static const symbol s_5_30[3] = { 0xC3, 0xAE, 't' }; static const symbol s_5_31[5] = { 'i', 'r', 'i', 'e', 'z' }; static const symbol s_5_32[6] = { 'i', 's', 's', 'i', 'e', 'z' }; static const symbol s_5_33[4] = { 'i', 'r', 'e', 'z' }; static const symbol s_5_34[5] = { 'i', 's', 's', 'e', 'z' }; static const struct among a_5[35] = { /* 0 */ { 3, s_5_0, -1, 1, 0}, /* 1 */ { 2, s_5_1, -1, 1, 0}, /* 2 */ { 4, s_5_2, -1, 1, 0}, /* 3 */ { 7, s_5_3, -1, 1, 0}, /* 4 */ { 1, s_5_4, -1, 1, 0}, /* 5 */ { 4, s_5_5, 4, 1, 0}, /* 6 */ { 2, s_5_6, -1, 1, 0}, /* 7 */ { 4, s_5_7, -1, 1, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 5, s_5_9, -1, 1, 0}, /* 10 */ { 5, s_5_10, -1, 1, 0}, /* 11 */ { 8, s_5_11, -1, 1, 0}, /* 12 */ { 5, s_5_12, -1, 1, 0}, /* 13 */ { 2, s_5_13, -1, 1, 0}, /* 14 */ { 5, s_5_14, 13, 1, 0}, /* 15 */ { 6, s_5_15, 13, 1, 0}, /* 16 */ { 6, s_5_16, -1, 1, 0}, /* 17 */ { 7, s_5_17, -1, 1, 0}, /* 18 */ { 5, s_5_18, -1, 1, 0}, /* 19 */ { 6, s_5_19, -1, 1, 0}, /* 20 */ { 7, s_5_20, -1, 1, 0}, /* 21 */ { 2, s_5_21, -1, 1, 0}, /* 22 */ { 5, s_5_22, 21, 1, 0}, /* 23 */ { 6, s_5_23, 21, 1, 0}, /* 24 */ { 6, s_5_24, -1, 1, 0}, /* 25 */ { 7, s_5_25, -1, 1, 0}, /* 26 */ { 8, s_5_26, -1, 1, 0}, /* 27 */ { 5, s_5_27, -1, 1, 0}, /* 28 */ { 6, s_5_28, -1, 1, 0}, /* 29 */ { 5, s_5_29, -1, 1, 0}, /* 30 */ { 3, s_5_30, -1, 1, 0}, /* 31 */ { 5, s_5_31, -1, 1, 0}, /* 32 */ { 6, s_5_32, -1, 1, 0}, /* 33 */ { 4, s_5_33, -1, 1, 0}, /* 34 */ { 5, s_5_34, -1, 1, 0} }; static const symbol s_6_0[1] = { 'a' }; static const symbol s_6_1[3] = { 'e', 'r', 'a' }; static const symbol s_6_2[4] = { 'a', 's', 's', 
'e' }; static const symbol s_6_3[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_4[3] = { 0xC3, 0xA9, 'e' }; static const symbol s_6_5[2] = { 'a', 'i' }; static const symbol s_6_6[4] = { 'e', 'r', 'a', 'i' }; static const symbol s_6_7[2] = { 'e', 'r' }; static const symbol s_6_8[2] = { 'a', 's' }; static const symbol s_6_9[4] = { 'e', 'r', 'a', 's' }; static const symbol s_6_10[5] = { 0xC3, 0xA2, 'm', 'e', 's' }; static const symbol s_6_11[5] = { 'a', 's', 's', 'e', 's' }; static const symbol s_6_12[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_6_13[5] = { 0xC3, 0xA2, 't', 'e', 's' }; static const symbol s_6_14[4] = { 0xC3, 0xA9, 'e', 's' }; static const symbol s_6_15[3] = { 'a', 'i', 's' }; static const symbol s_6_16[5] = { 'e', 'r', 'a', 'i', 's' }; static const symbol s_6_17[4] = { 'i', 'o', 'n', 's' }; static const symbol s_6_18[6] = { 'e', 'r', 'i', 'o', 'n', 's' }; static const symbol s_6_19[7] = { 'a', 's', 's', 'i', 'o', 'n', 's' }; static const symbol s_6_20[5] = { 'e', 'r', 'o', 'n', 's' }; static const symbol s_6_21[4] = { 'a', 'n', 't', 's' }; static const symbol s_6_22[3] = { 0xC3, 0xA9, 's' }; static const symbol s_6_23[3] = { 'a', 'i', 't' }; static const symbol s_6_24[5] = { 'e', 'r', 'a', 'i', 't' }; static const symbol s_6_25[3] = { 'a', 'n', 't' }; static const symbol s_6_26[5] = { 'a', 'I', 'e', 'n', 't' }; static const symbol s_6_27[7] = { 'e', 'r', 'a', 'I', 'e', 'n', 't' }; static const symbol s_6_28[6] = { 0xC3, 0xA8, 'r', 'e', 'n', 't' }; static const symbol s_6_29[6] = { 'a', 's', 's', 'e', 'n', 't' }; static const symbol s_6_30[5] = { 'e', 'r', 'o', 'n', 't' }; static const symbol s_6_31[3] = { 0xC3, 0xA2, 't' }; static const symbol s_6_32[2] = { 'e', 'z' }; static const symbol s_6_33[3] = { 'i', 'e', 'z' }; static const symbol s_6_34[5] = { 'e', 'r', 'i', 'e', 'z' }; static const symbol s_6_35[6] = { 'a', 's', 's', 'i', 'e', 'z' }; static const symbol s_6_36[4] = { 'e', 'r', 'e', 'z' }; static const symbol s_6_37[2] = { 
0xC3, 0xA9 }; static const struct among a_6[38] = { /* 0 */ { 1, s_6_0, -1, 3, 0}, /* 1 */ { 3, s_6_1, 0, 2, 0}, /* 2 */ { 4, s_6_2, -1, 3, 0}, /* 3 */ { 4, s_6_3, -1, 3, 0}, /* 4 */ { 3, s_6_4, -1, 2, 0}, /* 5 */ { 2, s_6_5, -1, 3, 0}, /* 6 */ { 4, s_6_6, 5, 2, 0}, /* 7 */ { 2, s_6_7, -1, 2, 0}, /* 8 */ { 2, s_6_8, -1, 3, 0}, /* 9 */ { 4, s_6_9, 8, 2, 0}, /* 10 */ { 5, s_6_10, -1, 3, 0}, /* 11 */ { 5, s_6_11, -1, 3, 0}, /* 12 */ { 5, s_6_12, -1, 3, 0}, /* 13 */ { 5, s_6_13, -1, 3, 0}, /* 14 */ { 4, s_6_14, -1, 2, 0}, /* 15 */ { 3, s_6_15, -1, 3, 0}, /* 16 */ { 5, s_6_16, 15, 2, 0}, /* 17 */ { 4, s_6_17, -1, 1, 0}, /* 18 */ { 6, s_6_18, 17, 2, 0}, /* 19 */ { 7, s_6_19, 17, 3, 0}, /* 20 */ { 5, s_6_20, -1, 2, 0}, /* 21 */ { 4, s_6_21, -1, 3, 0}, /* 22 */ { 3, s_6_22, -1, 2, 0}, /* 23 */ { 3, s_6_23, -1, 3, 0}, /* 24 */ { 5, s_6_24, 23, 2, 0}, /* 25 */ { 3, s_6_25, -1, 3, 0}, /* 26 */ { 5, s_6_26, -1, 3, 0}, /* 27 */ { 7, s_6_27, 26, 2, 0}, /* 28 */ { 6, s_6_28, -1, 2, 0}, /* 29 */ { 6, s_6_29, -1, 3, 0}, /* 30 */ { 5, s_6_30, -1, 2, 0}, /* 31 */ { 3, s_6_31, -1, 3, 0}, /* 32 */ { 2, s_6_32, -1, 2, 0}, /* 33 */ { 3, s_6_33, 32, 2, 0}, /* 34 */ { 5, s_6_34, 33, 2, 0}, /* 35 */ { 6, s_6_35, 33, 3, 0}, /* 36 */ { 4, s_6_36, 32, 2, 0}, /* 37 */ { 2, s_6_37, -1, 2, 0} }; static const symbol s_7_0[1] = { 'e' }; static const symbol s_7_1[5] = { 'I', 0xC3, 0xA8, 'r', 'e' }; static const symbol s_7_2[5] = { 'i', 0xC3, 0xA8, 'r', 'e' }; static const symbol s_7_3[3] = { 'i', 'o', 'n' }; static const symbol s_7_4[3] = { 'I', 'e', 'r' }; static const symbol s_7_5[3] = { 'i', 'e', 'r' }; static const symbol s_7_6[2] = { 0xC3, 0xAB }; static const struct among a_7[7] = { /* 0 */ { 1, s_7_0, -1, 3, 0}, /* 1 */ { 5, s_7_1, 0, 2, 0}, /* 2 */ { 5, s_7_2, 0, 2, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 3, s_7_4, -1, 2, 0}, /* 5 */ { 3, s_7_5, -1, 2, 0}, /* 6 */ { 2, s_7_6, -1, 4, 0} }; static const symbol s_8_0[3] = { 'e', 'l', 'l' }; static const symbol s_8_1[4] = { 'e', 'i', 'l', 
'l' }; static const symbol s_8_2[3] = { 'e', 'n', 'n' }; static const symbol s_8_3[3] = { 'o', 'n', 'n' }; static const symbol s_8_4[3] = { 'e', 't', 't' }; static const struct among a_8[5] = { /* 0 */ { 3, s_8_0, -1, -1, 0}, /* 1 */ { 4, s_8_1, -1, -1, 0}, /* 2 */ { 3, s_8_2, -1, -1, 0}, /* 3 */ { 3, s_8_3, -1, -1, 0}, /* 4 */ { 3, s_8_4, -1, -1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5 }; static const unsigned char g_keep_with_s[] = { 1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const symbol s_0[] = { 'u' }; static const symbol s_1[] = { 'U' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'I' }; static const symbol s_4[] = { 'y' }; static const symbol s_5[] = { 'Y' }; static const symbol s_6[] = { 'y' }; static const symbol s_7[] = { 'Y' }; static const symbol s_8[] = { 'q' }; static const symbol s_9[] = { 'u' }; static const symbol s_10[] = { 'U' }; static const symbol s_11[] = { 'i' }; static const symbol s_12[] = { 'u' }; static const symbol s_13[] = { 'y' }; static const symbol s_14[] = { 'i', 'c' }; static const symbol s_15[] = { 'i', 'q', 'U' }; static const symbol s_16[] = { 'l', 'o', 'g' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'e', 'n', 't' }; static const symbol s_19[] = { 'a', 't' }; static const symbol s_20[] = { 'e', 'u', 'x' }; static const symbol s_21[] = { 'i' }; static const symbol s_22[] = { 'a', 'b', 'l' }; static const symbol s_23[] = { 'i', 'q', 'U' }; static const symbol s_24[] = { 'a', 't' }; static const symbol s_25[] = { 'i', 'c' }; static const symbol s_26[] = { 'i', 'q', 'U' }; static const symbol s_27[] = { 'e', 'a', 'u' }; static const symbol s_28[] = { 'a', 'l' }; static const symbol s_29[] = { 'e', 'u', 'x' }; static const symbol s_30[] = { 'a', 'n', 't' }; static const symbol s_31[] = { 'e', 'n', 't' }; static const symbol s_32[] = { 'e' }; static const symbol s_33[] = { 's' }; static const 
symbol s_34[] = { 's' }; static const symbol s_35[] = { 't' }; static const symbol s_36[] = { 'i' }; static const symbol s_37[] = { 'g', 'u' }; static const symbol s_38[] = { 0xC3, 0xA9 }; static const symbol s_39[] = { 0xC3, 0xA8 }; static const symbol s_40[] = { 'e' }; static const symbol s_41[] = { 'Y' }; static const symbol s_42[] = { 'i' }; static const symbol s_43[] = { 0xC3, 0xA7 }; static const symbol s_44[] = { 'c' }; static int r_prelude(struct SN_env * z) { while(1) { /* repeat, line 38 */ int c1 = z->c; while(1) { /* goto, line 38 */ int c2 = z->c; { int c3 = z->c; /* or, line 44 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab3; z->bra = z->c; /* [, line 40 */ { int c4 = z->c; /* or, line 40 */ if (!(eq_s(z, 1, s_0))) goto lab5; z->ket = z->c; /* ], line 40 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab5; { int ret = slice_from_s(z, 1, s_1); /* <-, line 40 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = c4; if (!(eq_s(z, 1, s_2))) goto lab6; z->ket = z->c; /* ], line 41 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab6; { int ret = slice_from_s(z, 1, s_3); /* <-, line 41 */ if (ret < 0) return ret; } goto lab4; lab6: z->c = c4; if (!(eq_s(z, 1, s_4))) goto lab3; z->ket = z->c; /* ], line 42 */ { int ret = slice_from_s(z, 1, s_5); /* <-, line 42 */ if (ret < 0) return ret; } } lab4: goto lab2; lab3: z->c = c3; z->bra = z->c; /* [, line 45 */ if (!(eq_s(z, 1, s_6))) goto lab7; z->ket = z->c; /* ], line 45 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab7; { int ret = slice_from_s(z, 1, s_7); /* <-, line 45 */ if (ret < 0) return ret; } goto lab2; lab7: z->c = c3; if (!(eq_s(z, 1, s_8))) goto lab1; z->bra = z->c; /* [, line 47 */ if (!(eq_s(z, 1, s_9))) goto lab1; z->ket = z->c; /* ], line 47 */ { int ret = slice_from_s(z, 1, s_10); /* <-, line 47 */ if (ret < 0) return ret; } } lab2: z->c = c2; break; lab1: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* goto, line 38 */ } } continue; lab0: 
z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 56 */ { int c2 = z->c; /* or, line 58 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab2; if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab2; z->c = ret; /* next, line 57 */ } goto lab1; lab2: z->c = c2; if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((331776 >> (z->p[z->c + 2] & 0x1f)) & 1)) goto lab3; if (!(find_among(z, a_0, 3))) goto lab3; /* among, line 59 */ goto lab1; lab3: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 66 */ } { /* gopast */ /* grouping v, line 66 */ int ret = out_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab0; z->c += ret; } } lab1: z->I[0] = z->c; /* setmark pV, line 67 */ lab0: z->c = c1; } { int c3 = z->c; /* do, line 69 */ { /* gopast */ /* grouping v, line 70 */ int ret = out_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 70 */ int ret = in_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 70 */ { /* gopast */ /* grouping v, line 71 */ int ret = out_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 71 */ int ret = in_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 71 */ lab4: z->c = c3; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 75 */ int c1 = z->c; z->bra = z->c; /* [, line 77 */ if (z->c >= z->l || z->p[z->c + 0] >> 5 != 2 || !((35652096 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 4; else among_var = find_among(z, a_1, 4); /* substring, line 77 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 77 */ switch(among_var) { case 0: goto lab0; case 1: { int ret 
= slice_from_s(z, 1, s_11); /* <-, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_13); /* <-, line 80 */ if (ret < 0) return ret; } break; case 4: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 81 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 92 */ among_var = find_among_b(z, a_4, 43); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 96 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 99 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 100 */ z->ket = z->c; /* [, line 100 */ if (!(eq_s_b(z, 2, s_14))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 100 */ { int m1 = z->l - z->c; (void)m1; /* or, line 100 */ { int ret = r_R2(z); if (ret == 0) goto lab2; /* call R2, line 100 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 100 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m1; { int ret = slice_from_s(z, 3, s_15); /* <-, line 100 */ if (ret < 0) return ret; } } lab1: lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 104 */ if (ret < 0) return ret; } { int ret = 
slice_from_s(z, 3, s_16); /* <-, line 104 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 107 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_17); /* <-, line 107 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 110 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_18); /* <-, line 110 */ if (ret < 0) return ret; } break; case 6: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 114 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 114 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 115 */ z->ket = z->c; /* [, line 116 */ among_var = find_among_b(z, a_2, 6); /* substring, line 116 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 116 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab3; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 117 */ if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 117 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } break; case 2: { int m2 = z->l - z->c; (void)m2; /* or, line 118 */ { int ret = r_R2(z); if (ret == 0) goto lab5; /* call R2, line 118 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 118 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = z->l - m2; { int ret = r_R1(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R1, line 118 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_20); /* <-, line 118 */ if (ret < 0) return ret; } } lab4: break; 
case 3: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 120 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 120 */ if (ret < 0) return ret; } break; case 4: { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call RV, line 122 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_21); /* <-, line 122 */ if (ret < 0) return ret; } break; } lab3: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 129 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 129 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 130 */ z->ket = z->c; /* [, line 131 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab6; } among_var = find_among_b(z, a_3, 3); /* substring, line 131 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab6; } z->bra = z->c; /* ], line 131 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab6; } case 1: { int m3 = z->l - z->c; (void)m3; /* or, line 132 */ { int ret = r_R2(z); if (ret == 0) goto lab8; /* call R2, line 132 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 132 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m3; { int ret = slice_from_s(z, 3, s_22); /* <-, line 132 */ if (ret < 0) return ret; } } lab7: break; case 2: { int m4 = z->l - z->c; (void)m4; /* or, line 133 */ { int ret = r_R2(z); if (ret == 0) goto lab10; /* call R2, line 133 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 133 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m4; { int ret = slice_from_s(z, 3, s_23); /* <-, line 133 */ if (ret < 0) return ret; } } lab9: break; case 3: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 
134 */ if (ret < 0) return ret; } break; } lab6: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 141 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 141 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 142 */ z->ket = z->c; /* [, line 142 */ if (!(eq_s_b(z, 2, s_24))) { z->c = z->l - m_keep; goto lab11; } z->bra = z->c; /* ], line 142 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab11; } /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 142 */ if (!(eq_s_b(z, 2, s_25))) { z->c = z->l - m_keep; goto lab11; } z->bra = z->c; /* ], line 142 */ { int m5 = z->l - z->c; (void)m5; /* or, line 142 */ { int ret = r_R2(z); if (ret == 0) goto lab13; /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } goto lab12; lab13: z->c = z->l - m5; { int ret = slice_from_s(z, 3, s_26); /* <-, line 142 */ if (ret < 0) return ret; } } lab12: lab11: ; } break; case 9: { int ret = slice_from_s(z, 3, s_27); /* <-, line 144 */ if (ret < 0) return ret; } break; case 10: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 145 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_28); /* <-, line 145 */ if (ret < 0) return ret; } break; case 11: { int m6 = z->l - z->c; (void)m6; /* or, line 147 */ { int ret = r_R2(z); if (ret == 0) goto lab15; /* call R2, line 147 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } goto lab14; lab15: z->c = z->l - m6; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 147 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_29); /* <-, line 147 */ if (ret < 0) return ret; } } lab14: break; case 12: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 150 */ if (ret < 0) 
return ret; } if (out_grouping_b_U(z, g_v, 97, 251, 0)) return 0; { int ret = slice_del(z); /* delete, line 150 */ if (ret < 0) return ret; } break; case 13: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 155 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_30); /* <-, line 155 */ if (ret < 0) return ret; } return 0; /* fail, line 155 */ break; case 14: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 156 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_31); /* <-, line 156 */ if (ret < 0) return ret; } return 0; /* fail, line 156 */ break; case 15: { int m_test = z->l - z->c; /* test, line 158 */ if (in_grouping_b_U(z, g_v, 97, 251, 0)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 158 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } return 0; /* fail, line 158 */ break; } return 1; } static int r_i_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 163 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 163 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 164 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68944418 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_5, 35); /* substring, line 164 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 164 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: if (out_grouping_b_U(z, g_v, 97, 251, 0)) { z->lb = mlimit; return 0; } { int ret = slice_del(z); /* delete, line 170 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 174 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 174 */ mlimit = z->lb; z->lb = z->c; 
z->c = z->l - m1; z->ket = z->c; /* [, line 175 */ among_var = find_among_b(z, a_6, 38); /* substring, line 175 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 175 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = r_R2(z); if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 177 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 185 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 190 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 191 */ z->ket = z->c; /* [, line 191 */ if (!(eq_s_b(z, 1, s_32))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 191 */ { int ret = slice_del(z); /* delete, line 191 */ if (ret < 0) return ret; } lab0: ; } break; } z->lb = mlimit; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 199 */ z->ket = z->c; /* [, line 199 */ if (!(eq_s_b(z, 1, s_33))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 199 */ { int m_test = z->l - z->c; /* test, line 199 */ if (out_grouping_b_U(z, g_keep_with_s, 97, 232, 0)) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 199 */ if (ret < 0) return ret; } lab0: ; } { int mlimit; /* setlimit, line 200 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 200 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 201 */ among_var = find_among_b(z, a_7, 7); /* substring, line 201 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 201 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = r_R2(z); if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 202 */ if (ret < 
0) return ret; } { int m2 = z->l - z->c; (void)m2; /* or, line 202 */ if (!(eq_s_b(z, 1, s_34))) goto lab2; goto lab1; lab2: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_35))) { z->lb = mlimit; return 0; } } lab1: { int ret = slice_del(z); /* delete, line 202 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_36); /* <-, line 204 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 205 */ if (ret < 0) return ret; } break; case 4: if (!(eq_s_b(z, 2, s_37))) { z->lb = mlimit; return 0; } { int ret = slice_del(z); /* delete, line 206 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_un_double(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 212 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1069056 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_8, 5))) return 0; /* among, line 212 */ z->c = z->l - m_test; } z->ket = z->c; /* [, line 212 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 212 */ } z->bra = z->c; /* ], line 212 */ { int ret = slice_del(z); /* delete, line 212 */ if (ret < 0) return ret; } return 1; } static int r_un_accent(struct SN_env * z) { { int i = 1; while(1) { /* atleast, line 216 */ if (out_grouping_b_U(z, g_v, 97, 251, 0)) goto lab0; i--; continue; lab0: break; } if (i > 0) return 0; } z->ket = z->c; /* [, line 217 */ { int m1 = z->l - z->c; (void)m1; /* or, line 217 */ if (!(eq_s_b(z, 2, s_38))) goto lab2; goto lab1; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_39))) return 0; } lab1: z->bra = z->c; /* ], line 217 */ { int ret = slice_from_s(z, 1, s_40); /* <-, line 217 */ if (ret < 0) return ret; } return 1; } extern int french_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 223 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 223 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 224 */ { int ret 
= r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 224 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 225 */ { int m3 = z->l - z->c; (void)m3; /* do, line 227 */ { int m4 = z->l - z->c; (void)m4; /* or, line 237 */ { int m5 = z->l - z->c; (void)m5; /* and, line 233 */ { int m6 = z->l - z->c; (void)m6; /* or, line 229 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab6; /* call standard_suffix, line 229 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_i_verb_suffix(z); if (ret == 0) goto lab7; /* call i_verb_suffix, line 230 */ if (ret < 0) return ret; } goto lab5; lab7: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ if (ret < 0) return ret; } } lab5: z->c = z->l - m5; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 234 */ z->ket = z->c; /* [, line 234 */ { int m7 = z->l - z->c; (void)m7; /* or, line 234 */ if (!(eq_s_b(z, 1, s_41))) goto lab10; z->bra = z->c; /* ], line 234 */ { int ret = slice_from_s(z, 1, s_42); /* <-, line 234 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m7; if (!(eq_s_b(z, 2, s_43))) { z->c = z->l - m_keep; goto lab8; } z->bra = z->c; /* ], line 235 */ { int ret = slice_from_s(z, 1, s_44); /* <-, line 235 */ if (ret < 0) return ret; } } lab9: lab8: ; } } goto lab3; lab4: z->c = z->l - m4; { int ret = r_residual_suffix(z); if (ret == 0) goto lab2; /* call residual_suffix, line 238 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m8 = z->l - z->c; (void)m8; /* do, line 243 */ { int ret = r_un_double(z); if (ret == 0) goto lab11; /* call un_double, line 243 */ if (ret < 0) return ret; } lab11: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 244 */ { int ret = r_un_accent(z); if (ret == 0) goto lab12; /* call un_accent, line 244 */ if (ret < 0) return ret; } lab12: z->c = z->l - m9; } z->c = z->lb; { int c10 = z->c; 
/* do, line 246 */ { int ret = r_postlude(z); if (ret == 0) goto lab13; /* call postlude, line 246 */ if (ret < 0) return ret; } lab13: z->c = c10; } return 1; } extern struct SN_env * french_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void french_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_french.h000066400000000000000000000004661217574114600305420ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * french_UTF_8_create_env(void); extern void french_UTF_8_close_env(struct SN_env * z); extern int french_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_german.c000066400000000000000000000431111217574114600305330ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int german_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * german_UTF_8_create_env(void); extern void german_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 'U' }; static const symbol s_0_2[1] = { 'Y' }; static const symbol s_0_3[2] = { 0xC3, 0xA4 }; static const symbol s_0_4[2] = { 0xC3, 0xB6 }; static const symbol s_0_5[2] = { 0xC3, 0xBC }; static const struct among a_0[6] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 1, s_0_1, 0, 2, 0}, /* 2 */ { 1, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, 3, 0}, /* 4 */ { 
2, s_0_4, 0, 4, 0}, /* 5 */ { 2, s_0_5, 0, 5, 0} }; static const symbol s_1_0[1] = { 'e' }; static const symbol s_1_1[2] = { 'e', 'm' }; static const symbol s_1_2[2] = { 'e', 'n' }; static const symbol s_1_3[3] = { 'e', 'r', 'n' }; static const symbol s_1_4[2] = { 'e', 'r' }; static const symbol s_1_5[1] = { 's' }; static const symbol s_1_6[2] = { 'e', 's' }; static const struct among a_1[7] = { /* 0 */ { 1, s_1_0, -1, 2, 0}, /* 1 */ { 2, s_1_1, -1, 1, 0}, /* 2 */ { 2, s_1_2, -1, 2, 0}, /* 3 */ { 3, s_1_3, -1, 1, 0}, /* 4 */ { 2, s_1_4, -1, 1, 0}, /* 5 */ { 1, s_1_5, -1, 3, 0}, /* 6 */ { 2, s_1_6, 5, 2, 0} }; static const symbol s_2_0[2] = { 'e', 'n' }; static const symbol s_2_1[2] = { 'e', 'r' }; static const symbol s_2_2[2] = { 's', 't' }; static const symbol s_2_3[3] = { 'e', 's', 't' }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 2, s_2_1, -1, 1, 0}, /* 2 */ { 2, s_2_2, -1, 2, 0}, /* 3 */ { 3, s_2_3, 2, 1, 0} }; static const symbol s_3_0[2] = { 'i', 'g' }; static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0} }; static const symbol s_4_0[3] = { 'e', 'n', 'd' }; static const symbol s_4_1[2] = { 'i', 'g' }; static const symbol s_4_2[3] = { 'u', 'n', 'g' }; static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; static const symbol s_4_5[2] = { 'i', 'k' }; static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; static const struct among a_4[8] = { /* 0 */ { 3, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 2, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 3, 0}, /* 4 */ { 4, s_4_4, -1, 2, 0}, /* 5 */ { 2, s_4_5, -1, 2, 0}, /* 6 */ { 4, s_4_6, -1, 3, 0}, /* 7 */ { 4, s_4_7, -1, 4, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; static const unsigned char 
g_s_ending[] = { 117, 30, 5 }; static const unsigned char g_st_ending[] = { 117, 30, 4 }; static const symbol s_0[] = { 0xC3, 0x9F }; static const symbol s_1[] = { 's', 's' }; static const symbol s_2[] = { 'u' }; static const symbol s_3[] = { 'U' }; static const symbol s_4[] = { 'y' }; static const symbol s_5[] = { 'Y' }; static const symbol s_6[] = { 'y' }; static const symbol s_7[] = { 'u' }; static const symbol s_8[] = { 'a' }; static const symbol s_9[] = { 'o' }; static const symbol s_10[] = { 'u' }; static const symbol s_11[] = { 's' }; static const symbol s_12[] = { 'n', 'i', 's' }; static const symbol s_13[] = { 'i', 'g' }; static const symbol s_14[] = { 'e' }; static const symbol s_15[] = { 'e' }; static const symbol s_16[] = { 'e', 'r' }; static const symbol s_17[] = { 'e', 'n' }; static int r_prelude(struct SN_env * z) { { int c_test = z->c; /* test, line 35 */ while(1) { /* repeat, line 35 */ int c1 = z->c; { int c2 = z->c; /* or, line 38 */ z->bra = z->c; /* [, line 37 */ if (!(eq_s(z, 2, s_0))) goto lab2; z->ket = z->c; /* ], line 37 */ { int ret = slice_from_s(z, 2, s_1); /* <-, line 37 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 38 */ } } lab1: continue; lab0: z->c = c1; break; } z->c = c_test; } while(1) { /* repeat, line 41 */ int c3 = z->c; while(1) { /* goto, line 41 */ int c4 = z->c; if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; z->bra = z->c; /* [, line 42 */ { int c5 = z->c; /* or, line 42 */ if (!(eq_s(z, 1, s_2))) goto lab6; z->ket = z->c; /* ], line 42 */ if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab6; { int ret = slice_from_s(z, 1, s_3); /* <-, line 42 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = c5; if (!(eq_s(z, 1, s_4))) goto lab4; z->ket = z->c; /* ], line 43 */ if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; { int ret = slice_from_s(z, 1, s_5); /* <-, line 43 */ if (ret < 0) return ret; } } lab5: z->c = c4; 
break; lab4: z->c = c4; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab3; z->c = ret; /* goto, line 41 */ } } continue; lab3: z->c = c3; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c_test = z->c; /* test, line 52 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) return 0; z->c = ret; /* hop, line 52 */ } z->I[2] = z->c; /* setmark x, line 52 */ z->c = c_test; } { /* gopast */ /* grouping v, line 54 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 54 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 54 */ /* try, line 55 */ if (!(z->I[0] < z->I[2])) goto lab0; z->I[0] = z->I[2]; lab0: { /* gopast */ /* grouping v, line 56 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 56 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 56 */ return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 60 */ int c1 = z->c; z->bra = z->c; /* [, line 62 */ among_var = find_among(z, a_0, 6); /* substring, line 62 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 62 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_6); /* <-, line 63 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_7); /* <-, line 64 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_8); /* <-, line 65 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_9); /* <-, line 66 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */ if (ret < 0) return ret; } break; case 6: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = 
ret; /* next, line 68 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 79 */ z->ket = z->c; /* [, line 80 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; among_var = find_among_b(z, a_1, 7); /* substring, line 80 */ if (!(among_var)) goto lab0; z->bra = z->c; /* ], line 80 */ { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 80 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_del(z); /* delete, line 82 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 85 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 86 */ z->ket = z->c; /* [, line 86 */ if (!(eq_s_b(z, 1, s_11))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 86 */ if (!(eq_s_b(z, 3, s_12))) { z->c = z->l - m_keep; goto lab1; } { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } lab1: ; } break; case 3: if (in_grouping_b_U(z, g_s_ending, 98, 116, 0)) goto lab0; { int ret = slice_del(z); /* delete, line 89 */ if (ret < 0) return ret; } break; } lab0: z->c = z->l - m1; } { int m2 = z->l - z->c; (void)m2; /* do, line 93 */ z->ket = z->c; /* [, line 94 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; among_var = find_among_b(z, a_2, 4); /* substring, line 94 */ if (!(among_var)) goto lab2; z->bra = z->c; /* ], line 94 */ { int ret = r_R1(z); if (ret == 0) goto lab2; /* call R1, line 94 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab2; case 1: { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } 
break; case 2: if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab2; { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 3); if (ret < 0) goto lab2; z->c = ret; /* hop, line 99 */ } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; } lab2: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 103 */ z->ket = z->c; /* [, line 104 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3; among_var = find_among_b(z, a_4, 8); /* substring, line 104 */ if (!(among_var)) goto lab3; z->bra = z->c; /* ], line 104 */ { int ret = r_R2(z); if (ret == 0) goto lab3; /* call R2, line 104 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab3; case 1: { int ret = slice_del(z); /* delete, line 106 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 107 */ z->ket = z->c; /* [, line 107 */ if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab4; } z->bra = z->c; /* ], line 107 */ { int m4 = z->l - z->c; (void)m4; /* not, line 107 */ if (!(eq_s_b(z, 1, s_14))) goto lab5; { z->c = z->l - m_keep; goto lab4; } lab5: z->c = z->l - m4; } { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 107 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 107 */ if (ret < 0) return ret; } lab4: ; } break; case 2: { int m5 = z->l - z->c; (void)m5; /* not, line 110 */ if (!(eq_s_b(z, 1, s_15))) goto lab6; goto lab3; lab6: z->c = z->l - m5; } { int ret = slice_del(z); /* delete, line 110 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ z->ket = z->c; /* [, line 115 */ { int m6 = z->l - z->c; (void)m6; /* or, line 115 */ if (!(eq_s_b(z, 2, s_16))) goto lab9; goto lab8; lab9: z->c = z->l - m6; if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab7; } } 
lab8: z->bra = z->c; /* ], line 115 */ { int ret = r_R1(z); if (ret == 0) { z->c = z->l - m_keep; goto lab7; } /* call R1, line 115 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 115 */ if (ret < 0) return ret; } lab7: ; } break; case 4: { int ret = slice_del(z); /* delete, line 119 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 120 */ z->ket = z->c; /* [, line 121 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab10; } among_var = find_among_b(z, a_3, 2); /* substring, line 121 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab10; } z->bra = z->c; /* ], line 121 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call R2, line 121 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab10; } case 1: { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } break; } lab10: ; } break; } lab3: z->c = z->l - m3; } return 1; } extern int german_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 134 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 134 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 135 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 135 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 136 */ { int m3 = z->l - z->c; (void)m3; /* do, line 137 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab2; /* call standard_suffix, line 137 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; { int c4 = z->c; /* do, line 138 */ { int ret = r_postlude(z); if (ret == 0) goto lab3; /* call postlude, line 138 */ if (ret < 0) return ret; } lab3: z->c = c4; } return 1; } extern struct SN_env * german_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void 
german_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_german.h000066400000000000000000000004661217574114600305460ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * german_UTF_8_create_env(void); extern void german_UTF_8_close_env(struct SN_env * z); extern int german_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_hungarian.c000066400000000000000000001210131217574114600312340ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int hungarian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_double(struct SN_env * z); static int r_undouble(struct SN_env * z); static int r_factive(struct SN_env * z); static int r_instrum(struct SN_env * z); static int r_plur_owner(struct SN_env * z); static int r_sing_owner(struct SN_env * z); static int r_owned(struct SN_env * z); static int r_plural(struct SN_env * z); static int r_case_other(struct SN_env * z); static int r_case_special(struct SN_env * z); static int r_case(struct SN_env * z); static int r_v_ending(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * hungarian_UTF_8_create_env(void); extern void hungarian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[2] = { 'c', 's' }; static const symbol s_0_1[3] = { 'd', 'z', 's' }; static const symbol s_0_2[2] = { 'g', 'y' }; static const symbol s_0_3[2] = { 'l', 'y' }; static const symbol s_0_4[2] = { 'n', 'y' }; static const symbol s_0_5[2] = { 's', 'z' }; static const symbol 
s_0_6[2] = { 't', 'y' }; static const symbol s_0_7[2] = { 'z', 's' }; static const struct among a_0[8] = { /* 0 */ { 2, s_0_0, -1, -1, 0}, /* 1 */ { 3, s_0_1, -1, -1, 0}, /* 2 */ { 2, s_0_2, -1, -1, 0}, /* 3 */ { 2, s_0_3, -1, -1, 0}, /* 4 */ { 2, s_0_4, -1, -1, 0}, /* 5 */ { 2, s_0_5, -1, -1, 0}, /* 6 */ { 2, s_0_6, -1, -1, 0}, /* 7 */ { 2, s_0_7, -1, -1, 0} }; static const symbol s_1_0[2] = { 0xC3, 0xA1 }; static const symbol s_1_1[2] = { 0xC3, 0xA9 }; static const struct among a_1[2] = { /* 0 */ { 2, s_1_0, -1, 1, 0}, /* 1 */ { 2, s_1_1, -1, 2, 0} }; static const symbol s_2_0[2] = { 'b', 'b' }; static const symbol s_2_1[2] = { 'c', 'c' }; static const symbol s_2_2[2] = { 'd', 'd' }; static const symbol s_2_3[2] = { 'f', 'f' }; static const symbol s_2_4[2] = { 'g', 'g' }; static const symbol s_2_5[2] = { 'j', 'j' }; static const symbol s_2_6[2] = { 'k', 'k' }; static const symbol s_2_7[2] = { 'l', 'l' }; static const symbol s_2_8[2] = { 'm', 'm' }; static const symbol s_2_9[2] = { 'n', 'n' }; static const symbol s_2_10[2] = { 'p', 'p' }; static const symbol s_2_11[2] = { 'r', 'r' }; static const symbol s_2_12[3] = { 'c', 'c', 's' }; static const symbol s_2_13[2] = { 's', 's' }; static const symbol s_2_14[3] = { 'z', 'z', 's' }; static const symbol s_2_15[2] = { 't', 't' }; static const symbol s_2_16[2] = { 'v', 'v' }; static const symbol s_2_17[3] = { 'g', 'g', 'y' }; static const symbol s_2_18[3] = { 'l', 'l', 'y' }; static const symbol s_2_19[3] = { 'n', 'n', 'y' }; static const symbol s_2_20[3] = { 't', 't', 'y' }; static const symbol s_2_21[3] = { 's', 's', 'z' }; static const symbol s_2_22[2] = { 'z', 'z' }; static const struct among a_2[23] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, -1, 0}, /* 4 */ { 2, s_2_4, -1, -1, 0}, /* 5 */ { 2, s_2_5, -1, -1, 0}, /* 6 */ { 2, s_2_6, -1, -1, 0}, /* 7 */ { 2, s_2_7, -1, -1, 0}, /* 8 */ { 2, s_2_8, -1, -1, 0}, /* 9 */ { 2, s_2_9, -1, -1, 
0}, /* 10 */ { 2, s_2_10, -1, -1, 0}, /* 11 */ { 2, s_2_11, -1, -1, 0}, /* 12 */ { 3, s_2_12, -1, -1, 0}, /* 13 */ { 2, s_2_13, -1, -1, 0}, /* 14 */ { 3, s_2_14, -1, -1, 0}, /* 15 */ { 2, s_2_15, -1, -1, 0}, /* 16 */ { 2, s_2_16, -1, -1, 0}, /* 17 */ { 3, s_2_17, -1, -1, 0}, /* 18 */ { 3, s_2_18, -1, -1, 0}, /* 19 */ { 3, s_2_19, -1, -1, 0}, /* 20 */ { 3, s_2_20, -1, -1, 0}, /* 21 */ { 3, s_2_21, -1, -1, 0}, /* 22 */ { 2, s_2_22, -1, -1, 0} }; static const symbol s_3_0[2] = { 'a', 'l' }; static const symbol s_3_1[2] = { 'e', 'l' }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 2, s_3_1, -1, 2, 0} }; static const symbol s_4_0[2] = { 'b', 'a' }; static const symbol s_4_1[2] = { 'r', 'a' }; static const symbol s_4_2[2] = { 'b', 'e' }; static const symbol s_4_3[2] = { 'r', 'e' }; static const symbol s_4_4[2] = { 'i', 'g' }; static const symbol s_4_5[3] = { 'n', 'a', 'k' }; static const symbol s_4_6[3] = { 'n', 'e', 'k' }; static const symbol s_4_7[3] = { 'v', 'a', 'l' }; static const symbol s_4_8[3] = { 'v', 'e', 'l' }; static const symbol s_4_9[2] = { 'u', 'l' }; static const symbol s_4_10[4] = { 'n', 0xC3, 0xA1, 'l' }; static const symbol s_4_11[4] = { 'n', 0xC3, 0xA9, 'l' }; static const symbol s_4_12[4] = { 'b', 0xC3, 0xB3, 'l' }; static const symbol s_4_13[4] = { 'r', 0xC3, 0xB3, 'l' }; static const symbol s_4_14[4] = { 't', 0xC3, 0xB3, 'l' }; static const symbol s_4_15[4] = { 'b', 0xC3, 0xB5, 'l' }; static const symbol s_4_16[4] = { 'r', 0xC3, 0xB5, 'l' }; static const symbol s_4_17[4] = { 't', 0xC3, 0xB5, 'l' }; static const symbol s_4_18[3] = { 0xC3, 0xBC, 'l' }; static const symbol s_4_19[1] = { 'n' }; static const symbol s_4_20[2] = { 'a', 'n' }; static const symbol s_4_21[3] = { 'b', 'a', 'n' }; static const symbol s_4_22[2] = { 'e', 'n' }; static const symbol s_4_23[3] = { 'b', 'e', 'n' }; static const symbol s_4_24[7] = { 'k', 0xC3, 0xA9, 'p', 'p', 'e', 'n' }; static const symbol s_4_25[2] = { 'o', 'n' }; static const symbol 
s_4_26[3] = { 0xC3, 0xB6, 'n' }; static const symbol s_4_27[5] = { 'k', 0xC3, 0xA9, 'p', 'p' }; static const symbol s_4_28[3] = { 'k', 'o', 'r' }; static const symbol s_4_29[1] = { 't' }; static const symbol s_4_30[2] = { 'a', 't' }; static const symbol s_4_31[2] = { 'e', 't' }; static const symbol s_4_32[5] = { 'k', 0xC3, 0xA9, 'n', 't' }; static const symbol s_4_33[7] = { 'a', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; static const symbol s_4_34[7] = { 'e', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; static const symbol s_4_35[7] = { 'o', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; static const symbol s_4_36[2] = { 'o', 't' }; static const symbol s_4_37[4] = { 0xC3, 0xA9, 'r', 't' }; static const symbol s_4_38[3] = { 0xC3, 0xB6, 't' }; static const symbol s_4_39[3] = { 'h', 'e', 'z' }; static const symbol s_4_40[3] = { 'h', 'o', 'z' }; static const symbol s_4_41[4] = { 'h', 0xC3, 0xB6, 'z' }; static const symbol s_4_42[3] = { 'v', 0xC3, 0xA1 }; static const symbol s_4_43[3] = { 'v', 0xC3, 0xA9 }; static const struct among a_4[44] = { /* 0 */ { 2, s_4_0, -1, -1, 0}, /* 1 */ { 2, s_4_1, -1, -1, 0}, /* 2 */ { 2, s_4_2, -1, -1, 0}, /* 3 */ { 2, s_4_3, -1, -1, 0}, /* 4 */ { 2, s_4_4, -1, -1, 0}, /* 5 */ { 3, s_4_5, -1, -1, 0}, /* 6 */ { 3, s_4_6, -1, -1, 0}, /* 7 */ { 3, s_4_7, -1, -1, 0}, /* 8 */ { 3, s_4_8, -1, -1, 0}, /* 9 */ { 2, s_4_9, -1, -1, 0}, /* 10 */ { 4, s_4_10, -1, -1, 0}, /* 11 */ { 4, s_4_11, -1, -1, 0}, /* 12 */ { 4, s_4_12, -1, -1, 0}, /* 13 */ { 4, s_4_13, -1, -1, 0}, /* 14 */ { 4, s_4_14, -1, -1, 0}, /* 15 */ { 4, s_4_15, -1, -1, 0}, /* 16 */ { 4, s_4_16, -1, -1, 0}, /* 17 */ { 4, s_4_17, -1, -1, 0}, /* 18 */ { 3, s_4_18, -1, -1, 0}, /* 19 */ { 1, s_4_19, -1, -1, 0}, /* 20 */ { 2, s_4_20, 19, -1, 0}, /* 21 */ { 3, s_4_21, 20, -1, 0}, /* 22 */ { 2, s_4_22, 19, -1, 0}, /* 23 */ { 3, s_4_23, 22, -1, 0}, /* 24 */ { 7, s_4_24, 22, -1, 0}, /* 25 */ { 2, s_4_25, 19, -1, 0}, /* 26 */ { 3, s_4_26, 19, -1, 0}, /* 27 */ { 5, s_4_27, -1, -1, 0}, /* 28 */ { 3, s_4_28, -1, -1, 0}, /* 29 */ { 
1, s_4_29, -1, -1, 0}, /* 30 */ { 2, s_4_30, 29, -1, 0}, /* 31 */ { 2, s_4_31, 29, -1, 0}, /* 32 */ { 5, s_4_32, 29, -1, 0}, /* 33 */ { 7, s_4_33, 32, -1, 0}, /* 34 */ { 7, s_4_34, 32, -1, 0}, /* 35 */ { 7, s_4_35, 32, -1, 0}, /* 36 */ { 2, s_4_36, 29, -1, 0}, /* 37 */ { 4, s_4_37, 29, -1, 0}, /* 38 */ { 3, s_4_38, 29, -1, 0}, /* 39 */ { 3, s_4_39, -1, -1, 0}, /* 40 */ { 3, s_4_40, -1, -1, 0}, /* 41 */ { 4, s_4_41, -1, -1, 0}, /* 42 */ { 3, s_4_42, -1, -1, 0}, /* 43 */ { 3, s_4_43, -1, -1, 0} }; static const symbol s_5_0[3] = { 0xC3, 0xA1, 'n' }; static const symbol s_5_1[3] = { 0xC3, 0xA9, 'n' }; static const symbol s_5_2[8] = { 0xC3, 0xA1, 'n', 'k', 0xC3, 0xA9, 'n', 't' }; static const struct among a_5[3] = { /* 0 */ { 3, s_5_0, -1, 2, 0}, /* 1 */ { 3, s_5_1, -1, 1, 0}, /* 2 */ { 8, s_5_2, -1, 3, 0} }; static const symbol s_6_0[4] = { 's', 't', 'u', 'l' }; static const symbol s_6_1[5] = { 'a', 's', 't', 'u', 'l' }; static const symbol s_6_2[6] = { 0xC3, 0xA1, 's', 't', 'u', 'l' }; static const symbol s_6_3[5] = { 's', 't', 0xC3, 0xBC, 'l' }; static const symbol s_6_4[6] = { 'e', 's', 't', 0xC3, 0xBC, 'l' }; static const symbol s_6_5[7] = { 0xC3, 0xA9, 's', 't', 0xC3, 0xBC, 'l' }; static const struct among a_6[6] = { /* 0 */ { 4, s_6_0, -1, 2, 0}, /* 1 */ { 5, s_6_1, 0, 1, 0}, /* 2 */ { 6, s_6_2, 0, 3, 0}, /* 3 */ { 5, s_6_3, -1, 2, 0}, /* 4 */ { 6, s_6_4, 3, 1, 0}, /* 5 */ { 7, s_6_5, 3, 4, 0} }; static const symbol s_7_0[2] = { 0xC3, 0xA1 }; static const symbol s_7_1[2] = { 0xC3, 0xA9 }; static const struct among a_7[2] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 2, s_7_1, -1, 2, 0} }; static const symbol s_8_0[1] = { 'k' }; static const symbol s_8_1[2] = { 'a', 'k' }; static const symbol s_8_2[2] = { 'e', 'k' }; static const symbol s_8_3[2] = { 'o', 'k' }; static const symbol s_8_4[3] = { 0xC3, 0xA1, 'k' }; static const symbol s_8_5[3] = { 0xC3, 0xA9, 'k' }; static const symbol s_8_6[3] = { 0xC3, 0xB6, 'k' }; static const struct among a_8[7] = { /* 0 */ { 1, 
s_8_0, -1, 7, 0}, /* 1 */ { 2, s_8_1, 0, 4, 0}, /* 2 */ { 2, s_8_2, 0, 6, 0}, /* 3 */ { 2, s_8_3, 0, 5, 0}, /* 4 */ { 3, s_8_4, 0, 1, 0}, /* 5 */ { 3, s_8_5, 0, 2, 0}, /* 6 */ { 3, s_8_6, 0, 3, 0} }; static const symbol s_9_0[3] = { 0xC3, 0xA9, 'i' }; static const symbol s_9_1[5] = { 0xC3, 0xA1, 0xC3, 0xA9, 'i' }; static const symbol s_9_2[5] = { 0xC3, 0xA9, 0xC3, 0xA9, 'i' }; static const symbol s_9_3[2] = { 0xC3, 0xA9 }; static const symbol s_9_4[3] = { 'k', 0xC3, 0xA9 }; static const symbol s_9_5[4] = { 'a', 'k', 0xC3, 0xA9 }; static const symbol s_9_6[4] = { 'e', 'k', 0xC3, 0xA9 }; static const symbol s_9_7[4] = { 'o', 'k', 0xC3, 0xA9 }; static const symbol s_9_8[5] = { 0xC3, 0xA1, 'k', 0xC3, 0xA9 }; static const symbol s_9_9[5] = { 0xC3, 0xA9, 'k', 0xC3, 0xA9 }; static const symbol s_9_10[5] = { 0xC3, 0xB6, 'k', 0xC3, 0xA9 }; static const symbol s_9_11[4] = { 0xC3, 0xA9, 0xC3, 0xA9 }; static const struct among a_9[12] = { /* 0 */ { 3, s_9_0, -1, 7, 0}, /* 1 */ { 5, s_9_1, 0, 6, 0}, /* 2 */ { 5, s_9_2, 0, 5, 0}, /* 3 */ { 2, s_9_3, -1, 9, 0}, /* 4 */ { 3, s_9_4, 3, 4, 0}, /* 5 */ { 4, s_9_5, 4, 1, 0}, /* 6 */ { 4, s_9_6, 4, 1, 0}, /* 7 */ { 4, s_9_7, 4, 1, 0}, /* 8 */ { 5, s_9_8, 4, 3, 0}, /* 9 */ { 5, s_9_9, 4, 2, 0}, /* 10 */ { 5, s_9_10, 4, 1, 0}, /* 11 */ { 4, s_9_11, 3, 8, 0} }; static const symbol s_10_0[1] = { 'a' }; static const symbol s_10_1[2] = { 'j', 'a' }; static const symbol s_10_2[1] = { 'd' }; static const symbol s_10_3[2] = { 'a', 'd' }; static const symbol s_10_4[2] = { 'e', 'd' }; static const symbol s_10_5[2] = { 'o', 'd' }; static const symbol s_10_6[3] = { 0xC3, 0xA1, 'd' }; static const symbol s_10_7[3] = { 0xC3, 0xA9, 'd' }; static const symbol s_10_8[3] = { 0xC3, 0xB6, 'd' }; static const symbol s_10_9[1] = { 'e' }; static const symbol s_10_10[2] = { 'j', 'e' }; static const symbol s_10_11[2] = { 'n', 'k' }; static const symbol s_10_12[3] = { 'u', 'n', 'k' }; static const symbol s_10_13[4] = { 0xC3, 0xA1, 'n', 'k' }; static const symbol 
s_10_14[4] = { 0xC3, 0xA9, 'n', 'k' }; static const symbol s_10_15[4] = { 0xC3, 0xBC, 'n', 'k' }; static const symbol s_10_16[2] = { 'u', 'k' }; static const symbol s_10_17[3] = { 'j', 'u', 'k' }; static const symbol s_10_18[5] = { 0xC3, 0xA1, 'j', 'u', 'k' }; static const symbol s_10_19[3] = { 0xC3, 0xBC, 'k' }; static const symbol s_10_20[4] = { 'j', 0xC3, 0xBC, 'k' }; static const symbol s_10_21[6] = { 0xC3, 0xA9, 'j', 0xC3, 0xBC, 'k' }; static const symbol s_10_22[1] = { 'm' }; static const symbol s_10_23[2] = { 'a', 'm' }; static const symbol s_10_24[2] = { 'e', 'm' }; static const symbol s_10_25[2] = { 'o', 'm' }; static const symbol s_10_26[3] = { 0xC3, 0xA1, 'm' }; static const symbol s_10_27[3] = { 0xC3, 0xA9, 'm' }; static const symbol s_10_28[1] = { 'o' }; static const symbol s_10_29[2] = { 0xC3, 0xA1 }; static const symbol s_10_30[2] = { 0xC3, 0xA9 }; static const struct among a_10[31] = { /* 0 */ { 1, s_10_0, -1, 18, 0}, /* 1 */ { 2, s_10_1, 0, 17, 0}, /* 2 */ { 1, s_10_2, -1, 16, 0}, /* 3 */ { 2, s_10_3, 2, 13, 0}, /* 4 */ { 2, s_10_4, 2, 13, 0}, /* 5 */ { 2, s_10_5, 2, 13, 0}, /* 6 */ { 3, s_10_6, 2, 14, 0}, /* 7 */ { 3, s_10_7, 2, 15, 0}, /* 8 */ { 3, s_10_8, 2, 13, 0}, /* 9 */ { 1, s_10_9, -1, 18, 0}, /* 10 */ { 2, s_10_10, 9, 17, 0}, /* 11 */ { 2, s_10_11, -1, 4, 0}, /* 12 */ { 3, s_10_12, 11, 1, 0}, /* 13 */ { 4, s_10_13, 11, 2, 0}, /* 14 */ { 4, s_10_14, 11, 3, 0}, /* 15 */ { 4, s_10_15, 11, 1, 0}, /* 16 */ { 2, s_10_16, -1, 8, 0}, /* 17 */ { 3, s_10_17, 16, 7, 0}, /* 18 */ { 5, s_10_18, 17, 5, 0}, /* 19 */ { 3, s_10_19, -1, 8, 0}, /* 20 */ { 4, s_10_20, 19, 7, 0}, /* 21 */ { 6, s_10_21, 20, 6, 0}, /* 22 */ { 1, s_10_22, -1, 12, 0}, /* 23 */ { 2, s_10_23, 22, 9, 0}, /* 24 */ { 2, s_10_24, 22, 9, 0}, /* 25 */ { 2, s_10_25, 22, 9, 0}, /* 26 */ { 3, s_10_26, 22, 10, 0}, /* 27 */ { 3, s_10_27, 22, 11, 0}, /* 28 */ { 1, s_10_28, -1, 18, 0}, /* 29 */ { 2, s_10_29, -1, 19, 0}, /* 30 */ { 2, s_10_30, -1, 20, 0} }; static const symbol s_11_0[2] = { 'i', 
'd' }; static const symbol s_11_1[3] = { 'a', 'i', 'd' }; static const symbol s_11_2[4] = { 'j', 'a', 'i', 'd' }; static const symbol s_11_3[3] = { 'e', 'i', 'd' }; static const symbol s_11_4[4] = { 'j', 'e', 'i', 'd' }; static const symbol s_11_5[4] = { 0xC3, 0xA1, 'i', 'd' }; static const symbol s_11_6[4] = { 0xC3, 0xA9, 'i', 'd' }; static const symbol s_11_7[1] = { 'i' }; static const symbol s_11_8[2] = { 'a', 'i' }; static const symbol s_11_9[3] = { 'j', 'a', 'i' }; static const symbol s_11_10[2] = { 'e', 'i' }; static const symbol s_11_11[3] = { 'j', 'e', 'i' }; static const symbol s_11_12[3] = { 0xC3, 0xA1, 'i' }; static const symbol s_11_13[3] = { 0xC3, 0xA9, 'i' }; static const symbol s_11_14[4] = { 'i', 't', 'e', 'k' }; static const symbol s_11_15[5] = { 'e', 'i', 't', 'e', 'k' }; static const symbol s_11_16[6] = { 'j', 'e', 'i', 't', 'e', 'k' }; static const symbol s_11_17[6] = { 0xC3, 0xA9, 'i', 't', 'e', 'k' }; static const symbol s_11_18[2] = { 'i', 'k' }; static const symbol s_11_19[3] = { 'a', 'i', 'k' }; static const symbol s_11_20[4] = { 'j', 'a', 'i', 'k' }; static const symbol s_11_21[3] = { 'e', 'i', 'k' }; static const symbol s_11_22[4] = { 'j', 'e', 'i', 'k' }; static const symbol s_11_23[4] = { 0xC3, 0xA1, 'i', 'k' }; static const symbol s_11_24[4] = { 0xC3, 0xA9, 'i', 'k' }; static const symbol s_11_25[3] = { 'i', 'n', 'k' }; static const symbol s_11_26[4] = { 'a', 'i', 'n', 'k' }; static const symbol s_11_27[5] = { 'j', 'a', 'i', 'n', 'k' }; static const symbol s_11_28[4] = { 'e', 'i', 'n', 'k' }; static const symbol s_11_29[5] = { 'j', 'e', 'i', 'n', 'k' }; static const symbol s_11_30[5] = { 0xC3, 0xA1, 'i', 'n', 'k' }; static const symbol s_11_31[5] = { 0xC3, 0xA9, 'i', 'n', 'k' }; static const symbol s_11_32[5] = { 'a', 'i', 't', 'o', 'k' }; static const symbol s_11_33[6] = { 'j', 'a', 'i', 't', 'o', 'k' }; static const symbol s_11_34[6] = { 0xC3, 0xA1, 'i', 't', 'o', 'k' }; static const symbol s_11_35[2] = { 'i', 'm' }; static const 
symbol s_11_36[3] = { 'a', 'i', 'm' }; static const symbol s_11_37[4] = { 'j', 'a', 'i', 'm' }; static const symbol s_11_38[3] = { 'e', 'i', 'm' }; static const symbol s_11_39[4] = { 'j', 'e', 'i', 'm' }; static const symbol s_11_40[4] = { 0xC3, 0xA1, 'i', 'm' }; static const symbol s_11_41[4] = { 0xC3, 0xA9, 'i', 'm' }; static const struct among a_11[42] = { /* 0 */ { 2, s_11_0, -1, 10, 0}, /* 1 */ { 3, s_11_1, 0, 9, 0}, /* 2 */ { 4, s_11_2, 1, 6, 0}, /* 3 */ { 3, s_11_3, 0, 9, 0}, /* 4 */ { 4, s_11_4, 3, 6, 0}, /* 5 */ { 4, s_11_5, 0, 7, 0}, /* 6 */ { 4, s_11_6, 0, 8, 0}, /* 7 */ { 1, s_11_7, -1, 15, 0}, /* 8 */ { 2, s_11_8, 7, 14, 0}, /* 9 */ { 3, s_11_9, 8, 11, 0}, /* 10 */ { 2, s_11_10, 7, 14, 0}, /* 11 */ { 3, s_11_11, 10, 11, 0}, /* 12 */ { 3, s_11_12, 7, 12, 0}, /* 13 */ { 3, s_11_13, 7, 13, 0}, /* 14 */ { 4, s_11_14, -1, 24, 0}, /* 15 */ { 5, s_11_15, 14, 21, 0}, /* 16 */ { 6, s_11_16, 15, 20, 0}, /* 17 */ { 6, s_11_17, 14, 23, 0}, /* 18 */ { 2, s_11_18, -1, 29, 0}, /* 19 */ { 3, s_11_19, 18, 26, 0}, /* 20 */ { 4, s_11_20, 19, 25, 0}, /* 21 */ { 3, s_11_21, 18, 26, 0}, /* 22 */ { 4, s_11_22, 21, 25, 0}, /* 23 */ { 4, s_11_23, 18, 27, 0}, /* 24 */ { 4, s_11_24, 18, 28, 0}, /* 25 */ { 3, s_11_25, -1, 20, 0}, /* 26 */ { 4, s_11_26, 25, 17, 0}, /* 27 */ { 5, s_11_27, 26, 16, 0}, /* 28 */ { 4, s_11_28, 25, 17, 0}, /* 29 */ { 5, s_11_29, 28, 16, 0}, /* 30 */ { 5, s_11_30, 25, 18, 0}, /* 31 */ { 5, s_11_31, 25, 19, 0}, /* 32 */ { 5, s_11_32, -1, 21, 0}, /* 33 */ { 6, s_11_33, 32, 20, 0}, /* 34 */ { 6, s_11_34, -1, 22, 0}, /* 35 */ { 2, s_11_35, -1, 5, 0}, /* 36 */ { 3, s_11_36, 35, 4, 0}, /* 37 */ { 4, s_11_37, 36, 1, 0}, /* 38 */ { 3, s_11_38, 35, 4, 0}, /* 39 */ { 4, s_11_39, 38, 1, 0}, /* 40 */ { 4, s_11_40, 35, 2, 0}, /* 41 */ { 4, s_11_41, 35, 3, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 
'e' }; static const symbol s_3[] = { 'a' }; static const symbol s_4[] = { 'a' }; static const symbol s_5[] = { 'a' }; static const symbol s_6[] = { 'e' }; static const symbol s_7[] = { 'a' }; static const symbol s_8[] = { 'e' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'a' }; static const symbol s_11[] = { 'e' }; static const symbol s_12[] = { 'a' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'a' }; static const symbol s_15[] = { 'e' }; static const symbol s_16[] = { 'a' }; static const symbol s_17[] = { 'e' }; static const symbol s_18[] = { 'a' }; static const symbol s_19[] = { 'e' }; static const symbol s_20[] = { 'a' }; static const symbol s_21[] = { 'e' }; static const symbol s_22[] = { 'a' }; static const symbol s_23[] = { 'e' }; static const symbol s_24[] = { 'a' }; static const symbol s_25[] = { 'e' }; static const symbol s_26[] = { 'a' }; static const symbol s_27[] = { 'e' }; static const symbol s_28[] = { 'a' }; static const symbol s_29[] = { 'e' }; static const symbol s_30[] = { 'a' }; static const symbol s_31[] = { 'e' }; static const symbol s_32[] = { 'a' }; static const symbol s_33[] = { 'e' }; static const symbol s_34[] = { 'a' }; static const symbol s_35[] = { 'e' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c1 = z->c; /* or, line 51 */ if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab1; if (in_grouping_U(z, g_v, 97, 252, 1) < 0) goto lab1; /* goto */ /* non v, line 48 */ { int c2 = z->c; /* or, line 49 */ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 3 || !((101187584 >> (z->p[z->c + 1] & 0x1f)) & 1)) goto lab3; if (!(find_among(z, a_0, 8))) goto lab3; /* among, line 49 */ goto lab2; lab3: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab1; z->c = ret; /* next, line 49 */ } } lab2: z->I[0] = z->c; /* setmark p1, line 50 */ goto lab0; lab1: z->c = c1; if (out_grouping_U(z, g_v, 97, 252, 0)) return 0; { /* gopast */ /* grouping v, line 53 */ 
int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 53 */ } lab0: return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_v_ending(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 61 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 161 && z->p[z->c - 1] != 169)) return 0; among_var = find_among_b(z, a_1, 2); /* substring, line 61 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 61 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 61 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 62 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 63 */ if (ret < 0) return ret; } break; } return 1; } static int r_double(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 68 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((106790108 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 23))) return 0; /* among, line 68 */ z->c = z->l - m_test; } return 1; } static int r_undouble(struct SN_env * z) { { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 73 */ } z->ket = z->c; /* [, line 73 */ { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 1); if (ret < 0) return 0; z->c = ret; /* hop, line 73 */ } z->bra = z->c; /* ], line 73 */ { int ret = slice_del(z); /* delete, line 73 */ if (ret < 0) return ret; } return 1; } static int r_instrum(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 77 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 108) return 0; among_var = find_among_b(z, a_3, 2); /* substring, line 77 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 77 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 77 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = 
r_double(z); if (ret == 0) return 0; /* call double, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 79 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 82 */ if (ret < 0) return ret; } return 1; } static int r_case(struct SN_env * z) { z->ket = z->c; /* [, line 87 */ if (!(find_among_b(z, a_4, 44))) return 0; /* substring, line 87 */ z->bra = z->c; /* ], line 87 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 87 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 111 */ if (ret < 0) return ret; } { int ret = r_v_ending(z); if (ret == 0) return 0; /* call v_ending, line 112 */ if (ret < 0) return ret; } return 1; } static int r_case_special(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 116 */ if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 110 && z->p[z->c - 1] != 116)) return 0; among_var = find_among_b(z, a_5, 3); /* substring, line 116 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 116 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 116 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_2); /* <-, line 117 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_3); /* <-, line 118 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_4); /* <-, line 119 */ if (ret < 0) return ret; } break; } return 1; } static int r_case_other(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 124 */ if (z->c - 3 <= z->lb || z->p[z->c - 1] != 108) return 0; among_var = find_among_b(z, a_6, 6); /* substring, line 124 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 124 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 124 */ if (ret < 0) return ret; } 
switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 126 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_5); /* <-, line 127 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_6); /* <-, line 128 */ if (ret < 0) return ret; } break; } return 1; } static int r_factive(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 133 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 161 && z->p[z->c - 1] != 169)) return 0; among_var = find_among_b(z, a_7, 2); /* substring, line 133 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 133 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 133 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 134 */ if (ret < 0) return ret; } break; case 2: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 135 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 137 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 138 */ if (ret < 0) return ret; } return 1; } static int r_plural(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 142 */ if (z->c <= z->lb || z->p[z->c - 1] != 107) return 0; among_var = find_among_b(z, a_8, 7); /* substring, line 142 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 142 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 142 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_7); /* <-, line 143 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_8); /* <-, line 144 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; case 4: { 
int ret = slice_del(z); /* delete, line 146 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 148 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 149 */ if (ret < 0) return ret; } break; } return 1; } static int r_owned(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 154 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 169)) return 0; among_var = find_among_b(z, a_9, 12); /* substring, line 154 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 154 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 154 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 155 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_9); /* <-, line 156 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_10); /* <-, line 157 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_11); /* <-, line 159 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_12); /* <-, line 160 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 161 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 1, s_13); /* <-, line 162 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } break; } return 1; } static int r_sing_owner(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 168 */ among_var = find_among_b(z, a_10, 31); /* substring, line 168 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 168 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 168 */ if (ret < 0) return ret; } 
switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 169 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_14); /* <-, line 170 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_15); /* <-, line 171 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 172 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_16); /* <-, line 173 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_17); /* <-, line 174 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 175 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 1, s_18); /* <-, line 178 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 1, s_19); /* <-, line 179 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_del(z); /* delete, line 180 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_del(z); /* delete, line 181 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 1, s_20); /* <-, line 182 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_from_s(z, 1, s_21); /* <-, line 183 */ if (ret < 0) return ret; } break; case 16: { int ret = slice_del(z); /* delete, line 184 */ if (ret < 0) return ret; } break; case 17: { int ret = slice_del(z); /* delete, line 185 */ if (ret < 0) return ret; } break; case 18: { int ret = slice_del(z); /* delete, line 186 */ if (ret < 0) return ret; } break; case 19: { int ret = slice_from_s(z, 1, s_22); /* <-, line 187 */ if (ret < 0) return ret; } break; case 20: { int ret = slice_from_s(z, 1, s_23); /* <-, line 188 */ if (ret < 0) return ret; } break; } return 1; } static int r_plur_owner(struct 
SN_env * z) { int among_var; z->ket = z->c; /* [, line 193 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((10768 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_11, 42); /* substring, line 193 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 193 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 193 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_24); /* <-, line 195 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_25); /* <-, line 196 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 197 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 198 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 199 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 1, s_26); /* <-, line 200 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 1, s_27); /* <-, line 201 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 202 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_del(z); /* delete, line 203 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_del(z); /* delete, line 204 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 1, s_28); /* <-, line 205 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_from_s(z, 1, s_29); /* <-, line 206 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_del(z); /* delete, line 207 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_del(z); /* delete, line 208 */ if (ret < 0) return ret; } break; case 16: { int ret = slice_del(z); /* delete, line 209 */ if (ret < 0) return ret; } break; case 17: { int ret = slice_del(z); /* delete, line 210 */ if 
(ret < 0) return ret; } break; case 18: { int ret = slice_from_s(z, 1, s_30); /* <-, line 211 */ if (ret < 0) return ret; } break; case 19: { int ret = slice_from_s(z, 1, s_31); /* <-, line 212 */ if (ret < 0) return ret; } break; case 20: { int ret = slice_del(z); /* delete, line 214 */ if (ret < 0) return ret; } break; case 21: { int ret = slice_del(z); /* delete, line 215 */ if (ret < 0) return ret; } break; case 22: { int ret = slice_from_s(z, 1, s_32); /* <-, line 216 */ if (ret < 0) return ret; } break; case 23: { int ret = slice_from_s(z, 1, s_33); /* <-, line 217 */ if (ret < 0) return ret; } break; case 24: { int ret = slice_del(z); /* delete, line 218 */ if (ret < 0) return ret; } break; case 25: { int ret = slice_del(z); /* delete, line 219 */ if (ret < 0) return ret; } break; case 26: { int ret = slice_del(z); /* delete, line 220 */ if (ret < 0) return ret; } break; case 27: { int ret = slice_from_s(z, 1, s_34); /* <-, line 221 */ if (ret < 0) return ret; } break; case 28: { int ret = slice_from_s(z, 1, s_35); /* <-, line 222 */ if (ret < 0) return ret; } break; case 29: { int ret = slice_del(z); /* delete, line 223 */ if (ret < 0) return ret; } break; } return 1; } extern int hungarian_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 229 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 229 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 230 */ { int m2 = z->l - z->c; (void)m2; /* do, line 231 */ { int ret = r_instrum(z); if (ret == 0) goto lab1; /* call instrum, line 231 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 232 */ { int ret = r_case(z); if (ret == 0) goto lab2; /* call case, line 232 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 233 */ { int ret = r_case_special(z); if (ret == 0) goto lab3; /* call case_special, line 233 */ if (ret < 0) 
return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 234 */ { int ret = r_case_other(z); if (ret == 0) goto lab4; /* call case_other, line 234 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* do, line 235 */ { int ret = r_factive(z); if (ret == 0) goto lab5; /* call factive, line 235 */ if (ret < 0) return ret; } lab5: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 236 */ { int ret = r_owned(z); if (ret == 0) goto lab6; /* call owned, line 236 */ if (ret < 0) return ret; } lab6: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 237 */ { int ret = r_sing_owner(z); if (ret == 0) goto lab7; /* call sing_owner, line 237 */ if (ret < 0) return ret; } lab7: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 238 */ { int ret = r_plur_owner(z); if (ret == 0) goto lab8; /* call plur_owner, line 238 */ if (ret < 0) return ret; } lab8: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 239 */ { int ret = r_plural(z); if (ret == 0) goto lab9; /* call plural, line 239 */ if (ret < 0) return ret; } lab9: z->c = z->l - m10; } z->c = z->lb; return 1; } extern struct SN_env * hungarian_UTF_8_create_env(void) { return SN_create_env(0, 1, 0); } extern void hungarian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_hungarian.h000066400000000000000000000004771217574114600312530ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * hungarian_UTF_8_create_env(void); extern void hungarian_UTF_8_close_env(struct SN_env * z); extern int hungarian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_italian.c000066400000000000000000001170371217574114600307140ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int italian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_vowel_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_attached_pronoun(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * italian_UTF_8_create_env(void); extern void italian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 'q', 'u' }; static const symbol s_0_2[2] = { 0xC3, 0xA1 }; static const symbol s_0_3[2] = { 0xC3, 0xA9 }; static const symbol s_0_4[2] = { 0xC3, 0xAD }; static const symbol s_0_5[2] = { 0xC3, 0xB3 }; static const symbol s_0_6[2] = { 0xC3, 0xBA }; static const struct among a_0[7] = { /* 0 */ { 0, 0, -1, 7, 0}, /* 1 */ { 2, s_0_1, 0, 6, 0}, /* 2 */ { 2, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, 2, 0}, /* 4 */ { 2, s_0_4, 0, 3, 0}, /* 5 */ { 2, s_0_5, 0, 4, 0}, /* 6 */ { 2, s_0_6, 0, 5, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'U' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_1_1, 0, 1, 0}, /* 2 */ { 1, s_1_2, 0, 2, 0} }; static const symbol s_2_0[2] = { 'l', 'a' }; static const symbol s_2_1[4] = { 'c', 'e', 'l', 'a' }; static const symbol s_2_2[6] = { 'g', 'l', 'i', 'e', 'l', 'a' }; static const symbol s_2_3[4] = { 'm', 'e', 'l', 'a' }; static const symbol s_2_4[4] = { 't', 'e', 'l', 
'a' }; static const symbol s_2_5[4] = { 'v', 'e', 'l', 'a' }; static const symbol s_2_6[2] = { 'l', 'e' }; static const symbol s_2_7[4] = { 'c', 'e', 'l', 'e' }; static const symbol s_2_8[6] = { 'g', 'l', 'i', 'e', 'l', 'e' }; static const symbol s_2_9[4] = { 'm', 'e', 'l', 'e' }; static const symbol s_2_10[4] = { 't', 'e', 'l', 'e' }; static const symbol s_2_11[4] = { 'v', 'e', 'l', 'e' }; static const symbol s_2_12[2] = { 'n', 'e' }; static const symbol s_2_13[4] = { 'c', 'e', 'n', 'e' }; static const symbol s_2_14[6] = { 'g', 'l', 'i', 'e', 'n', 'e' }; static const symbol s_2_15[4] = { 'm', 'e', 'n', 'e' }; static const symbol s_2_16[4] = { 's', 'e', 'n', 'e' }; static const symbol s_2_17[4] = { 't', 'e', 'n', 'e' }; static const symbol s_2_18[4] = { 'v', 'e', 'n', 'e' }; static const symbol s_2_19[2] = { 'c', 'i' }; static const symbol s_2_20[2] = { 'l', 'i' }; static const symbol s_2_21[4] = { 'c', 'e', 'l', 'i' }; static const symbol s_2_22[6] = { 'g', 'l', 'i', 'e', 'l', 'i' }; static const symbol s_2_23[4] = { 'm', 'e', 'l', 'i' }; static const symbol s_2_24[4] = { 't', 'e', 'l', 'i' }; static const symbol s_2_25[4] = { 'v', 'e', 'l', 'i' }; static const symbol s_2_26[3] = { 'g', 'l', 'i' }; static const symbol s_2_27[2] = { 'm', 'i' }; static const symbol s_2_28[2] = { 's', 'i' }; static const symbol s_2_29[2] = { 't', 'i' }; static const symbol s_2_30[2] = { 'v', 'i' }; static const symbol s_2_31[2] = { 'l', 'o' }; static const symbol s_2_32[4] = { 'c', 'e', 'l', 'o' }; static const symbol s_2_33[6] = { 'g', 'l', 'i', 'e', 'l', 'o' }; static const symbol s_2_34[4] = { 'm', 'e', 'l', 'o' }; static const symbol s_2_35[4] = { 't', 'e', 'l', 'o' }; static const symbol s_2_36[4] = { 'v', 'e', 'l', 'o' }; static const struct among a_2[37] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 4, s_2_1, 0, -1, 0}, /* 2 */ { 6, s_2_2, 0, -1, 0}, /* 3 */ { 4, s_2_3, 0, -1, 0}, /* 4 */ { 4, s_2_4, 0, -1, 0}, /* 5 */ { 4, s_2_5, 0, -1, 0}, /* 6 */ { 2, s_2_6, -1, -1, 0}, /* 
7 */ { 4, s_2_7, 6, -1, 0}, /* 8 */ { 6, s_2_8, 6, -1, 0}, /* 9 */ { 4, s_2_9, 6, -1, 0}, /* 10 */ { 4, s_2_10, 6, -1, 0}, /* 11 */ { 4, s_2_11, 6, -1, 0}, /* 12 */ { 2, s_2_12, -1, -1, 0}, /* 13 */ { 4, s_2_13, 12, -1, 0}, /* 14 */ { 6, s_2_14, 12, -1, 0}, /* 15 */ { 4, s_2_15, 12, -1, 0}, /* 16 */ { 4, s_2_16, 12, -1, 0}, /* 17 */ { 4, s_2_17, 12, -1, 0}, /* 18 */ { 4, s_2_18, 12, -1, 0}, /* 19 */ { 2, s_2_19, -1, -1, 0}, /* 20 */ { 2, s_2_20, -1, -1, 0}, /* 21 */ { 4, s_2_21, 20, -1, 0}, /* 22 */ { 6, s_2_22, 20, -1, 0}, /* 23 */ { 4, s_2_23, 20, -1, 0}, /* 24 */ { 4, s_2_24, 20, -1, 0}, /* 25 */ { 4, s_2_25, 20, -1, 0}, /* 26 */ { 3, s_2_26, 20, -1, 0}, /* 27 */ { 2, s_2_27, -1, -1, 0}, /* 28 */ { 2, s_2_28, -1, -1, 0}, /* 29 */ { 2, s_2_29, -1, -1, 0}, /* 30 */ { 2, s_2_30, -1, -1, 0}, /* 31 */ { 2, s_2_31, -1, -1, 0}, /* 32 */ { 4, s_2_32, 31, -1, 0}, /* 33 */ { 6, s_2_33, 31, -1, 0}, /* 34 */ { 4, s_2_34, 31, -1, 0}, /* 35 */ { 4, s_2_35, 31, -1, 0}, /* 36 */ { 4, s_2_36, 31, -1, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_3_1[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_3_2[2] = { 'a', 'r' }; static const symbol s_3_3[2] = { 'e', 'r' }; static const symbol s_3_4[2] = { 'i', 'r' }; static const struct among a_3[5] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 2, s_3_2, -1, 2, 0}, /* 3 */ { 2, s_3_3, -1, 2, 0}, /* 4 */ { 2, s_3_4, -1, 2, 0} }; static const symbol s_4_0[2] = { 'i', 'c' }; static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_4_2[2] = { 'o', 's' }; static const symbol s_4_3[2] = { 'i', 'v' }; static const struct among a_4[4] = { /* 0 */ { 2, s_4_0, -1, -1, 0}, /* 1 */ { 4, s_4_1, -1, -1, 0}, /* 2 */ { 2, s_4_2, -1, -1, 0}, /* 3 */ { 2, s_4_3, -1, 1, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_5_2[2] = { 'i', 'v' }; static const struct among a_5[3] = { /* 0 */ { 2, 
s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, -1, 1, 0} }; static const symbol s_6_0[3] = { 'i', 'c', 'a' }; static const symbol s_6_1[5] = { 'l', 'o', 'g', 'i', 'a' }; static const symbol s_6_2[3] = { 'o', 's', 'a' }; static const symbol s_6_3[4] = { 'i', 's', 't', 'a' }; static const symbol s_6_4[3] = { 'i', 'v', 'a' }; static const symbol s_6_5[4] = { 'a', 'n', 'z', 'a' }; static const symbol s_6_6[4] = { 'e', 'n', 'z', 'a' }; static const symbol s_6_7[3] = { 'i', 'c', 'e' }; static const symbol s_6_8[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; static const symbol s_6_9[4] = { 'i', 'c', 'h', 'e' }; static const symbol s_6_10[5] = { 'l', 'o', 'g', 'i', 'e' }; static const symbol s_6_11[5] = { 'a', 'b', 'i', 'l', 'e' }; static const symbol s_6_12[5] = { 'i', 'b', 'i', 'l', 'e' }; static const symbol s_6_13[6] = { 'u', 's', 'i', 'o', 'n', 'e' }; static const symbol s_6_14[6] = { 'a', 'z', 'i', 'o', 'n', 'e' }; static const symbol s_6_15[6] = { 'u', 'z', 'i', 'o', 'n', 'e' }; static const symbol s_6_16[5] = { 'a', 't', 'o', 'r', 'e' }; static const symbol s_6_17[3] = { 'o', 's', 'e' }; static const symbol s_6_18[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_19[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_20[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_21[4] = { 'i', 's', 't', 'e' }; static const symbol s_6_22[3] = { 'i', 'v', 'e' }; static const symbol s_6_23[4] = { 'a', 'n', 'z', 'e' }; static const symbol s_6_24[4] = { 'e', 'n', 'z', 'e' }; static const symbol s_6_25[3] = { 'i', 'c', 'i' }; static const symbol s_6_26[6] = { 'a', 't', 'r', 'i', 'c', 'i' }; static const symbol s_6_27[4] = { 'i', 'c', 'h', 'i' }; static const symbol s_6_28[5] = { 'a', 'b', 'i', 'l', 'i' }; static const symbol s_6_29[5] = { 'i', 'b', 'i', 'l', 'i' }; static const symbol s_6_30[4] = { 'i', 's', 'm', 'i' }; static const symbol s_6_31[6] = { 'u', 's', 'i', 'o', 'n', 'i' }; static const symbol s_6_32[6] = { 'a', 'z', 'i', 'o', 'n', 
'i' }; static const symbol s_6_33[6] = { 'u', 'z', 'i', 'o', 'n', 'i' }; static const symbol s_6_34[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_6_35[3] = { 'o', 's', 'i' }; static const symbol s_6_36[4] = { 'a', 'n', 't', 'i' }; static const symbol s_6_37[6] = { 'a', 'm', 'e', 'n', 't', 'i' }; static const symbol s_6_38[6] = { 'i', 'm', 'e', 'n', 't', 'i' }; static const symbol s_6_39[4] = { 'i', 's', 't', 'i' }; static const symbol s_6_40[3] = { 'i', 'v', 'i' }; static const symbol s_6_41[3] = { 'i', 'c', 'o' }; static const symbol s_6_42[4] = { 'i', 's', 'm', 'o' }; static const symbol s_6_43[3] = { 'o', 's', 'o' }; static const symbol s_6_44[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; static const symbol s_6_45[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; static const symbol s_6_46[3] = { 'i', 'v', 'o' }; static const symbol s_6_47[4] = { 'i', 't', 0xC3, 0xA0 }; static const symbol s_6_48[5] = { 'i', 's', 't', 0xC3, 0xA0 }; static const symbol s_6_49[5] = { 'i', 's', 't', 0xC3, 0xA8 }; static const symbol s_6_50[5] = { 'i', 's', 't', 0xC3, 0xAC }; static const struct among a_6[51] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 5, s_6_1, -1, 3, 0}, /* 2 */ { 3, s_6_2, -1, 1, 0}, /* 3 */ { 4, s_6_3, -1, 1, 0}, /* 4 */ { 3, s_6_4, -1, 9, 0}, /* 5 */ { 4, s_6_5, -1, 1, 0}, /* 6 */ { 4, s_6_6, -1, 5, 0}, /* 7 */ { 3, s_6_7, -1, 1, 0}, /* 8 */ { 6, s_6_8, 7, 1, 0}, /* 9 */ { 4, s_6_9, -1, 1, 0}, /* 10 */ { 5, s_6_10, -1, 3, 0}, /* 11 */ { 5, s_6_11, -1, 1, 0}, /* 12 */ { 5, s_6_12, -1, 1, 0}, /* 13 */ { 6, s_6_13, -1, 4, 0}, /* 14 */ { 6, s_6_14, -1, 2, 0}, /* 15 */ { 6, s_6_15, -1, 4, 0}, /* 16 */ { 5, s_6_16, -1, 2, 0}, /* 17 */ { 3, s_6_17, -1, 1, 0}, /* 18 */ { 4, s_6_18, -1, 1, 0}, /* 19 */ { 5, s_6_19, -1, 1, 0}, /* 20 */ { 6, s_6_20, 19, 7, 0}, /* 21 */ { 4, s_6_21, -1, 1, 0}, /* 22 */ { 3, s_6_22, -1, 9, 0}, /* 23 */ { 4, s_6_23, -1, 1, 0}, /* 24 */ { 4, s_6_24, -1, 5, 0}, /* 25 */ { 3, s_6_25, -1, 1, 0}, /* 26 */ { 6, s_6_26, 25, 1, 0}, /* 27 */ { 4, s_6_27, 
-1, 1, 0}, /* 28 */ { 5, s_6_28, -1, 1, 0}, /* 29 */ { 5, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, -1, 1, 0}, /* 31 */ { 6, s_6_31, -1, 4, 0}, /* 32 */ { 6, s_6_32, -1, 2, 0}, /* 33 */ { 6, s_6_33, -1, 4, 0}, /* 34 */ { 5, s_6_34, -1, 2, 0}, /* 35 */ { 3, s_6_35, -1, 1, 0}, /* 36 */ { 4, s_6_36, -1, 1, 0}, /* 37 */ { 6, s_6_37, -1, 6, 0}, /* 38 */ { 6, s_6_38, -1, 6, 0}, /* 39 */ { 4, s_6_39, -1, 1, 0}, /* 40 */ { 3, s_6_40, -1, 9, 0}, /* 41 */ { 3, s_6_41, -1, 1, 0}, /* 42 */ { 4, s_6_42, -1, 1, 0}, /* 43 */ { 3, s_6_43, -1, 1, 0}, /* 44 */ { 6, s_6_44, -1, 6, 0}, /* 45 */ { 6, s_6_45, -1, 6, 0}, /* 46 */ { 3, s_6_46, -1, 9, 0}, /* 47 */ { 4, s_6_47, -1, 8, 0}, /* 48 */ { 5, s_6_48, -1, 1, 0}, /* 49 */ { 5, s_6_49, -1, 1, 0}, /* 50 */ { 5, s_6_50, -1, 1, 0} }; static const symbol s_7_0[4] = { 'i', 's', 'c', 'a' }; static const symbol s_7_1[4] = { 'e', 'n', 'd', 'a' }; static const symbol s_7_2[3] = { 'a', 't', 'a' }; static const symbol s_7_3[3] = { 'i', 't', 'a' }; static const symbol s_7_4[3] = { 'u', 't', 'a' }; static const symbol s_7_5[3] = { 'a', 'v', 'a' }; static const symbol s_7_6[3] = { 'e', 'v', 'a' }; static const symbol s_7_7[3] = { 'i', 'v', 'a' }; static const symbol s_7_8[6] = { 'e', 'r', 'e', 'b', 'b', 'e' }; static const symbol s_7_9[6] = { 'i', 'r', 'e', 'b', 'b', 'e' }; static const symbol s_7_10[4] = { 'i', 's', 'c', 'e' }; static const symbol s_7_11[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_7_12[3] = { 'a', 'r', 'e' }; static const symbol s_7_13[3] = { 'e', 'r', 'e' }; static const symbol s_7_14[3] = { 'i', 'r', 'e' }; static const symbol s_7_15[4] = { 'a', 's', 's', 'e' }; static const symbol s_7_16[3] = { 'a', 't', 'e' }; static const symbol s_7_17[5] = { 'a', 'v', 'a', 't', 'e' }; static const symbol s_7_18[5] = { 'e', 'v', 'a', 't', 'e' }; static const symbol s_7_19[5] = { 'i', 'v', 'a', 't', 'e' }; static const symbol s_7_20[3] = { 'e', 't', 'e' }; static const symbol s_7_21[5] = { 'e', 'r', 'e', 't', 'e' }; static const symbol 
s_7_22[5] = { 'i', 'r', 'e', 't', 'e' }; static const symbol s_7_23[3] = { 'i', 't', 'e' }; static const symbol s_7_24[6] = { 'e', 'r', 'e', 's', 't', 'e' }; static const symbol s_7_25[6] = { 'i', 'r', 'e', 's', 't', 'e' }; static const symbol s_7_26[3] = { 'u', 't', 'e' }; static const symbol s_7_27[4] = { 'e', 'r', 'a', 'i' }; static const symbol s_7_28[4] = { 'i', 'r', 'a', 'i' }; static const symbol s_7_29[4] = { 'i', 's', 'c', 'i' }; static const symbol s_7_30[4] = { 'e', 'n', 'd', 'i' }; static const symbol s_7_31[4] = { 'e', 'r', 'e', 'i' }; static const symbol s_7_32[4] = { 'i', 'r', 'e', 'i' }; static const symbol s_7_33[4] = { 'a', 's', 's', 'i' }; static const symbol s_7_34[3] = { 'a', 't', 'i' }; static const symbol s_7_35[3] = { 'i', 't', 'i' }; static const symbol s_7_36[6] = { 'e', 'r', 'e', 's', 't', 'i' }; static const symbol s_7_37[6] = { 'i', 'r', 'e', 's', 't', 'i' }; static const symbol s_7_38[3] = { 'u', 't', 'i' }; static const symbol s_7_39[3] = { 'a', 'v', 'i' }; static const symbol s_7_40[3] = { 'e', 'v', 'i' }; static const symbol s_7_41[3] = { 'i', 'v', 'i' }; static const symbol s_7_42[4] = { 'i', 's', 'c', 'o' }; static const symbol s_7_43[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_7_44[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_7_45[4] = { 'Y', 'a', 'm', 'o' }; static const symbol s_7_46[4] = { 'i', 'a', 'm', 'o' }; static const symbol s_7_47[5] = { 'a', 'v', 'a', 'm', 'o' }; static const symbol s_7_48[5] = { 'e', 'v', 'a', 'm', 'o' }; static const symbol s_7_49[5] = { 'i', 'v', 'a', 'm', 'o' }; static const symbol s_7_50[5] = { 'e', 'r', 'e', 'm', 'o' }; static const symbol s_7_51[5] = { 'i', 'r', 'e', 'm', 'o' }; static const symbol s_7_52[6] = { 'a', 's', 's', 'i', 'm', 'o' }; static const symbol s_7_53[4] = { 'a', 'm', 'm', 'o' }; static const symbol s_7_54[4] = { 'e', 'm', 'm', 'o' }; static const symbol s_7_55[6] = { 'e', 'r', 'e', 'm', 'm', 'o' }; static const symbol s_7_56[6] = { 'i', 'r', 'e', 'm', 'm', 'o' }; 
static const symbol s_7_57[4] = { 'i', 'm', 'm', 'o' }; static const symbol s_7_58[3] = { 'a', 'n', 'o' }; static const symbol s_7_59[6] = { 'i', 's', 'c', 'a', 'n', 'o' }; static const symbol s_7_60[5] = { 'a', 'v', 'a', 'n', 'o' }; static const symbol s_7_61[5] = { 'e', 'v', 'a', 'n', 'o' }; static const symbol s_7_62[5] = { 'i', 'v', 'a', 'n', 'o' }; static const symbol s_7_63[6] = { 'e', 'r', 'a', 'n', 'n', 'o' }; static const symbol s_7_64[6] = { 'i', 'r', 'a', 'n', 'n', 'o' }; static const symbol s_7_65[3] = { 'o', 'n', 'o' }; static const symbol s_7_66[6] = { 'i', 's', 'c', 'o', 'n', 'o' }; static const symbol s_7_67[5] = { 'a', 'r', 'o', 'n', 'o' }; static const symbol s_7_68[5] = { 'e', 'r', 'o', 'n', 'o' }; static const symbol s_7_69[5] = { 'i', 'r', 'o', 'n', 'o' }; static const symbol s_7_70[8] = { 'e', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; static const symbol s_7_71[8] = { 'i', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; static const symbol s_7_72[6] = { 'a', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_73[6] = { 'e', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_74[6] = { 'i', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_75[3] = { 'a', 't', 'o' }; static const symbol s_7_76[3] = { 'i', 't', 'o' }; static const symbol s_7_77[3] = { 'u', 't', 'o' }; static const symbol s_7_78[3] = { 'a', 'v', 'o' }; static const symbol s_7_79[3] = { 'e', 'v', 'o' }; static const symbol s_7_80[3] = { 'i', 'v', 'o' }; static const symbol s_7_81[2] = { 'a', 'r' }; static const symbol s_7_82[2] = { 'i', 'r' }; static const symbol s_7_83[4] = { 'e', 'r', 0xC3, 0xA0 }; static const symbol s_7_84[4] = { 'i', 'r', 0xC3, 0xA0 }; static const symbol s_7_85[4] = { 'e', 'r', 0xC3, 0xB2 }; static const symbol s_7_86[4] = { 'i', 'r', 0xC3, 0xB2 }; static const struct among a_7[87] = { /* 0 */ { 4, s_7_0, -1, 1, 0}, /* 1 */ { 4, s_7_1, -1, 1, 0}, /* 2 */ { 3, s_7_2, -1, 1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 3, s_7_4, -1, 1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ 
{ 3, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 6, s_7_8, -1, 1, 0}, /* 9 */ { 6, s_7_9, -1, 1, 0}, /* 10 */ { 4, s_7_10, -1, 1, 0}, /* 11 */ { 4, s_7_11, -1, 1, 0}, /* 12 */ { 3, s_7_12, -1, 1, 0}, /* 13 */ { 3, s_7_13, -1, 1, 0}, /* 14 */ { 3, s_7_14, -1, 1, 0}, /* 15 */ { 4, s_7_15, -1, 1, 0}, /* 16 */ { 3, s_7_16, -1, 1, 0}, /* 17 */ { 5, s_7_17, 16, 1, 0}, /* 18 */ { 5, s_7_18, 16, 1, 0}, /* 19 */ { 5, s_7_19, 16, 1, 0}, /* 20 */ { 3, s_7_20, -1, 1, 0}, /* 21 */ { 5, s_7_21, 20, 1, 0}, /* 22 */ { 5, s_7_22, 20, 1, 0}, /* 23 */ { 3, s_7_23, -1, 1, 0}, /* 24 */ { 6, s_7_24, -1, 1, 0}, /* 25 */ { 6, s_7_25, -1, 1, 0}, /* 26 */ { 3, s_7_26, -1, 1, 0}, /* 27 */ { 4, s_7_27, -1, 1, 0}, /* 28 */ { 4, s_7_28, -1, 1, 0}, /* 29 */ { 4, s_7_29, -1, 1, 0}, /* 30 */ { 4, s_7_30, -1, 1, 0}, /* 31 */ { 4, s_7_31, -1, 1, 0}, /* 32 */ { 4, s_7_32, -1, 1, 0}, /* 33 */ { 4, s_7_33, -1, 1, 0}, /* 34 */ { 3, s_7_34, -1, 1, 0}, /* 35 */ { 3, s_7_35, -1, 1, 0}, /* 36 */ { 6, s_7_36, -1, 1, 0}, /* 37 */ { 6, s_7_37, -1, 1, 0}, /* 38 */ { 3, s_7_38, -1, 1, 0}, /* 39 */ { 3, s_7_39, -1, 1, 0}, /* 40 */ { 3, s_7_40, -1, 1, 0}, /* 41 */ { 3, s_7_41, -1, 1, 0}, /* 42 */ { 4, s_7_42, -1, 1, 0}, /* 43 */ { 4, s_7_43, -1, 1, 0}, /* 44 */ { 4, s_7_44, -1, 1, 0}, /* 45 */ { 4, s_7_45, -1, 1, 0}, /* 46 */ { 4, s_7_46, -1, 1, 0}, /* 47 */ { 5, s_7_47, -1, 1, 0}, /* 48 */ { 5, s_7_48, -1, 1, 0}, /* 49 */ { 5, s_7_49, -1, 1, 0}, /* 50 */ { 5, s_7_50, -1, 1, 0}, /* 51 */ { 5, s_7_51, -1, 1, 0}, /* 52 */ { 6, s_7_52, -1, 1, 0}, /* 53 */ { 4, s_7_53, -1, 1, 0}, /* 54 */ { 4, s_7_54, -1, 1, 0}, /* 55 */ { 6, s_7_55, 54, 1, 0}, /* 56 */ { 6, s_7_56, 54, 1, 0}, /* 57 */ { 4, s_7_57, -1, 1, 0}, /* 58 */ { 3, s_7_58, -1, 1, 0}, /* 59 */ { 6, s_7_59, 58, 1, 0}, /* 60 */ { 5, s_7_60, 58, 1, 0}, /* 61 */ { 5, s_7_61, 58, 1, 0}, /* 62 */ { 5, s_7_62, 58, 1, 0}, /* 63 */ { 6, s_7_63, -1, 1, 0}, /* 64 */ { 6, s_7_64, -1, 1, 0}, /* 65 */ { 3, s_7_65, -1, 1, 0}, /* 66 */ { 6, s_7_66, 65, 1, 0}, /* 
67 */ { 5, s_7_67, 65, 1, 0}, /* 68 */ { 5, s_7_68, 65, 1, 0}, /* 69 */ { 5, s_7_69, 65, 1, 0}, /* 70 */ { 8, s_7_70, -1, 1, 0}, /* 71 */ { 8, s_7_71, -1, 1, 0}, /* 72 */ { 6, s_7_72, -1, 1, 0}, /* 73 */ { 6, s_7_73, -1, 1, 0}, /* 74 */ { 6, s_7_74, -1, 1, 0}, /* 75 */ { 3, s_7_75, -1, 1, 0}, /* 76 */ { 3, s_7_76, -1, 1, 0}, /* 77 */ { 3, s_7_77, -1, 1, 0}, /* 78 */ { 3, s_7_78, -1, 1, 0}, /* 79 */ { 3, s_7_79, -1, 1, 0}, /* 80 */ { 3, s_7_80, -1, 1, 0}, /* 81 */ { 2, s_7_81, -1, 1, 0}, /* 82 */ { 2, s_7_82, -1, 1, 0}, /* 83 */ { 4, s_7_83, -1, 1, 0}, /* 84 */ { 4, s_7_84, -1, 1, 0}, /* 85 */ { 4, s_7_85, -1, 1, 0}, /* 86 */ { 4, s_7_86, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1 }; static const unsigned char g_AEIO[] = { 17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2 }; static const unsigned char g_CG[] = { 17 }; static const symbol s_0[] = { 0xC3, 0xA0 }; static const symbol s_1[] = { 0xC3, 0xA8 }; static const symbol s_2[] = { 0xC3, 0xAC }; static const symbol s_3[] = { 0xC3, 0xB2 }; static const symbol s_4[] = { 0xC3, 0xB9 }; static const symbol s_5[] = { 'q', 'U' }; static const symbol s_6[] = { 'u' }; static const symbol s_7[] = { 'U' }; static const symbol s_8[] = { 'i' }; static const symbol s_9[] = { 'I' }; static const symbol s_10[] = { 'i' }; static const symbol s_11[] = { 'u' }; static const symbol s_12[] = { 'e' }; static const symbol s_13[] = { 'i', 'c' }; static const symbol s_14[] = { 'l', 'o', 'g' }; static const symbol s_15[] = { 'u' }; static const symbol s_16[] = { 'e', 'n', 't', 'e' }; static const symbol s_17[] = { 'a', 't' }; static const symbol s_18[] = { 'a', 't' }; static const symbol s_19[] = { 'i', 'c' }; static const symbol s_20[] = { 'i' }; static const symbol s_21[] = { 'h' }; static int r_prelude(struct SN_env * z) { int among_var; { int c_test = z->c; /* test, line 35 */ while(1) { /* repeat, line 35 */ int c1 = z->c; z->bra = z->c; /* [, line 36 
*/ among_var = find_among(z, a_0, 7); /* substring, line 36 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 36 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 37 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_1); /* <-, line 38 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_2); /* <-, line 39 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_3); /* <-, line 40 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 2, s_4); /* <-, line 41 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_5); /* <-, line 42 */ if (ret < 0) return ret; } break; case 7: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 43 */ } break; } continue; lab0: z->c = c1; break; } z->c = c_test; } while(1) { /* repeat, line 46 */ int c2 = z->c; while(1) { /* goto, line 46 */ int c3 = z->c; if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; z->bra = z->c; /* [, line 47 */ { int c4 = z->c; /* or, line 47 */ if (!(eq_s(z, 1, s_6))) goto lab4; z->ket = z->c; /* ], line 47 */ if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab4; { int ret = slice_from_s(z, 1, s_7); /* <-, line 47 */ if (ret < 0) return ret; } goto lab3; lab4: z->c = c4; if (!(eq_s(z, 1, s_8))) goto lab2; z->ket = z->c; /* ], line 48 */ if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; { int ret = slice_from_s(z, 1, s_9); /* <-, line 48 */ if (ret < 0) return ret; } } lab3: z->c = c3; break; lab2: z->c = c3; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab1; z->c = ret; /* goto, line 46 */ } } continue; lab1: z->c = c2; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 58 */ { int c2 = z->c; /* or, line 60 */ if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; { int c3 = z->c; /* or, line 
59 */ if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab4; { /* gopast */ /* grouping v, line 59 */ int ret = out_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; { /* gopast */ /* non v, line 59 */ int ret = in_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab0; { int c4 = z->c; /* or, line 61 */ if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab6; { /* gopast */ /* grouping v, line 61 */ int ret = out_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab0; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 61 */ } } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 62 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 64 */ { /* gopast */ /* grouping v, line 65 */ int ret = out_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 65 */ int ret = in_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 65 */ { /* gopast */ /* grouping v, line 66 */ int ret = out_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 66 */ int ret = in_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 66 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 70 */ int c1 = z->c; z->bra = z->c; /* [, line 72 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 72 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 72 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = 
slice_from_s(z, 1, s_10); /* <-, line 73 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_11); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 75 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_attached_pronoun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 87 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33314 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 37))) return 0; /* substring, line 87 */ z->bra = z->c; /* ], line 87 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; among_var = find_among_b(z, a_3, 5); /* among, line 97 */ if (!(among_var)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 97 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 98 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 99 */ if (ret < 0) return ret; } break; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 104 */ among_var = find_among_b(z, a_6, 51); /* substring, line 104 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 104 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 111 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 111 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 113 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 
113 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ z->ket = z->c; /* [, line 114 */ if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 114 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 114 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 114 */ if (ret < 0) return ret; } lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_14); /* <-, line 117 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 119 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_15); /* <-, line 119 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 121 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_16); /* <-, line 121 */ if (ret < 0) return ret; } break; case 6: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 123 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } break; case 7: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 125 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 126 */ z->ket = z->c; /* [, line 127 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4722696 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_4, 4); /* substring, line 127 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 127 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 127 */ if (ret < 0) return ret; } 
switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: z->ket = z->c; /* [, line 128 */ if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 128 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 128 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 128 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ z->ket = z->c; /* [, line 136 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_5, 3); /* substring, line 136 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 136 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 137 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 137 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 9: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 143 */ z->ket = z->c; /* [, line 143 */ if (!(eq_s_b(z, 2, s_18))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 143 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 143 */ if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 143 */ { 
int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } lab3: ; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 148 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 148 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 149 */ among_var = find_among_b(z, a_7, 87); /* substring, line 149 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 149 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_vowel_suffix(struct SN_env * z) { { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 171 */ z->ket = z->c; /* [, line 172 */ if (in_grouping_b_U(z, g_AEIO, 97, 242, 0)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 172 */ { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 172 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 172 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 173 */ if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 173 */ { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 173 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 173 */ if (ret < 0) return ret; } lab0: ; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 175 */ z->ket = z->c; /* [, line 176 */ if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 176 */ if (in_grouping_b_U(z, g_CG, 99, 103, 0)) { z->c = z->l - m_keep; goto lab1; } { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call RV, line 
176 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } lab1: ; } return 1; } extern int italian_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 182 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 182 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 183 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 183 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 184 */ { int m3 = z->l - z->c; (void)m3; /* do, line 185 */ { int ret = r_attached_pronoun(z); if (ret == 0) goto lab2; /* call attached_pronoun, line 185 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 186 */ { int m5 = z->l - z->c; (void)m5; /* or, line 186 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab5; /* call standard_suffix, line 186 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = z->l - m5; { int ret = r_verb_suffix(z); if (ret == 0) goto lab3; /* call verb_suffix, line 186 */ if (ret < 0) return ret; } } lab4: lab3: z->c = z->l - m4; } { int m6 = z->l - z->c; (void)m6; /* do, line 187 */ { int ret = r_vowel_suffix(z); if (ret == 0) goto lab6; /* call vowel_suffix, line 187 */ if (ret < 0) return ret; } lab6: z->c = z->l - m6; } z->c = z->lb; { int c7 = z->c; /* do, line 189 */ { int ret = r_postlude(z); if (ret == 0) goto lab7; /* call postlude, line 189 */ if (ret < 0) return ret; } lab7: z->c = c7; } return 1; } extern struct SN_env * italian_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void italian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_italian.h000066400000000000000000000004711217574114600307120ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef 
__cplusplus extern "C" { #endif extern struct SN_env * italian_UTF_8_create_env(void); extern void italian_UTF_8_close_env(struct SN_env * z); extern int italian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_norwegian.c000066400000000000000000000234751217574114600312660ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int norwegian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * norwegian_UTF_8_create_env(void); extern void norwegian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'a' }; static const symbol s_0_1[1] = { 'e' }; static const symbol s_0_2[3] = { 'e', 'd', 'e' }; static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_0_5[3] = { 'a', 'n', 'e' }; static const symbol s_0_6[3] = { 'e', 'n', 'e' }; static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; static const symbol s_0_9[2] = { 'e', 'n' }; static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; static const symbol s_0_11[2] = { 'a', 'r' }; static const symbol s_0_12[2] = { 'e', 'r' }; static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; static const symbol s_0_14[1] = { 's' }; static const symbol s_0_15[2] = { 'a', 's' }; static const symbol s_0_16[2] = { 'e', 's' }; static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_19[4] = { 'e', 'n', 
'e', 's' }; static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; static const symbol s_0_21[3] = { 'e', 'n', 's' }; static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; static const symbol s_0_23[3] = { 'e', 'r', 's' }; static const symbol s_0_24[3] = { 'e', 't', 's' }; static const symbol s_0_25[2] = { 'e', 't' }; static const symbol s_0_26[3] = { 'h', 'e', 't' }; static const symbol s_0_27[3] = { 'e', 'r', 't' }; static const symbol s_0_28[3] = { 'a', 's', 't' }; static const struct among a_0[29] = { /* 0 */ { 1, s_0_0, -1, 1, 0}, /* 1 */ { 1, s_0_1, -1, 1, 0}, /* 2 */ { 3, s_0_2, 1, 1, 0}, /* 3 */ { 4, s_0_3, 1, 1, 0}, /* 4 */ { 4, s_0_4, 1, 1, 0}, /* 5 */ { 3, s_0_5, 1, 1, 0}, /* 6 */ { 3, s_0_6, 1, 1, 0}, /* 7 */ { 6, s_0_7, 6, 1, 0}, /* 8 */ { 4, s_0_8, 1, 3, 0}, /* 9 */ { 2, s_0_9, -1, 1, 0}, /* 10 */ { 5, s_0_10, 9, 1, 0}, /* 11 */ { 2, s_0_11, -1, 1, 0}, /* 12 */ { 2, s_0_12, -1, 1, 0}, /* 13 */ { 5, s_0_13, 12, 1, 0}, /* 14 */ { 1, s_0_14, -1, 2, 0}, /* 15 */ { 2, s_0_15, 14, 1, 0}, /* 16 */ { 2, s_0_16, 14, 1, 0}, /* 17 */ { 4, s_0_17, 16, 1, 0}, /* 18 */ { 5, s_0_18, 16, 1, 0}, /* 19 */ { 4, s_0_19, 16, 1, 0}, /* 20 */ { 7, s_0_20, 19, 1, 0}, /* 21 */ { 3, s_0_21, 14, 1, 0}, /* 22 */ { 6, s_0_22, 21, 1, 0}, /* 23 */ { 3, s_0_23, 14, 1, 0}, /* 24 */ { 3, s_0_24, 14, 1, 0}, /* 25 */ { 2, s_0_25, -1, 1, 0}, /* 26 */ { 3, s_0_26, 25, 1, 0}, /* 27 */ { 3, s_0_27, -1, 3, 0}, /* 28 */ { 3, s_0_28, -1, 1, 0} }; static const symbol s_1_0[2] = { 'd', 't' }; static const symbol s_1_1[2] = { 'v', 't' }; static const struct among a_1[2] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0} }; static const symbol s_2_0[3] = { 'l', 'e', 'g' }; static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; static const symbol s_2_2[2] = { 'i', 'g' }; static const symbol s_2_3[3] = { 'e', 'i', 'g' }; static const symbol s_2_4[3] = { 'l', 'i', 'g' }; static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; static const symbol s_2_6[3] = 
{ 'e', 'l', 's' }; static const symbol s_2_7[3] = { 'l', 'o', 'v' }; static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; static const struct among a_2[11] = { /* 0 */ { 3, s_2_0, -1, 1, 0}, /* 1 */ { 4, s_2_1, 0, 1, 0}, /* 2 */ { 2, s_2_2, -1, 1, 0}, /* 3 */ { 3, s_2_3, 2, 1, 0}, /* 4 */ { 3, s_2_4, 2, 1, 0}, /* 5 */ { 4, s_2_5, 4, 1, 0}, /* 6 */ { 3, s_2_6, -1, 1, 0}, /* 7 */ { 3, s_2_7, -1, 1, 0}, /* 8 */ { 4, s_2_8, 7, 1, 0}, /* 9 */ { 4, s_2_9, 7, 1, 0}, /* 10 */ { 7, s_2_10, 9, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; static const symbol s_0[] = { 'k' }; static const symbol s_1[] = { 'e', 'r' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 30 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) return 0; z->c = ret; /* hop, line 30 */ } z->I[1] = z->c; /* setmark x, line 30 */ z->c = c_test; } if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ { /* gopast */ /* non v, line 31 */ int ret = in_grouping_U(z, g_v, 97, 248, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 31 */ /* try, line 32 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 38 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 38 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 38 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ if (!(among_var)) { z->lb = mlimit; return 0; } 
z->bra = z->c; /* ], line 38 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 2: { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ if (in_grouping_b_U(z, g_s_ending, 98, 122, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_0))) return 0; if (out_grouping_b_U(z, g_v, 97, 248, 0)) return 0; } lab0: { int ret = slice_del(z); /* delete, line 46 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 53 */ { int mlimit; /* setlimit, line 54 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 54 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 54 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ z->bra = z->c; /* ], line 54 */ z->lb = mlimit; } z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 59 */ } z->bra = z->c; /* ], line 59 */ { int ret = slice_del(z); /* delete, line 59 */ if (ret < 0) return ret; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 63 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 63 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 63 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 63 */ z->lb = mlimit; } switch(among_var) { case 0: 
return 0; case 1: { int ret = slice_del(z); /* delete, line 67 */ if (ret < 0) return ret; } break; } return 1; } extern int norwegian_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 74 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 74 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 75 */ { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 76 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 78 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } z->c = z->lb; return 1; } extern struct SN_env * norwegian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } extern void norwegian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_norwegian.h000066400000000000000000000004771217574114600312700ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * norwegian_UTF_8_create_env(void); extern void norwegian_UTF_8_close_env(struct SN_env * z); extern int norwegian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_porter.c000066400000000000000000000611731217574114600306050ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int 
porter_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_Step_5b(struct SN_env * z); static int r_Step_5a(struct SN_env * z); static int r_Step_4(struct SN_env * z); static int r_Step_3(struct SN_env * z); static int r_Step_2(struct SN_env * z); static int r_Step_1c(struct SN_env * z); static int r_Step_1b(struct SN_env * z); static int r_Step_1a(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_shortv(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * porter_UTF_8_create_env(void); extern void porter_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 's' }; static const symbol s_0_1[3] = { 'i', 'e', 's' }; static const symbol s_0_2[4] = { 's', 's', 'e', 's' }; static const symbol s_0_3[2] = { 's', 's' }; static const struct among a_0[4] = { /* 0 */ { 1, s_0_0, -1, 3, 0}, /* 1 */ { 3, s_0_1, 0, 2, 0}, /* 2 */ { 4, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, -1, 0} }; static const symbol s_1_1[2] = { 'b', 'b' }; static const symbol s_1_2[2] = { 'd', 'd' }; static const symbol s_1_3[2] = { 'f', 'f' }; static const symbol s_1_4[2] = { 'g', 'g' }; static const symbol s_1_5[2] = { 'b', 'l' }; static const symbol s_1_6[2] = { 'm', 'm' }; static const symbol s_1_7[2] = { 'n', 'n' }; static const symbol s_1_8[2] = { 'p', 'p' }; static const symbol s_1_9[2] = { 'r', 'r' }; static const symbol s_1_10[2] = { 'a', 't' }; static const symbol s_1_11[2] = { 't', 't' }; static const symbol s_1_12[2] = { 'i', 'z' }; static const struct among a_1[13] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_1_1, 0, 2, 0}, /* 2 */ { 2, s_1_2, 0, 2, 0}, /* 3 */ { 2, s_1_3, 0, 2, 0}, /* 4 */ { 2, s_1_4, 0, 2, 0}, /* 5 */ { 2, s_1_5, 0, 1, 0}, /* 6 */ { 2, s_1_6, 0, 2, 0}, /* 7 */ { 2, s_1_7, 0, 2, 0}, /* 8 */ { 2, s_1_8, 0, 2, 0}, /* 9 */ { 2, s_1_9, 0, 2, 0}, /* 10 */ { 2, s_1_10, 0, 1, 0}, /* 11 */ { 2, s_1_11, 0, 2, 0}, /* 12 */ { 2, s_1_12, 0, 1, 
0} }; static const symbol s_2_0[2] = { 'e', 'd' }; static const symbol s_2_1[3] = { 'e', 'e', 'd' }; static const symbol s_2_2[3] = { 'i', 'n', 'g' }; static const struct among a_2[3] = { /* 0 */ { 2, s_2_0, -1, 2, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 3, s_2_2, -1, 2, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 'c', 'i' }; static const symbol s_3_1[4] = { 'e', 'n', 'c', 'i' }; static const symbol s_3_2[4] = { 'a', 'b', 'l', 'i' }; static const symbol s_3_3[3] = { 'e', 'l', 'i' }; static const symbol s_3_4[4] = { 'a', 'l', 'l', 'i' }; static const symbol s_3_5[5] = { 'o', 'u', 's', 'l', 'i' }; static const symbol s_3_6[5] = { 'e', 'n', 't', 'l', 'i' }; static const symbol s_3_7[5] = { 'a', 'l', 'i', 't', 'i' }; static const symbol s_3_8[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; static const symbol s_3_9[5] = { 'i', 'v', 'i', 't', 'i' }; static const symbol s_3_10[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_3_11[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_3_12[5] = { 'a', 'l', 'i', 's', 'm' }; static const symbol s_3_13[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_3_14[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; static const symbol s_3_15[4] = { 'i', 'z', 'e', 'r' }; static const symbol s_3_16[4] = { 'a', 't', 'o', 'r' }; static const symbol s_3_17[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; static const symbol s_3_18[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; static const symbol s_3_19[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; static const struct among a_3[20] = { /* 0 */ { 4, s_3_0, -1, 3, 0}, /* 1 */ { 4, s_3_1, -1, 2, 0}, /* 2 */ { 4, s_3_2, -1, 4, 0}, /* 3 */ { 3, s_3_3, -1, 6, 0}, /* 4 */ { 4, s_3_4, -1, 9, 0}, /* 5 */ { 5, s_3_5, -1, 12, 0}, /* 6 */ { 5, s_3_6, -1, 5, 0}, /* 7 */ { 5, s_3_7, -1, 10, 0}, /* 8 */ { 6, s_3_8, -1, 14, 0}, /* 9 */ { 5, s_3_9, -1, 13, 0}, /* 10 */ { 6, s_3_10, -1, 1, 0}, /* 11 */ { 7, s_3_11, 10, 8, 0}, /* 12 */ { 5, s_3_12, -1, 10, 0}, /* 13 */ { 5, s_3_13, -1, 8, 0}, /* 
14 */ { 7, s_3_14, 13, 7, 0}, /* 15 */ { 4, s_3_15, -1, 7, 0}, /* 16 */ { 4, s_3_16, -1, 8, 0}, /* 17 */ { 7, s_3_17, -1, 13, 0}, /* 18 */ { 7, s_3_18, -1, 11, 0}, /* 19 */ { 7, s_3_19, -1, 12, 0} }; static const symbol s_4_0[5] = { 'i', 'c', 'a', 't', 'e' }; static const symbol s_4_1[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_4_2[5] = { 'a', 'l', 'i', 'z', 'e' }; static const symbol s_4_3[5] = { 'i', 'c', 'i', 't', 'i' }; static const symbol s_4_4[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_4_5[3] = { 'f', 'u', 'l' }; static const symbol s_4_6[4] = { 'n', 'e', 's', 's' }; static const struct among a_4[7] = { /* 0 */ { 5, s_4_0, -1, 2, 0}, /* 1 */ { 5, s_4_1, -1, 3, 0}, /* 2 */ { 5, s_4_2, -1, 1, 0}, /* 3 */ { 5, s_4_3, -1, 2, 0}, /* 4 */ { 4, s_4_4, -1, 2, 0}, /* 5 */ { 3, s_4_5, -1, 3, 0}, /* 6 */ { 4, s_4_6, -1, 3, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_5_2[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_5_3[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_5_4[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_5_5[3] = { 'a', 't', 'e' }; static const symbol s_5_6[3] = { 'i', 'v', 'e' }; static const symbol s_5_7[3] = { 'i', 'z', 'e' }; static const symbol s_5_8[3] = { 'i', 't', 'i' }; static const symbol s_5_9[2] = { 'a', 'l' }; static const symbol s_5_10[3] = { 'i', 's', 'm' }; static const symbol s_5_11[3] = { 'i', 'o', 'n' }; static const symbol s_5_12[2] = { 'e', 'r' }; static const symbol s_5_13[3] = { 'o', 'u', 's' }; static const symbol s_5_14[3] = { 'a', 'n', 't' }; static const symbol s_5_15[3] = { 'e', 'n', 't' }; static const symbol s_5_16[4] = { 'm', 'e', 'n', 't' }; static const symbol s_5_17[5] = { 'e', 'm', 'e', 'n', 't' }; static const symbol s_5_18[2] = { 'o', 'u' }; static const struct among a_5[19] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 4, s_5_2, -1, 1, 0}, /* 3 */ { 4, s_5_3, -1, 1, 0}, /* 4 */ 
{ 4, s_5_4, -1, 1, 0}, /* 5 */ { 3, s_5_5, -1, 1, 0}, /* 6 */ { 3, s_5_6, -1, 1, 0}, /* 7 */ { 3, s_5_7, -1, 1, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 2, s_5_9, -1, 1, 0}, /* 10 */ { 3, s_5_10, -1, 1, 0}, /* 11 */ { 3, s_5_11, -1, 2, 0}, /* 12 */ { 2, s_5_12, -1, 1, 0}, /* 13 */ { 3, s_5_13, -1, 1, 0}, /* 14 */ { 3, s_5_14, -1, 1, 0}, /* 15 */ { 3, s_5_15, -1, 1, 0}, /* 16 */ { 4, s_5_16, 15, 1, 0}, /* 17 */ { 5, s_5_17, 16, 1, 0}, /* 18 */ { 2, s_5_18, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1 }; static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; static const symbol s_0[] = { 's', 's' }; static const symbol s_1[] = { 'i' }; static const symbol s_2[] = { 'e', 'e' }; static const symbol s_3[] = { 'e' }; static const symbol s_4[] = { 'e' }; static const symbol s_5[] = { 'y' }; static const symbol s_6[] = { 'Y' }; static const symbol s_7[] = { 'i' }; static const symbol s_8[] = { 't', 'i', 'o', 'n' }; static const symbol s_9[] = { 'e', 'n', 'c', 'e' }; static const symbol s_10[] = { 'a', 'n', 'c', 'e' }; static const symbol s_11[] = { 'a', 'b', 'l', 'e' }; static const symbol s_12[] = { 'e', 'n', 't' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'i', 'z', 'e' }; static const symbol s_15[] = { 'a', 't', 'e' }; static const symbol s_16[] = { 'a', 'l' }; static const symbol s_17[] = { 'a', 'l' }; static const symbol s_18[] = { 'f', 'u', 'l' }; static const symbol s_19[] = { 'o', 'u', 's' }; static const symbol s_20[] = { 'i', 'v', 'e' }; static const symbol s_21[] = { 'b', 'l', 'e' }; static const symbol s_22[] = { 'a', 'l' }; static const symbol s_23[] = { 'i', 'c' }; static const symbol s_24[] = { 's' }; static const symbol s_25[] = { 't' }; static const symbol s_26[] = { 'e' }; static const symbol s_27[] = { 'l' }; static const symbol s_28[] = { 'l' }; static const symbol s_29[] = { 'y' }; static const symbol s_30[] = { 'Y' }; static const symbol s_31[] = { 'y' }; static const symbol s_32[] = { 'Y' }; 
static const symbol s_33[] = { 'Y' }; static const symbol s_34[] = { 'y' }; static int r_shortv(struct SN_env * z) { if (out_grouping_b_U(z, g_v_WXY, 89, 121, 0)) return 0; if (in_grouping_b_U(z, g_v, 97, 121, 0)) return 0; if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_Step_1a(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 25 */ if (z->c <= z->lb || z->p[z->c - 1] != 115) return 0; among_var = find_among_b(z, a_0, 4); /* substring, line 25 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 25 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 26 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 27 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 29 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_1b(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 34 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; among_var = find_among_b(z, a_2, 3); /* substring, line 34 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 34 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 35 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_2); /* <-, line 35 */ if (ret < 0) return ret; } break; case 2: { int m_test = z->l - z->c; /* test, line 38 */ { /* gopast */ /* grouping v, line 38 */ int ret = out_grouping_b_U(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 38 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 39 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 
0x1f)) & 1)) among_var = 3; else among_var = find_among_b(z, a_1, 13); /* substring, line 39 */ if (!(among_var)) return 0; z->c = z->l - m_test; } switch(among_var) { case 0: return 0; case 1: { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_3); /* <+, line 41 */ z->c = c_keep; if (ret < 0) return ret; } break; case 2: z->ket = z->c; /* [, line 44 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 44 */ } z->bra = z->c; /* ], line 44 */ { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 3: if (z->c != z->I[0]) return 0; /* atmark, line 45 */ { int m_test = z->l - z->c; /* test, line 45 */ { int ret = r_shortv(z); if (ret == 0) return 0; /* call shortv, line 45 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_4); /* <+, line 45 */ z->c = c_keep; if (ret < 0) return ret; } break; } break; } return 1; } static int r_Step_1c(struct SN_env * z) { z->ket = z->c; /* [, line 52 */ { int m1 = z->l - z->c; (void)m1; /* or, line 52 */ if (!(eq_s_b(z, 1, s_5))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_6))) return 0; } lab0: z->bra = z->c; /* ], line 52 */ { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping_b_U(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } { int ret = slice_from_s(z, 1, s_7); /* <-, line 54 */ if (ret < 0) return ret; } return 1; } static int r_Step_2(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 58 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_3, 20); /* substring, line 58 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 58 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 58 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_8); /* <-, line 59 */ if (ret < 0) 
return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_9); /* <-, line 60 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_10); /* <-, line 61 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 4, s_11); /* <-, line 62 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_12); /* <-, line 63 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_13); /* <-, line 64 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_14); /* <-, line 66 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 3, s_15); /* <-, line 68 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 2, s_16); /* <-, line 69 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 2, s_17); /* <-, line 71 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 3, s_18); /* <-, line 72 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 3, s_19); /* <-, line 74 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_from_s(z, 3, s_20); /* <-, line 76 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 3, s_21); /* <-, line 77 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_3(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 82 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 7); /* substring, line 82 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 82 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 82 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_22); /* <-, line 83 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_23); /* <-, line 85 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* 
delete, line 87 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_4(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 92 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3961384 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 19); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 92 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 95 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 96 */ if (!(eq_s_b(z, 1, s_24))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_25))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_5a(struct SN_env * z) { z->ket = z->c; /* [, line 101 */ if (!(eq_s_b(z, 1, s_26))) return 0; z->bra = z->c; /* ], line 101 */ { int m1 = z->l - z->c; (void)m1; /* or, line 102 */ { int ret = r_R2(z); if (ret == 0) goto lab1; /* call R2, line 102 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 102 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ { int ret = r_shortv(z); if (ret == 0) goto lab2; /* call shortv, line 102 */ if (ret < 0) return ret; } return 0; lab2: z->c = z->l - m2; } } lab0: { int ret = slice_del(z); /* delete, line 103 */ if (ret < 0) return ret; } return 1; } static int r_Step_5b(struct SN_env * z) { z->ket = z->c; /* [, line 107 */ if (!(eq_s_b(z, 1, s_27))) return 0; z->bra = z->c; /* ], line 107 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 108 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_28))) return 0; { int ret = slice_del(z); /* delete, line 109 */ if (ret < 0) return ret; } return 1; } extern int 
porter_UTF_8_stem(struct SN_env * z) { z->B[0] = 0; /* unset Y_found, line 115 */ { int c1 = z->c; /* do, line 116 */ z->bra = z->c; /* [, line 116 */ if (!(eq_s(z, 1, s_29))) goto lab0; z->ket = z->c; /* ], line 116 */ { int ret = slice_from_s(z, 1, s_30); /* <-, line 116 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 116 */ lab0: z->c = c1; } { int c2 = z->c; /* do, line 117 */ while(1) { /* repeat, line 117 */ int c3 = z->c; while(1) { /* goto, line 117 */ int c4 = z->c; if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab3; z->bra = z->c; /* [, line 117 */ if (!(eq_s(z, 1, s_31))) goto lab3; z->ket = z->c; /* ], line 117 */ z->c = c4; break; lab3: z->c = c4; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab2; z->c = ret; /* goto, line 117 */ } } { int ret = slice_from_s(z, 1, s_32); /* <-, line 117 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 117 */ continue; lab2: z->c = c3; break; } z->c = c2; } z->I[0] = z->l; z->I[1] = z->l; { int c5 = z->c; /* do, line 121 */ { /* gopast */ /* grouping v, line 122 */ int ret = out_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 122 */ int ret = in_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 122 */ { /* gopast */ /* grouping v, line 123 */ int ret = out_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 123 */ int ret = in_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 123 */ lab4: z->c = c5; } z->lb = z->c; z->c = z->l; /* backwards, line 126 */ { int m6 = z->l - z->c; (void)m6; /* do, line 127 */ { int ret = r_Step_1a(z); if (ret == 0) goto lab5; /* call Step_1a, line 127 */ if (ret < 0) return ret; } lab5: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 128 */ { int ret = r_Step_1b(z); if (ret == 0) goto lab6; /* call Step_1b, line 
128 */ if (ret < 0) return ret; } lab6: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 129 */ { int ret = r_Step_1c(z); if (ret == 0) goto lab7; /* call Step_1c, line 129 */ if (ret < 0) return ret; } lab7: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 130 */ { int ret = r_Step_2(z); if (ret == 0) goto lab8; /* call Step_2, line 130 */ if (ret < 0) return ret; } lab8: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 131 */ { int ret = r_Step_3(z); if (ret == 0) goto lab9; /* call Step_3, line 131 */ if (ret < 0) return ret; } lab9: z->c = z->l - m10; } { int m11 = z->l - z->c; (void)m11; /* do, line 132 */ { int ret = r_Step_4(z); if (ret == 0) goto lab10; /* call Step_4, line 132 */ if (ret < 0) return ret; } lab10: z->c = z->l - m11; } { int m12 = z->l - z->c; (void)m12; /* do, line 133 */ { int ret = r_Step_5a(z); if (ret == 0) goto lab11; /* call Step_5a, line 133 */ if (ret < 0) return ret; } lab11: z->c = z->l - m12; } { int m13 = z->l - z->c; (void)m13; /* do, line 134 */ { int ret = r_Step_5b(z); if (ret == 0) goto lab12; /* call Step_5b, line 134 */ if (ret < 0) return ret; } lab12: z->c = z->l - m13; } z->c = z->lb; { int c14 = z->c; /* do, line 137 */ if (!(z->B[0])) goto lab13; /* Boolean test Y_found, line 137 */ while(1) { /* repeat, line 137 */ int c15 = z->c; while(1) { /* goto, line 137 */ int c16 = z->c; z->bra = z->c; /* [, line 137 */ if (!(eq_s(z, 1, s_33))) goto lab15; z->ket = z->c; /* ], line 137 */ z->c = c16; break; lab15: z->c = c16; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab14; z->c = ret; /* goto, line 137 */ } } { int ret = slice_from_s(z, 1, s_34); /* <-, line 137 */ if (ret < 0) return ret; } continue; lab14: z->c = c15; break; } lab13: z->c = c14; } return 1; } extern struct SN_env * porter_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } extern void porter_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 
LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_porter.h000066400000000000000000000004661217574114600306100ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * porter_UTF_8_create_env(void); extern void porter_UTF_8_close_env(struct SN_env * z); extern int porter_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_portuguese.c000066400000000000000000001140401217574114600314640ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int portuguese_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_residual_form(struct SN_env * z); static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * portuguese_UTF_8_create_env(void); extern void portuguese_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 0xC3, 0xA3 }; static const symbol s_0_2[2] = { 0xC3, 0xB5 }; static const struct among a_0[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_0_1, 0, 1, 0}, /* 2 */ { 2, s_0_2, 0, 2, 0} }; static const symbol s_1_1[2] = { 'a', '~' }; static const symbol s_1_2[2] = { 'o', '~' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_1_1, 0, 1, 0}, /* 2 */ { 2, s_1_2, 0, 2, 0} }; static const symbol s_2_0[2] = { 'i', 'c' }; static const symbol s_2_1[2] = { 'a', 
'd' }; static const symbol s_2_2[2] = { 'o', 's' }; static const symbol s_2_3[2] = { 'i', 'v' }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, 1, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 't', 'e' }; static const symbol s_3_1[4] = { 'a', 'v', 'e', 'l' }; static const symbol s_3_2[5] = { 0xC3, 0xAD, 'v', 'e', 'l' }; static const struct among a_3[3] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 5, s_3_2, -1, 1, 0} }; static const symbol s_4_0[2] = { 'i', 'c' }; static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_4_2[2] = { 'i', 'v' }; static const struct among a_4[3] = { /* 0 */ { 2, s_4_0, -1, 1, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 2, s_4_2, -1, 1, 0} }; static const symbol s_5_0[3] = { 'i', 'c', 'a' }; static const symbol s_5_1[6] = { 0xC3, 0xA2, 'n', 'c', 'i', 'a' }; static const symbol s_5_2[6] = { 0xC3, 0xAA, 'n', 'c', 'i', 'a' }; static const symbol s_5_3[3] = { 'i', 'r', 'a' }; static const symbol s_5_4[5] = { 'a', 'd', 'o', 'r', 'a' }; static const symbol s_5_5[3] = { 'o', 's', 'a' }; static const symbol s_5_6[4] = { 'i', 's', 't', 'a' }; static const symbol s_5_7[3] = { 'i', 'v', 'a' }; static const symbol s_5_8[3] = { 'e', 'z', 'a' }; static const symbol s_5_9[6] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a' }; static const symbol s_5_10[5] = { 'i', 'd', 'a', 'd', 'e' }; static const symbol s_5_11[4] = { 'a', 'n', 't', 'e' }; static const symbol s_5_12[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_5_13[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_5_14[5] = { 0xC3, 0xA1, 'v', 'e', 'l' }; static const symbol s_5_15[5] = { 0xC3, 0xAD, 'v', 'e', 'l' }; static const symbol s_5_16[6] = { 'u', 'c', 'i', 0xC3, 0xB3, 'n' }; static const symbol s_5_17[3] = { 'i', 'c', 'o' }; static const symbol s_5_18[4] = { 'i', 's', 'm', 'o' }; static const symbol s_5_19[3] = { 'o', 's', 
'o' }; static const symbol s_5_20[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; static const symbol s_5_21[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; static const symbol s_5_22[3] = { 'i', 'v', 'o' }; static const symbol s_5_23[6] = { 'a', 0xC3, 0xA7, 'a', '~', 'o' }; static const symbol s_5_24[4] = { 'a', 'd', 'o', 'r' }; static const symbol s_5_25[4] = { 'i', 'c', 'a', 's' }; static const symbol s_5_26[7] = { 0xC3, 0xAA, 'n', 'c', 'i', 'a', 's' }; static const symbol s_5_27[4] = { 'i', 'r', 'a', 's' }; static const symbol s_5_28[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; static const symbol s_5_29[4] = { 'o', 's', 'a', 's' }; static const symbol s_5_30[5] = { 'i', 's', 't', 'a', 's' }; static const symbol s_5_31[4] = { 'i', 'v', 'a', 's' }; static const symbol s_5_32[4] = { 'e', 'z', 'a', 's' }; static const symbol s_5_33[7] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a', 's' }; static const symbol s_5_34[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; static const symbol s_5_35[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_5_36[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; static const symbol s_5_37[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_5_38[7] = { 'a', 0xC3, 0xA7, 'o', '~', 'e', 's' }; static const symbol s_5_39[4] = { 'i', 'c', 'o', 's' }; static const symbol s_5_40[5] = { 'i', 's', 'm', 'o', 's' }; static const symbol s_5_41[4] = { 'o', 's', 'o', 's' }; static const symbol s_5_42[7] = { 'a', 'm', 'e', 'n', 't', 'o', 's' }; static const symbol s_5_43[7] = { 'i', 'm', 'e', 'n', 't', 'o', 's' }; static const symbol s_5_44[4] = { 'i', 'v', 'o', 's' }; static const struct among a_5[45] = { /* 0 */ { 3, s_5_0, -1, 1, 0}, /* 1 */ { 6, s_5_1, -1, 1, 0}, /* 2 */ { 6, s_5_2, -1, 4, 0}, /* 3 */ { 3, s_5_3, -1, 9, 0}, /* 4 */ { 5, s_5_4, -1, 1, 0}, /* 5 */ { 3, s_5_5, -1, 1, 0}, /* 6 */ { 4, s_5_6, -1, 1, 0}, /* 7 */ { 3, s_5_7, -1, 8, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 6, s_5_9, -1, 2, 0}, /* 10 */ { 5, s_5_10, -1, 7, 0}, /* 11 */ { 4, s_5_11, -1, 1, 0}, /* 12 */ { 
5, s_5_12, -1, 6, 0}, /* 13 */ { 6, s_5_13, 12, 5, 0}, /* 14 */ { 5, s_5_14, -1, 1, 0}, /* 15 */ { 5, s_5_15, -1, 1, 0}, /* 16 */ { 6, s_5_16, -1, 3, 0}, /* 17 */ { 3, s_5_17, -1, 1, 0}, /* 18 */ { 4, s_5_18, -1, 1, 0}, /* 19 */ { 3, s_5_19, -1, 1, 0}, /* 20 */ { 6, s_5_20, -1, 1, 0}, /* 21 */ { 6, s_5_21, -1, 1, 0}, /* 22 */ { 3, s_5_22, -1, 8, 0}, /* 23 */ { 6, s_5_23, -1, 1, 0}, /* 24 */ { 4, s_5_24, -1, 1, 0}, /* 25 */ { 4, s_5_25, -1, 1, 0}, /* 26 */ { 7, s_5_26, -1, 4, 0}, /* 27 */ { 4, s_5_27, -1, 9, 0}, /* 28 */ { 6, s_5_28, -1, 1, 0}, /* 29 */ { 4, s_5_29, -1, 1, 0}, /* 30 */ { 5, s_5_30, -1, 1, 0}, /* 31 */ { 4, s_5_31, -1, 8, 0}, /* 32 */ { 4, s_5_32, -1, 1, 0}, /* 33 */ { 7, s_5_33, -1, 2, 0}, /* 34 */ { 6, s_5_34, -1, 7, 0}, /* 35 */ { 7, s_5_35, -1, 3, 0}, /* 36 */ { 6, s_5_36, -1, 1, 0}, /* 37 */ { 5, s_5_37, -1, 1, 0}, /* 38 */ { 7, s_5_38, -1, 1, 0}, /* 39 */ { 4, s_5_39, -1, 1, 0}, /* 40 */ { 5, s_5_40, -1, 1, 0}, /* 41 */ { 4, s_5_41, -1, 1, 0}, /* 42 */ { 7, s_5_42, -1, 1, 0}, /* 43 */ { 7, s_5_43, -1, 1, 0}, /* 44 */ { 4, s_5_44, -1, 8, 0} }; static const symbol s_6_0[3] = { 'a', 'd', 'a' }; static const symbol s_6_1[3] = { 'i', 'd', 'a' }; static const symbol s_6_2[2] = { 'i', 'a' }; static const symbol s_6_3[4] = { 'a', 'r', 'i', 'a' }; static const symbol s_6_4[4] = { 'e', 'r', 'i', 'a' }; static const symbol s_6_5[4] = { 'i', 'r', 'i', 'a' }; static const symbol s_6_6[3] = { 'a', 'r', 'a' }; static const symbol s_6_7[3] = { 'e', 'r', 'a' }; static const symbol s_6_8[3] = { 'i', 'r', 'a' }; static const symbol s_6_9[3] = { 'a', 'v', 'a' }; static const symbol s_6_10[4] = { 'a', 's', 's', 'e' }; static const symbol s_6_11[4] = { 'e', 's', 's', 'e' }; static const symbol s_6_12[4] = { 'i', 's', 's', 'e' }; static const symbol s_6_13[4] = { 'a', 's', 't', 'e' }; static const symbol s_6_14[4] = { 'e', 's', 't', 'e' }; static const symbol s_6_15[4] = { 'i', 's', 't', 'e' }; static const symbol s_6_16[2] = { 'e', 'i' }; static const symbol 
s_6_17[4] = { 'a', 'r', 'e', 'i' }; static const symbol s_6_18[4] = { 'e', 'r', 'e', 'i' }; static const symbol s_6_19[4] = { 'i', 'r', 'e', 'i' }; static const symbol s_6_20[2] = { 'a', 'm' }; static const symbol s_6_21[3] = { 'i', 'a', 'm' }; static const symbol s_6_22[5] = { 'a', 'r', 'i', 'a', 'm' }; static const symbol s_6_23[5] = { 'e', 'r', 'i', 'a', 'm' }; static const symbol s_6_24[5] = { 'i', 'r', 'i', 'a', 'm' }; static const symbol s_6_25[4] = { 'a', 'r', 'a', 'm' }; static const symbol s_6_26[4] = { 'e', 'r', 'a', 'm' }; static const symbol s_6_27[4] = { 'i', 'r', 'a', 'm' }; static const symbol s_6_28[4] = { 'a', 'v', 'a', 'm' }; static const symbol s_6_29[2] = { 'e', 'm' }; static const symbol s_6_30[4] = { 'a', 'r', 'e', 'm' }; static const symbol s_6_31[4] = { 'e', 'r', 'e', 'm' }; static const symbol s_6_32[4] = { 'i', 'r', 'e', 'm' }; static const symbol s_6_33[5] = { 'a', 's', 's', 'e', 'm' }; static const symbol s_6_34[5] = { 'e', 's', 's', 'e', 'm' }; static const symbol s_6_35[5] = { 'i', 's', 's', 'e', 'm' }; static const symbol s_6_36[3] = { 'a', 'd', 'o' }; static const symbol s_6_37[3] = { 'i', 'd', 'o' }; static const symbol s_6_38[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_6_39[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_6_40[4] = { 'i', 'n', 'd', 'o' }; static const symbol s_6_41[5] = { 'a', 'r', 'a', '~', 'o' }; static const symbol s_6_42[5] = { 'e', 'r', 'a', '~', 'o' }; static const symbol s_6_43[5] = { 'i', 'r', 'a', '~', 'o' }; static const symbol s_6_44[2] = { 'a', 'r' }; static const symbol s_6_45[2] = { 'e', 'r' }; static const symbol s_6_46[2] = { 'i', 'r' }; static const symbol s_6_47[2] = { 'a', 's' }; static const symbol s_6_48[4] = { 'a', 'd', 'a', 's' }; static const symbol s_6_49[4] = { 'i', 'd', 'a', 's' }; static const symbol s_6_50[3] = { 'i', 'a', 's' }; static const symbol s_6_51[5] = { 'a', 'r', 'i', 'a', 's' }; static const symbol s_6_52[5] = { 'e', 'r', 'i', 'a', 's' }; static const symbol 
s_6_53[5] = { 'i', 'r', 'i', 'a', 's' }; static const symbol s_6_54[4] = { 'a', 'r', 'a', 's' }; static const symbol s_6_55[4] = { 'e', 'r', 'a', 's' }; static const symbol s_6_56[4] = { 'i', 'r', 'a', 's' }; static const symbol s_6_57[4] = { 'a', 'v', 'a', 's' }; static const symbol s_6_58[2] = { 'e', 's' }; static const symbol s_6_59[5] = { 'a', 'r', 'd', 'e', 's' }; static const symbol s_6_60[5] = { 'e', 'r', 'd', 'e', 's' }; static const symbol s_6_61[5] = { 'i', 'r', 'd', 'e', 's' }; static const symbol s_6_62[4] = { 'a', 'r', 'e', 's' }; static const symbol s_6_63[4] = { 'e', 'r', 'e', 's' }; static const symbol s_6_64[4] = { 'i', 'r', 'e', 's' }; static const symbol s_6_65[5] = { 'a', 's', 's', 'e', 's' }; static const symbol s_6_66[5] = { 'e', 's', 's', 'e', 's' }; static const symbol s_6_67[5] = { 'i', 's', 's', 'e', 's' }; static const symbol s_6_68[5] = { 'a', 's', 't', 'e', 's' }; static const symbol s_6_69[5] = { 'e', 's', 't', 'e', 's' }; static const symbol s_6_70[5] = { 'i', 's', 't', 'e', 's' }; static const symbol s_6_71[2] = { 'i', 's' }; static const symbol s_6_72[3] = { 'a', 'i', 's' }; static const symbol s_6_73[3] = { 'e', 'i', 's' }; static const symbol s_6_74[5] = { 'a', 'r', 'e', 'i', 's' }; static const symbol s_6_75[5] = { 'e', 'r', 'e', 'i', 's' }; static const symbol s_6_76[5] = { 'i', 'r', 'e', 'i', 's' }; static const symbol s_6_77[6] = { 0xC3, 0xA1, 'r', 'e', 'i', 's' }; static const symbol s_6_78[6] = { 0xC3, 0xA9, 'r', 'e', 'i', 's' }; static const symbol s_6_79[6] = { 0xC3, 0xAD, 'r', 'e', 'i', 's' }; static const symbol s_6_80[7] = { 0xC3, 0xA1, 's', 's', 'e', 'i', 's' }; static const symbol s_6_81[7] = { 0xC3, 0xA9, 's', 's', 'e', 'i', 's' }; static const symbol s_6_82[7] = { 0xC3, 0xAD, 's', 's', 'e', 'i', 's' }; static const symbol s_6_83[6] = { 0xC3, 0xA1, 'v', 'e', 'i', 's' }; static const symbol s_6_84[5] = { 0xC3, 0xAD, 'e', 'i', 's' }; static const symbol s_6_85[7] = { 'a', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; static const 
symbol s_6_86[7] = { 'e', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; static const symbol s_6_87[7] = { 'i', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; static const symbol s_6_88[4] = { 'a', 'd', 'o', 's' }; static const symbol s_6_89[4] = { 'i', 'd', 'o', 's' }; static const symbol s_6_90[4] = { 'a', 'm', 'o', 's' }; static const symbol s_6_91[7] = { 0xC3, 0xA1, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_92[7] = { 0xC3, 0xA9, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_93[7] = { 0xC3, 0xAD, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_94[7] = { 0xC3, 0xA1, 'v', 'a', 'm', 'o', 's' }; static const symbol s_6_95[6] = { 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_6_96[8] = { 'a', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_6_97[8] = { 'e', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_6_98[8] = { 'i', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_6_99[4] = { 'e', 'm', 'o', 's' }; static const symbol s_6_100[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_101[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_102[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_103[8] = { 0xC3, 0xA1, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_104[8] = { 0xC3, 0xAA, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_105[8] = { 0xC3, 0xAD, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_106[4] = { 'i', 'm', 'o', 's' }; static const symbol s_6_107[5] = { 'a', 'r', 'm', 'o', 's' }; static const symbol s_6_108[5] = { 'e', 'r', 'm', 'o', 's' }; static const symbol s_6_109[5] = { 'i', 'r', 'm', 'o', 's' }; static const symbol s_6_110[5] = { 0xC3, 0xA1, 'm', 'o', 's' }; static const symbol s_6_111[5] = { 'a', 'r', 0xC3, 0xA1, 's' }; static const symbol s_6_112[5] = { 'e', 'r', 0xC3, 0xA1, 's' }; static const symbol s_6_113[5] = { 'i', 'r', 0xC3, 0xA1, 's' }; static const symbol s_6_114[2] = { 'e', 'u' }; static const symbol s_6_115[2] = { 'i', 'u' }; static const 
symbol s_6_116[2] = { 'o', 'u' }; static const symbol s_6_117[4] = { 'a', 'r', 0xC3, 0xA1 }; static const symbol s_6_118[4] = { 'e', 'r', 0xC3, 0xA1 }; static const symbol s_6_119[4] = { 'i', 'r', 0xC3, 0xA1 }; static const struct among a_6[120] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 3, s_6_1, -1, 1, 0}, /* 2 */ { 2, s_6_2, -1, 1, 0}, /* 3 */ { 4, s_6_3, 2, 1, 0}, /* 4 */ { 4, s_6_4, 2, 1, 0}, /* 5 */ { 4, s_6_5, 2, 1, 0}, /* 6 */ { 3, s_6_6, -1, 1, 0}, /* 7 */ { 3, s_6_7, -1, 1, 0}, /* 8 */ { 3, s_6_8, -1, 1, 0}, /* 9 */ { 3, s_6_9, -1, 1, 0}, /* 10 */ { 4, s_6_10, -1, 1, 0}, /* 11 */ { 4, s_6_11, -1, 1, 0}, /* 12 */ { 4, s_6_12, -1, 1, 0}, /* 13 */ { 4, s_6_13, -1, 1, 0}, /* 14 */ { 4, s_6_14, -1, 1, 0}, /* 15 */ { 4, s_6_15, -1, 1, 0}, /* 16 */ { 2, s_6_16, -1, 1, 0}, /* 17 */ { 4, s_6_17, 16, 1, 0}, /* 18 */ { 4, s_6_18, 16, 1, 0}, /* 19 */ { 4, s_6_19, 16, 1, 0}, /* 20 */ { 2, s_6_20, -1, 1, 0}, /* 21 */ { 3, s_6_21, 20, 1, 0}, /* 22 */ { 5, s_6_22, 21, 1, 0}, /* 23 */ { 5, s_6_23, 21, 1, 0}, /* 24 */ { 5, s_6_24, 21, 1, 0}, /* 25 */ { 4, s_6_25, 20, 1, 0}, /* 26 */ { 4, s_6_26, 20, 1, 0}, /* 27 */ { 4, s_6_27, 20, 1, 0}, /* 28 */ { 4, s_6_28, 20, 1, 0}, /* 29 */ { 2, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, 29, 1, 0}, /* 31 */ { 4, s_6_31, 29, 1, 0}, /* 32 */ { 4, s_6_32, 29, 1, 0}, /* 33 */ { 5, s_6_33, 29, 1, 0}, /* 34 */ { 5, s_6_34, 29, 1, 0}, /* 35 */ { 5, s_6_35, 29, 1, 0}, /* 36 */ { 3, s_6_36, -1, 1, 0}, /* 37 */ { 3, s_6_37, -1, 1, 0}, /* 38 */ { 4, s_6_38, -1, 1, 0}, /* 39 */ { 4, s_6_39, -1, 1, 0}, /* 40 */ { 4, s_6_40, -1, 1, 0}, /* 41 */ { 5, s_6_41, -1, 1, 0}, /* 42 */ { 5, s_6_42, -1, 1, 0}, /* 43 */ { 5, s_6_43, -1, 1, 0}, /* 44 */ { 2, s_6_44, -1, 1, 0}, /* 45 */ { 2, s_6_45, -1, 1, 0}, /* 46 */ { 2, s_6_46, -1, 1, 0}, /* 47 */ { 2, s_6_47, -1, 1, 0}, /* 48 */ { 4, s_6_48, 47, 1, 0}, /* 49 */ { 4, s_6_49, 47, 1, 0}, /* 50 */ { 3, s_6_50, 47, 1, 0}, /* 51 */ { 5, s_6_51, 50, 1, 0}, /* 52 */ { 5, s_6_52, 50, 1, 0}, /* 53 */ { 5, s_6_53, 
50, 1, 0}, /* 54 */ { 4, s_6_54, 47, 1, 0}, /* 55 */ { 4, s_6_55, 47, 1, 0}, /* 56 */ { 4, s_6_56, 47, 1, 0}, /* 57 */ { 4, s_6_57, 47, 1, 0}, /* 58 */ { 2, s_6_58, -1, 1, 0}, /* 59 */ { 5, s_6_59, 58, 1, 0}, /* 60 */ { 5, s_6_60, 58, 1, 0}, /* 61 */ { 5, s_6_61, 58, 1, 0}, /* 62 */ { 4, s_6_62, 58, 1, 0}, /* 63 */ { 4, s_6_63, 58, 1, 0}, /* 64 */ { 4, s_6_64, 58, 1, 0}, /* 65 */ { 5, s_6_65, 58, 1, 0}, /* 66 */ { 5, s_6_66, 58, 1, 0}, /* 67 */ { 5, s_6_67, 58, 1, 0}, /* 68 */ { 5, s_6_68, 58, 1, 0}, /* 69 */ { 5, s_6_69, 58, 1, 0}, /* 70 */ { 5, s_6_70, 58, 1, 0}, /* 71 */ { 2, s_6_71, -1, 1, 0}, /* 72 */ { 3, s_6_72, 71, 1, 0}, /* 73 */ { 3, s_6_73, 71, 1, 0}, /* 74 */ { 5, s_6_74, 73, 1, 0}, /* 75 */ { 5, s_6_75, 73, 1, 0}, /* 76 */ { 5, s_6_76, 73, 1, 0}, /* 77 */ { 6, s_6_77, 73, 1, 0}, /* 78 */ { 6, s_6_78, 73, 1, 0}, /* 79 */ { 6, s_6_79, 73, 1, 0}, /* 80 */ { 7, s_6_80, 73, 1, 0}, /* 81 */ { 7, s_6_81, 73, 1, 0}, /* 82 */ { 7, s_6_82, 73, 1, 0}, /* 83 */ { 6, s_6_83, 73, 1, 0}, /* 84 */ { 5, s_6_84, 73, 1, 0}, /* 85 */ { 7, s_6_85, 84, 1, 0}, /* 86 */ { 7, s_6_86, 84, 1, 0}, /* 87 */ { 7, s_6_87, 84, 1, 0}, /* 88 */ { 4, s_6_88, -1, 1, 0}, /* 89 */ { 4, s_6_89, -1, 1, 0}, /* 90 */ { 4, s_6_90, -1, 1, 0}, /* 91 */ { 7, s_6_91, 90, 1, 0}, /* 92 */ { 7, s_6_92, 90, 1, 0}, /* 93 */ { 7, s_6_93, 90, 1, 0}, /* 94 */ { 7, s_6_94, 90, 1, 0}, /* 95 */ { 6, s_6_95, 90, 1, 0}, /* 96 */ { 8, s_6_96, 95, 1, 0}, /* 97 */ { 8, s_6_97, 95, 1, 0}, /* 98 */ { 8, s_6_98, 95, 1, 0}, /* 99 */ { 4, s_6_99, -1, 1, 0}, /*100 */ { 6, s_6_100, 99, 1, 0}, /*101 */ { 6, s_6_101, 99, 1, 0}, /*102 */ { 6, s_6_102, 99, 1, 0}, /*103 */ { 8, s_6_103, 99, 1, 0}, /*104 */ { 8, s_6_104, 99, 1, 0}, /*105 */ { 8, s_6_105, 99, 1, 0}, /*106 */ { 4, s_6_106, -1, 1, 0}, /*107 */ { 5, s_6_107, -1, 1, 0}, /*108 */ { 5, s_6_108, -1, 1, 0}, /*109 */ { 5, s_6_109, -1, 1, 0}, /*110 */ { 5, s_6_110, -1, 1, 0}, /*111 */ { 5, s_6_111, -1, 1, 0}, /*112 */ { 5, s_6_112, -1, 1, 0}, /*113 */ { 5, s_6_113, -1, 
1, 0}, /*114 */ { 2, s_6_114, -1, 1, 0}, /*115 */ { 2, s_6_115, -1, 1, 0}, /*116 */ { 2, s_6_116, -1, 1, 0}, /*117 */ { 4, s_6_117, -1, 1, 0}, /*118 */ { 4, s_6_118, -1, 1, 0}, /*119 */ { 4, s_6_119, -1, 1, 0} }; static const symbol s_7_0[1] = { 'a' }; static const symbol s_7_1[1] = { 'i' }; static const symbol s_7_2[1] = { 'o' }; static const symbol s_7_3[2] = { 'o', 's' }; static const symbol s_7_4[2] = { 0xC3, 0xA1 }; static const symbol s_7_5[2] = { 0xC3, 0xAD }; static const symbol s_7_6[2] = { 0xC3, 0xB3 }; static const struct among a_7[7] = { /* 0 */ { 1, s_7_0, -1, 1, 0}, /* 1 */ { 1, s_7_1, -1, 1, 0}, /* 2 */ { 1, s_7_2, -1, 1, 0}, /* 3 */ { 2, s_7_3, -1, 1, 0}, /* 4 */ { 2, s_7_4, -1, 1, 0}, /* 5 */ { 2, s_7_5, -1, 1, 0}, /* 6 */ { 2, s_7_6, -1, 1, 0} }; static const symbol s_8_0[1] = { 'e' }; static const symbol s_8_1[2] = { 0xC3, 0xA7 }; static const symbol s_8_2[2] = { 0xC3, 0xA9 }; static const symbol s_8_3[2] = { 0xC3, 0xAA }; static const struct among a_8[4] = { /* 0 */ { 1, s_8_0, -1, 1, 0}, /* 1 */ { 2, s_8_1, -1, 2, 0}, /* 2 */ { 2, s_8_2, -1, 1, 0}, /* 3 */ { 2, s_8_3, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2 }; static const symbol s_0[] = { 'a', '~' }; static const symbol s_1[] = { 'o', '~' }; static const symbol s_2[] = { 0xC3, 0xA3 }; static const symbol s_3[] = { 0xC3, 0xB5 }; static const symbol s_4[] = { 'l', 'o', 'g' }; static const symbol s_5[] = { 'u' }; static const symbol s_6[] = { 'e', 'n', 't', 'e' }; static const symbol s_7[] = { 'a', 't' }; static const symbol s_8[] = { 'a', 't' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'i', 'r' }; static const symbol s_11[] = { 'u' }; static const symbol s_12[] = { 'g' }; static const symbol s_13[] = { 'i' }; static const symbol s_14[] = { 'c' }; static const symbol s_15[] = { 'c' }; static const symbol s_16[] = { 'i' }; static const symbol s_17[] = { 'c' }; static int r_prelude(struct SN_env * 
z) { int among_var; while(1) { /* repeat, line 36 */ int c1 = z->c; z->bra = z->c; /* [, line 37 */ if (z->c + 1 >= z->l || (z->p[z->c + 1] != 163 && z->p[z->c + 1] != 181)) among_var = 3; else among_var = find_among(z, a_0, 3); /* substring, line 37 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 37 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 38 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_1); /* <-, line 39 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 40 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 50 */ { int c2 = z->c; /* or, line 52 */ if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab2; { int c3 = z->c; /* or, line 51 */ if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab4; { /* gopast */ /* grouping v, line 51 */ int ret = out_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab2; { /* gopast */ /* non v, line 51 */ int ret = in_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab0; { int c4 = z->c; /* or, line 53 */ if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab6; { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab0; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 53 */ } } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 54 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 56 */ { /* gopast */ /* grouping v, line 57 */ int ret = 
out_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 57 */ int ret = in_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 57 */ { /* gopast */ /* grouping v, line 58 */ int ret = out_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 58 */ int ret = in_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 58 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 62 */ int c1 = z->c; z->bra = z->c; /* [, line 63 */ if (z->c + 1 >= z->l || z->p[z->c + 1] != 126) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 63 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 63 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 2, s_2); /* <-, line 64 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_3); /* <-, line 65 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 66 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 77 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((839714 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 45); /* substring, line 77 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 77 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 93 */ if (ret < 0) return ret; 
} { int ret = slice_del(z); /* delete, line 93 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 98 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_4); /* <-, line 98 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 102 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_5); /* <-, line 102 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 106 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_6); /* <-, line 106 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 110 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 110 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ z->ket = z->c; /* [, line 112 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_2, 4); /* substring, line 112 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 112 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 112 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 112 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: z->ket = z->c; /* [, line 113 */ if (!(eq_s_b(z, 2, s_7))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 113 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 113 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } break; } lab0: ; } break; case 6: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 122 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, 
line 122 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 123 */ z->ket = z->c; /* [, line 124 */ if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_3, 3); /* substring, line 124 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 124 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 127 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ z->ket = z->c; /* [, line 136 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_4, 3); /* substring, line 136 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 136 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 139 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 139 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 146 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 146 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 147 */ z->ket = z->c; /* [, line 148 */ if (!(eq_s_b(z, 2, s_8))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 148 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; 
goto lab3; } /* call R2, line 148 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 148 */ if (ret < 0) return ret; } lab3: ; } break; case 9: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 153 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_9))) return 0; { int ret = slice_from_s(z, 2, s_10); /* <-, line 154 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 159 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 159 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 160 */ among_var = find_among_b(z, a_6, 120); /* substring, line 160 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 160 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 184 */ among_var = find_among_b(z, a_7, 7); /* substring, line 184 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 184 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 187 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 187 */ if (ret < 0) return ret; } break; } return 1; } static int r_residual_form(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 192 */ among_var = find_among_b(z, a_8, 4); /* substring, line 192 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 192 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 194 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 194 */ { int m1 = z->l - z->c; (void)m1; /* or, line 194 */ if 
(!(eq_s_b(z, 1, s_11))) goto lab1; z->bra = z->c; /* ], line 194 */ { int m_test = z->l - z->c; /* test, line 194 */ if (!(eq_s_b(z, 1, s_12))) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_13))) return 0; z->bra = z->c; /* ], line 195 */ { int m_test = z->l - z->c; /* test, line 195 */ if (!(eq_s_b(z, 1, s_14))) return 0; z->c = z->l - m_test; } } lab0: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 195 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 195 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_15); /* <-, line 196 */ if (ret < 0) return ret; } break; } return 1; } extern int portuguese_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 202 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 202 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 203 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 203 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 204 */ { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ { int m4 = z->l - z->c; (void)m4; /* or, line 209 */ { int m5 = z->l - z->c; (void)m5; /* and, line 207 */ { int m6 = z->l - z->c; (void)m6; /* or, line 206 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab6; /* call standard_suffix, line 206 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 206 */ if (ret < 0) return ret; } } lab5: z->c = z->l - m5; { int m7 = z->l - z->c; (void)m7; /* do, line 207 */ z->ket = z->c; /* [, line 207 */ if (!(eq_s_b(z, 1, s_16))) goto lab7; z->bra = z->c; /* ], line 207 */ { int m_test = z->l - z->c; /* test, line 207 */ if (!(eq_s_b(z, 1, s_17))) goto lab7; z->c = z->l - m_test; } { int ret = r_RV(z); if (ret == 0) goto lab7; /* call RV, line 207 */ if (ret < 0) 
return ret; } { int ret = slice_del(z); /* delete, line 207 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } } goto lab3; lab4: z->c = z->l - m4; { int ret = r_residual_suffix(z); if (ret == 0) goto lab2; /* call residual_suffix, line 209 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m8 = z->l - z->c; (void)m8; /* do, line 211 */ { int ret = r_residual_form(z); if (ret == 0) goto lab8; /* call residual_form, line 211 */ if (ret < 0) return ret; } lab8: z->c = z->l - m8; } z->c = z->lb; { int c9 = z->c; /* do, line 213 */ { int ret = r_postlude(z); if (ret == 0) goto lab9; /* call postlude, line 213 */ if (ret < 0) return ret; } lab9: z->c = c9; } return 1; } extern struct SN_env * portuguese_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void portuguese_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_portuguese.h000066400000000000000000000005021217574114600314660ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * portuguese_UTF_8_create_env(void); extern void portuguese_UTF_8_close_env(struct SN_env * z); extern int portuguese_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_romanian.c000066400000000000000000001117521217574114600310750ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int romanian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_vowel_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_combo_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_step_0(struct SN_env * z); static int 
r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * romanian_UTF_8_create_env(void); extern void romanian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 'I' }; static const symbol s_0_2[1] = { 'U' }; static const struct among a_0[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 2, 0} }; static const symbol s_1_0[2] = { 'e', 'a' }; static const symbol s_1_1[5] = { 'a', 0xC5, 0xA3, 'i', 'a' }; static const symbol s_1_2[3] = { 'a', 'u', 'a' }; static const symbol s_1_3[3] = { 'i', 'u', 'a' }; static const symbol s_1_4[5] = { 'a', 0xC5, 0xA3, 'i', 'e' }; static const symbol s_1_5[3] = { 'e', 'l', 'e' }; static const symbol s_1_6[3] = { 'i', 'l', 'e' }; static const symbol s_1_7[4] = { 'i', 'i', 'l', 'e' }; static const symbol s_1_8[3] = { 'i', 'e', 'i' }; static const symbol s_1_9[4] = { 'a', 't', 'e', 'i' }; static const symbol s_1_10[2] = { 'i', 'i' }; static const symbol s_1_11[4] = { 'u', 'l', 'u', 'i' }; static const symbol s_1_12[2] = { 'u', 'l' }; static const symbol s_1_13[4] = { 'e', 'l', 'o', 'r' }; static const symbol s_1_14[4] = { 'i', 'l', 'o', 'r' }; static const symbol s_1_15[5] = { 'i', 'i', 'l', 'o', 'r' }; static const struct among a_1[16] = { /* 0 */ { 2, s_1_0, -1, 3, 0}, /* 1 */ { 5, s_1_1, -1, 7, 0}, /* 2 */ { 3, s_1_2, -1, 2, 0}, /* 3 */ { 3, s_1_3, -1, 4, 0}, /* 4 */ { 5, s_1_4, -1, 7, 0}, /* 5 */ { 3, s_1_5, -1, 3, 0}, /* 6 */ { 3, s_1_6, -1, 5, 0}, /* 7 */ { 4, s_1_7, 6, 4, 0}, /* 8 */ { 3, s_1_8, -1, 4, 0}, /* 9 */ { 4, s_1_9, -1, 6, 0}, /* 10 */ { 2, s_1_10, -1, 4, 0}, /* 11 */ { 4, s_1_11, -1, 1, 0}, /* 12 */ { 2, s_1_12, -1, 1, 0}, /* 13 */ { 4, s_1_13, -1, 3, 0}, /* 14 */ { 4, s_1_14, -1, 4, 0}, /* 15 */ { 5, s_1_15, 14, 
4, 0} }; static const symbol s_2_0[5] = { 'i', 'c', 'a', 'l', 'a' }; static const symbol s_2_1[5] = { 'i', 'c', 'i', 'v', 'a' }; static const symbol s_2_2[5] = { 'a', 't', 'i', 'v', 'a' }; static const symbol s_2_3[5] = { 'i', 't', 'i', 'v', 'a' }; static const symbol s_2_4[5] = { 'i', 'c', 'a', 'l', 'e' }; static const symbol s_2_5[7] = { 'a', 0xC5, 0xA3, 'i', 'u', 'n', 'e' }; static const symbol s_2_6[7] = { 'i', 0xC5, 0xA3, 'i', 'u', 'n', 'e' }; static const symbol s_2_7[6] = { 'a', 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_8[6] = { 'i', 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_9[7] = { 0xC4, 0x83, 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_10[7] = { 'i', 'c', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_11[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_12[9] = { 'i', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_13[7] = { 'i', 'v', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_14[5] = { 'i', 'c', 'i', 'v', 'e' }; static const symbol s_2_15[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_2_16[5] = { 'i', 't', 'i', 'v', 'e' }; static const symbol s_2_17[5] = { 'i', 'c', 'a', 'l', 'i' }; static const symbol s_2_18[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_2_19[7] = { 'i', 'c', 'a', 't', 'o', 'r', 'i' }; static const symbol s_2_20[5] = { 'i', 't', 'o', 'r', 'i' }; static const symbol s_2_21[6] = { 0xC4, 0x83, 't', 'o', 'r', 'i' }; static const symbol s_2_22[7] = { 'i', 'c', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_23[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_24[7] = { 'i', 'v', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_25[5] = { 'i', 'c', 'i', 'v', 'i' }; static const symbol s_2_26[5] = { 'a', 't', 'i', 'v', 'i' }; static const symbol s_2_27[5] = { 'i', 't', 'i', 'v', 'i' }; static const symbol s_2_28[7] = { 'i', 'c', 'i', 't', 0xC4, 0x83, 'i' }; static const symbol s_2_29[9] = { 'a', 'b', 'i', 'l', 
'i', 't', 0xC4, 0x83, 'i' }; static const symbol s_2_30[7] = { 'i', 'v', 'i', 't', 0xC4, 0x83, 'i' }; static const symbol s_2_31[9] = { 'i', 'c', 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_2_32[11] = { 'a', 'b', 'i', 'l', 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_2_33[9] = { 'i', 'v', 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_2_34[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_2_35[4] = { 'a', 't', 'o', 'r' }; static const symbol s_2_36[6] = { 'i', 'c', 'a', 't', 'o', 'r' }; static const symbol s_2_37[4] = { 'i', 't', 'o', 'r' }; static const symbol s_2_38[5] = { 0xC4, 0x83, 't', 'o', 'r' }; static const symbol s_2_39[4] = { 'i', 'c', 'i', 'v' }; static const symbol s_2_40[4] = { 'a', 't', 'i', 'v' }; static const symbol s_2_41[4] = { 'i', 't', 'i', 'v' }; static const symbol s_2_42[6] = { 'i', 'c', 'a', 'l', 0xC4, 0x83 }; static const symbol s_2_43[6] = { 'i', 'c', 'i', 'v', 0xC4, 0x83 }; static const symbol s_2_44[6] = { 'a', 't', 'i', 'v', 0xC4, 0x83 }; static const symbol s_2_45[6] = { 'i', 't', 'i', 'v', 0xC4, 0x83 }; static const struct among a_2[46] = { /* 0 */ { 5, s_2_0, -1, 4, 0}, /* 1 */ { 5, s_2_1, -1, 4, 0}, /* 2 */ { 5, s_2_2, -1, 5, 0}, /* 3 */ { 5, s_2_3, -1, 6, 0}, /* 4 */ { 5, s_2_4, -1, 4, 0}, /* 5 */ { 7, s_2_5, -1, 5, 0}, /* 6 */ { 7, s_2_6, -1, 6, 0}, /* 7 */ { 6, s_2_7, -1, 5, 0}, /* 8 */ { 6, s_2_8, -1, 6, 0}, /* 9 */ { 7, s_2_9, -1, 5, 0}, /* 10 */ { 7, s_2_10, -1, 4, 0}, /* 11 */ { 9, s_2_11, -1, 1, 0}, /* 12 */ { 9, s_2_12, -1, 2, 0}, /* 13 */ { 7, s_2_13, -1, 3, 0}, /* 14 */ { 5, s_2_14, -1, 4, 0}, /* 15 */ { 5, s_2_15, -1, 5, 0}, /* 16 */ { 5, s_2_16, -1, 6, 0}, /* 17 */ { 5, s_2_17, -1, 4, 0}, /* 18 */ { 5, s_2_18, -1, 5, 0}, /* 19 */ { 7, s_2_19, 18, 4, 0}, /* 20 */ { 5, s_2_20, -1, 6, 0}, /* 21 */ { 6, s_2_21, -1, 5, 0}, /* 22 */ { 7, s_2_22, -1, 4, 0}, /* 23 */ { 9, s_2_23, -1, 1, 0}, /* 24 */ { 7, s_2_24, -1, 3, 0}, /* 25 */ { 5, s_2_25, -1, 4, 0}, /* 26 */ { 5, 
s_2_26, -1, 5, 0}, /* 27 */ { 5, s_2_27, -1, 6, 0}, /* 28 */ { 7, s_2_28, -1, 4, 0}, /* 29 */ { 9, s_2_29, -1, 1, 0}, /* 30 */ { 7, s_2_30, -1, 3, 0}, /* 31 */ { 9, s_2_31, -1, 4, 0}, /* 32 */ { 11, s_2_32, -1, 1, 0}, /* 33 */ { 9, s_2_33, -1, 3, 0}, /* 34 */ { 4, s_2_34, -1, 4, 0}, /* 35 */ { 4, s_2_35, -1, 5, 0}, /* 36 */ { 6, s_2_36, 35, 4, 0}, /* 37 */ { 4, s_2_37, -1, 6, 0}, /* 38 */ { 5, s_2_38, -1, 5, 0}, /* 39 */ { 4, s_2_39, -1, 4, 0}, /* 40 */ { 4, s_2_40, -1, 5, 0}, /* 41 */ { 4, s_2_41, -1, 6, 0}, /* 42 */ { 6, s_2_42, -1, 4, 0}, /* 43 */ { 6, s_2_43, -1, 4, 0}, /* 44 */ { 6, s_2_44, -1, 5, 0}, /* 45 */ { 6, s_2_45, -1, 6, 0} }; static const symbol s_3_0[3] = { 'i', 'c', 'a' }; static const symbol s_3_1[5] = { 'a', 'b', 'i', 'l', 'a' }; static const symbol s_3_2[5] = { 'i', 'b', 'i', 'l', 'a' }; static const symbol s_3_3[4] = { 'o', 'a', 's', 'a' }; static const symbol s_3_4[3] = { 'a', 't', 'a' }; static const symbol s_3_5[3] = { 'i', 't', 'a' }; static const symbol s_3_6[4] = { 'a', 'n', 't', 'a' }; static const symbol s_3_7[4] = { 'i', 's', 't', 'a' }; static const symbol s_3_8[3] = { 'u', 't', 'a' }; static const symbol s_3_9[3] = { 'i', 'v', 'a' }; static const symbol s_3_10[2] = { 'i', 'c' }; static const symbol s_3_11[3] = { 'i', 'c', 'e' }; static const symbol s_3_12[5] = { 'a', 'b', 'i', 'l', 'e' }; static const symbol s_3_13[5] = { 'i', 'b', 'i', 'l', 'e' }; static const symbol s_3_14[4] = { 'i', 's', 'm', 'e' }; static const symbol s_3_15[4] = { 'i', 'u', 'n', 'e' }; static const symbol s_3_16[4] = { 'o', 'a', 's', 'e' }; static const symbol s_3_17[3] = { 'a', 't', 'e' }; static const symbol s_3_18[5] = { 'i', 't', 'a', 't', 'e' }; static const symbol s_3_19[3] = { 'i', 't', 'e' }; static const symbol s_3_20[4] = { 'a', 'n', 't', 'e' }; static const symbol s_3_21[4] = { 'i', 's', 't', 'e' }; static const symbol s_3_22[3] = { 'u', 't', 'e' }; static const symbol s_3_23[3] = { 'i', 'v', 'e' }; static const symbol s_3_24[3] = { 'i', 'c', 'i' }; 
static const symbol s_3_25[5] = { 'a', 'b', 'i', 'l', 'i' }; static const symbol s_3_26[5] = { 'i', 'b', 'i', 'l', 'i' }; static const symbol s_3_27[4] = { 'i', 'u', 'n', 'i' }; static const symbol s_3_28[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_3_29[3] = { 'o', 's', 'i' }; static const symbol s_3_30[3] = { 'a', 't', 'i' }; static const symbol s_3_31[5] = { 'i', 't', 'a', 't', 'i' }; static const symbol s_3_32[3] = { 'i', 't', 'i' }; static const symbol s_3_33[4] = { 'a', 'n', 't', 'i' }; static const symbol s_3_34[4] = { 'i', 's', 't', 'i' }; static const symbol s_3_35[3] = { 'u', 't', 'i' }; static const symbol s_3_36[5] = { 'i', 0xC5, 0x9F, 't', 'i' }; static const symbol s_3_37[3] = { 'i', 'v', 'i' }; static const symbol s_3_38[5] = { 'i', 't', 0xC4, 0x83, 'i' }; static const symbol s_3_39[4] = { 'o', 0xC5, 0x9F, 'i' }; static const symbol s_3_40[7] = { 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_3_41[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_3_42[4] = { 'i', 'b', 'i', 'l' }; static const symbol s_3_43[3] = { 'i', 's', 'm' }; static const symbol s_3_44[4] = { 'a', 't', 'o', 'r' }; static const symbol s_3_45[2] = { 'o', 's' }; static const symbol s_3_46[2] = { 'a', 't' }; static const symbol s_3_47[2] = { 'i', 't' }; static const symbol s_3_48[3] = { 'a', 'n', 't' }; static const symbol s_3_49[3] = { 'i', 's', 't' }; static const symbol s_3_50[2] = { 'u', 't' }; static const symbol s_3_51[2] = { 'i', 'v' }; static const symbol s_3_52[4] = { 'i', 'c', 0xC4, 0x83 }; static const symbol s_3_53[6] = { 'a', 'b', 'i', 'l', 0xC4, 0x83 }; static const symbol s_3_54[6] = { 'i', 'b', 'i', 'l', 0xC4, 0x83 }; static const symbol s_3_55[5] = { 'o', 'a', 's', 0xC4, 0x83 }; static const symbol s_3_56[4] = { 'a', 't', 0xC4, 0x83 }; static const symbol s_3_57[4] = { 'i', 't', 0xC4, 0x83 }; static const symbol s_3_58[5] = { 'a', 'n', 't', 0xC4, 0x83 }; static const symbol s_3_59[5] = { 'i', 's', 't', 0xC4, 0x83 }; static const symbol 
s_3_60[4] = { 'u', 't', 0xC4, 0x83 }; static const symbol s_3_61[4] = { 'i', 'v', 0xC4, 0x83 }; static const struct among a_3[62] = { /* 0 */ { 3, s_3_0, -1, 1, 0}, /* 1 */ { 5, s_3_1, -1, 1, 0}, /* 2 */ { 5, s_3_2, -1, 1, 0}, /* 3 */ { 4, s_3_3, -1, 1, 0}, /* 4 */ { 3, s_3_4, -1, 1, 0}, /* 5 */ { 3, s_3_5, -1, 1, 0}, /* 6 */ { 4, s_3_6, -1, 1, 0}, /* 7 */ { 4, s_3_7, -1, 3, 0}, /* 8 */ { 3, s_3_8, -1, 1, 0}, /* 9 */ { 3, s_3_9, -1, 1, 0}, /* 10 */ { 2, s_3_10, -1, 1, 0}, /* 11 */ { 3, s_3_11, -1, 1, 0}, /* 12 */ { 5, s_3_12, -1, 1, 0}, /* 13 */ { 5, s_3_13, -1, 1, 0}, /* 14 */ { 4, s_3_14, -1, 3, 0}, /* 15 */ { 4, s_3_15, -1, 2, 0}, /* 16 */ { 4, s_3_16, -1, 1, 0}, /* 17 */ { 3, s_3_17, -1, 1, 0}, /* 18 */ { 5, s_3_18, 17, 1, 0}, /* 19 */ { 3, s_3_19, -1, 1, 0}, /* 20 */ { 4, s_3_20, -1, 1, 0}, /* 21 */ { 4, s_3_21, -1, 3, 0}, /* 22 */ { 3, s_3_22, -1, 1, 0}, /* 23 */ { 3, s_3_23, -1, 1, 0}, /* 24 */ { 3, s_3_24, -1, 1, 0}, /* 25 */ { 5, s_3_25, -1, 1, 0}, /* 26 */ { 5, s_3_26, -1, 1, 0}, /* 27 */ { 4, s_3_27, -1, 2, 0}, /* 28 */ { 5, s_3_28, -1, 1, 0}, /* 29 */ { 3, s_3_29, -1, 1, 0}, /* 30 */ { 3, s_3_30, -1, 1, 0}, /* 31 */ { 5, s_3_31, 30, 1, 0}, /* 32 */ { 3, s_3_32, -1, 1, 0}, /* 33 */ { 4, s_3_33, -1, 1, 0}, /* 34 */ { 4, s_3_34, -1, 3, 0}, /* 35 */ { 3, s_3_35, -1, 1, 0}, /* 36 */ { 5, s_3_36, -1, 3, 0}, /* 37 */ { 3, s_3_37, -1, 1, 0}, /* 38 */ { 5, s_3_38, -1, 1, 0}, /* 39 */ { 4, s_3_39, -1, 1, 0}, /* 40 */ { 7, s_3_40, -1, 1, 0}, /* 41 */ { 4, s_3_41, -1, 1, 0}, /* 42 */ { 4, s_3_42, -1, 1, 0}, /* 43 */ { 3, s_3_43, -1, 3, 0}, /* 44 */ { 4, s_3_44, -1, 1, 0}, /* 45 */ { 2, s_3_45, -1, 1, 0}, /* 46 */ { 2, s_3_46, -1, 1, 0}, /* 47 */ { 2, s_3_47, -1, 1, 0}, /* 48 */ { 3, s_3_48, -1, 1, 0}, /* 49 */ { 3, s_3_49, -1, 3, 0}, /* 50 */ { 2, s_3_50, -1, 1, 0}, /* 51 */ { 2, s_3_51, -1, 1, 0}, /* 52 */ { 4, s_3_52, -1, 1, 0}, /* 53 */ { 6, s_3_53, -1, 1, 0}, /* 54 */ { 6, s_3_54, -1, 1, 0}, /* 55 */ { 5, s_3_55, -1, 1, 0}, /* 56 */ { 4, s_3_56, -1, 1, 0}, /* 
57 */ { 4, s_3_57, -1, 1, 0}, /* 58 */ { 5, s_3_58, -1, 1, 0}, /* 59 */ { 5, s_3_59, -1, 3, 0}, /* 60 */ { 4, s_3_60, -1, 1, 0}, /* 61 */ { 4, s_3_61, -1, 1, 0} }; static const symbol s_4_0[2] = { 'e', 'a' }; static const symbol s_4_1[2] = { 'i', 'a' }; static const symbol s_4_2[3] = { 'e', 's', 'c' }; static const symbol s_4_3[4] = { 0xC4, 0x83, 's', 'c' }; static const symbol s_4_4[3] = { 'i', 'n', 'd' }; static const symbol s_4_5[4] = { 0xC3, 0xA2, 'n', 'd' }; static const symbol s_4_6[3] = { 'a', 'r', 'e' }; static const symbol s_4_7[3] = { 'e', 'r', 'e' }; static const symbol s_4_8[3] = { 'i', 'r', 'e' }; static const symbol s_4_9[4] = { 0xC3, 0xA2, 'r', 'e' }; static const symbol s_4_10[2] = { 's', 'e' }; static const symbol s_4_11[3] = { 'a', 's', 'e' }; static const symbol s_4_12[4] = { 's', 'e', 's', 'e' }; static const symbol s_4_13[3] = { 'i', 's', 'e' }; static const symbol s_4_14[3] = { 'u', 's', 'e' }; static const symbol s_4_15[4] = { 0xC3, 0xA2, 's', 'e' }; static const symbol s_4_16[5] = { 'e', 0xC5, 0x9F, 't', 'e' }; static const symbol s_4_17[6] = { 0xC4, 0x83, 0xC5, 0x9F, 't', 'e' }; static const symbol s_4_18[3] = { 'e', 'z', 'e' }; static const symbol s_4_19[2] = { 'a', 'i' }; static const symbol s_4_20[3] = { 'e', 'a', 'i' }; static const symbol s_4_21[3] = { 'i', 'a', 'i' }; static const symbol s_4_22[3] = { 's', 'e', 'i' }; static const symbol s_4_23[5] = { 'e', 0xC5, 0x9F, 't', 'i' }; static const symbol s_4_24[6] = { 0xC4, 0x83, 0xC5, 0x9F, 't', 'i' }; static const symbol s_4_25[2] = { 'u', 'i' }; static const symbol s_4_26[3] = { 'e', 'z', 'i' }; static const symbol s_4_27[4] = { 'a', 0xC5, 0x9F, 'i' }; static const symbol s_4_28[5] = { 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_29[6] = { 'a', 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_30[7] = { 's', 'e', 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_31[6] = { 'i', 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_32[6] = { 'u', 's', 'e', 0xC5, 0x9F, 'i' }; 
static const symbol s_4_33[7] = { 0xC3, 0xA2, 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_34[4] = { 'i', 0xC5, 0x9F, 'i' }; static const symbol s_4_35[4] = { 'u', 0xC5, 0x9F, 'i' }; static const symbol s_4_36[5] = { 0xC3, 0xA2, 0xC5, 0x9F, 'i' }; static const symbol s_4_37[3] = { 0xC3, 0xA2, 'i' }; static const symbol s_4_38[4] = { 'a', 0xC5, 0xA3, 'i' }; static const symbol s_4_39[5] = { 'e', 'a', 0xC5, 0xA3, 'i' }; static const symbol s_4_40[5] = { 'i', 'a', 0xC5, 0xA3, 'i' }; static const symbol s_4_41[4] = { 'e', 0xC5, 0xA3, 'i' }; static const symbol s_4_42[4] = { 'i', 0xC5, 0xA3, 'i' }; static const symbol s_4_43[7] = { 'a', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_44[8] = { 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_45[9] = { 'a', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_46[10] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_47[9] = { 'i', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_48[9] = { 'u', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_49[10] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_50[7] = { 'i', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_51[7] = { 'u', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_52[8] = { 0xC3, 0xA2, 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_53[5] = { 0xC3, 0xA2, 0xC5, 0xA3, 'i' }; static const symbol s_4_54[2] = { 'a', 'm' }; static const symbol s_4_55[3] = { 'e', 'a', 'm' }; static const symbol s_4_56[3] = { 'i', 'a', 'm' }; static const symbol s_4_57[2] = { 'e', 'm' }; static const symbol s_4_58[4] = { 'a', 's', 'e', 'm' }; static const symbol s_4_59[5] = { 's', 'e', 's', 'e', 'm' }; static const symbol s_4_60[4] = { 'i', 's', 'e', 'm' }; static const symbol s_4_61[4] = { 'u', 's', 'e', 'm' }; static const symbol s_4_62[5] = { 0xC3, 0xA2, 's', 'e', 'm' }; static const 
symbol s_4_63[2] = { 'i', 'm' }; static const symbol s_4_64[3] = { 0xC4, 0x83, 'm' }; static const symbol s_4_65[5] = { 'a', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_66[6] = { 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_67[7] = { 'a', 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_68[8] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_69[7] = { 'i', 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_70[7] = { 'u', 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_71[8] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_72[5] = { 'i', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_73[5] = { 'u', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_74[6] = { 0xC3, 0xA2, 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_75[3] = { 0xC3, 0xA2, 'm' }; static const symbol s_4_76[2] = { 'a', 'u' }; static const symbol s_4_77[3] = { 'e', 'a', 'u' }; static const symbol s_4_78[3] = { 'i', 'a', 'u' }; static const symbol s_4_79[4] = { 'i', 'n', 'd', 'u' }; static const symbol s_4_80[5] = { 0xC3, 0xA2, 'n', 'd', 'u' }; static const symbol s_4_81[2] = { 'e', 'z' }; static const symbol s_4_82[6] = { 'e', 'a', 's', 'c', 0xC4, 0x83 }; static const symbol s_4_83[4] = { 'a', 'r', 0xC4, 0x83 }; static const symbol s_4_84[5] = { 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_85[6] = { 'a', 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_86[7] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_87[6] = { 'i', 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_88[6] = { 'u', 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_89[7] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_90[4] = { 'i', 'r', 0xC4, 0x83 }; static const symbol s_4_91[4] = { 'u', 'r', 0xC4, 0x83 }; static const symbol s_4_92[5] = { 0xC3, 0xA2, 'r', 0xC4, 0x83 }; static const symbol s_4_93[5] = { 'e', 'a', 'z', 0xC4, 0x83 }; static const struct among a_4[94] = { /* 0 */ { 2, s_4_0, 
-1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 1, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 1, 0}, /* 4 */ { 3, s_4_4, -1, 1, 0}, /* 5 */ { 4, s_4_5, -1, 1, 0}, /* 6 */ { 3, s_4_6, -1, 1, 0}, /* 7 */ { 3, s_4_7, -1, 1, 0}, /* 8 */ { 3, s_4_8, -1, 1, 0}, /* 9 */ { 4, s_4_9, -1, 1, 0}, /* 10 */ { 2, s_4_10, -1, 2, 0}, /* 11 */ { 3, s_4_11, 10, 1, 0}, /* 12 */ { 4, s_4_12, 10, 2, 0}, /* 13 */ { 3, s_4_13, 10, 1, 0}, /* 14 */ { 3, s_4_14, 10, 1, 0}, /* 15 */ { 4, s_4_15, 10, 1, 0}, /* 16 */ { 5, s_4_16, -1, 1, 0}, /* 17 */ { 6, s_4_17, -1, 1, 0}, /* 18 */ { 3, s_4_18, -1, 1, 0}, /* 19 */ { 2, s_4_19, -1, 1, 0}, /* 20 */ { 3, s_4_20, 19, 1, 0}, /* 21 */ { 3, s_4_21, 19, 1, 0}, /* 22 */ { 3, s_4_22, -1, 2, 0}, /* 23 */ { 5, s_4_23, -1, 1, 0}, /* 24 */ { 6, s_4_24, -1, 1, 0}, /* 25 */ { 2, s_4_25, -1, 1, 0}, /* 26 */ { 3, s_4_26, -1, 1, 0}, /* 27 */ { 4, s_4_27, -1, 1, 0}, /* 28 */ { 5, s_4_28, -1, 2, 0}, /* 29 */ { 6, s_4_29, 28, 1, 0}, /* 30 */ { 7, s_4_30, 28, 2, 0}, /* 31 */ { 6, s_4_31, 28, 1, 0}, /* 32 */ { 6, s_4_32, 28, 1, 0}, /* 33 */ { 7, s_4_33, 28, 1, 0}, /* 34 */ { 4, s_4_34, -1, 1, 0}, /* 35 */ { 4, s_4_35, -1, 1, 0}, /* 36 */ { 5, s_4_36, -1, 1, 0}, /* 37 */ { 3, s_4_37, -1, 1, 0}, /* 38 */ { 4, s_4_38, -1, 2, 0}, /* 39 */ { 5, s_4_39, 38, 1, 0}, /* 40 */ { 5, s_4_40, 38, 1, 0}, /* 41 */ { 4, s_4_41, -1, 2, 0}, /* 42 */ { 4, s_4_42, -1, 2, 0}, /* 43 */ { 7, s_4_43, -1, 1, 0}, /* 44 */ { 8, s_4_44, -1, 2, 0}, /* 45 */ { 9, s_4_45, 44, 1, 0}, /* 46 */ { 10, s_4_46, 44, 2, 0}, /* 47 */ { 9, s_4_47, 44, 1, 0}, /* 48 */ { 9, s_4_48, 44, 1, 0}, /* 49 */ { 10, s_4_49, 44, 1, 0}, /* 50 */ { 7, s_4_50, -1, 1, 0}, /* 51 */ { 7, s_4_51, -1, 1, 0}, /* 52 */ { 8, s_4_52, -1, 1, 0}, /* 53 */ { 5, s_4_53, -1, 2, 0}, /* 54 */ { 2, s_4_54, -1, 1, 0}, /* 55 */ { 3, s_4_55, 54, 1, 0}, /* 56 */ { 3, s_4_56, 54, 1, 0}, /* 57 */ { 2, s_4_57, -1, 2, 0}, /* 58 */ { 4, s_4_58, 57, 1, 0}, /* 59 */ { 5, s_4_59, 57, 2, 0}, /* 60 */ { 4, s_4_60, 57, 1, 0}, /* 61 */ { 4, s_4_61, 
57, 1, 0}, /* 62 */ { 5, s_4_62, 57, 1, 0}, /* 63 */ { 2, s_4_63, -1, 2, 0}, /* 64 */ { 3, s_4_64, -1, 2, 0}, /* 65 */ { 5, s_4_65, 64, 1, 0}, /* 66 */ { 6, s_4_66, 64, 2, 0}, /* 67 */ { 7, s_4_67, 66, 1, 0}, /* 68 */ { 8, s_4_68, 66, 2, 0}, /* 69 */ { 7, s_4_69, 66, 1, 0}, /* 70 */ { 7, s_4_70, 66, 1, 0}, /* 71 */ { 8, s_4_71, 66, 1, 0}, /* 72 */ { 5, s_4_72, 64, 1, 0}, /* 73 */ { 5, s_4_73, 64, 1, 0}, /* 74 */ { 6, s_4_74, 64, 1, 0}, /* 75 */ { 3, s_4_75, -1, 2, 0}, /* 76 */ { 2, s_4_76, -1, 1, 0}, /* 77 */ { 3, s_4_77, 76, 1, 0}, /* 78 */ { 3, s_4_78, 76, 1, 0}, /* 79 */ { 4, s_4_79, -1, 1, 0}, /* 80 */ { 5, s_4_80, -1, 1, 0}, /* 81 */ { 2, s_4_81, -1, 1, 0}, /* 82 */ { 6, s_4_82, -1, 1, 0}, /* 83 */ { 4, s_4_83, -1, 1, 0}, /* 84 */ { 5, s_4_84, -1, 2, 0}, /* 85 */ { 6, s_4_85, 84, 1, 0}, /* 86 */ { 7, s_4_86, 84, 2, 0}, /* 87 */ { 6, s_4_87, 84, 1, 0}, /* 88 */ { 6, s_4_88, 84, 1, 0}, /* 89 */ { 7, s_4_89, 84, 1, 0}, /* 90 */ { 4, s_4_90, -1, 1, 0}, /* 91 */ { 4, s_4_91, -1, 1, 0}, /* 92 */ { 5, s_4_92, -1, 1, 0}, /* 93 */ { 5, s_4_93, -1, 1, 0} }; static const symbol s_5_0[1] = { 'a' }; static const symbol s_5_1[1] = { 'e' }; static const symbol s_5_2[2] = { 'i', 'e' }; static const symbol s_5_3[1] = { 'i' }; static const symbol s_5_4[2] = { 0xC4, 0x83 }; static const struct among a_5[5] = { /* 0 */ { 1, s_5_0, -1, 1, 0}, /* 1 */ { 1, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, 1, 1, 0}, /* 3 */ { 1, s_5_3, -1, 1, 0}, /* 4 */ { 2, s_5_4, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 32, 0, 0, 4 }; static const symbol s_0[] = { 'u' }; static const symbol s_1[] = { 'U' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'I' }; static const symbol s_4[] = { 'i' }; static const symbol s_5[] = { 'u' }; static const symbol s_6[] = { 'a' }; static const symbol s_7[] = { 'e' }; static const symbol s_8[] = { 'i' }; static const symbol s_9[] = { 'a', 'b' }; static const symbol s_10[] = { 'i' }; static 
const symbol s_11[] = { 'a', 't' }; static const symbol s_12[] = { 'a', 0xC5, 0xA3, 'i' }; static const symbol s_13[] = { 'a', 'b', 'i', 'l' }; static const symbol s_14[] = { 'i', 'b', 'i', 'l' }; static const symbol s_15[] = { 'i', 'v' }; static const symbol s_16[] = { 'i', 'c' }; static const symbol s_17[] = { 'a', 't' }; static const symbol s_18[] = { 'i', 't' }; static const symbol s_19[] = { 0xC5, 0xA3 }; static const symbol s_20[] = { 't' }; static const symbol s_21[] = { 'i', 's', 't' }; static const symbol s_22[] = { 'u' }; static int r_prelude(struct SN_env * z) { while(1) { /* repeat, line 32 */ int c1 = z->c; while(1) { /* goto, line 32 */ int c2 = z->c; if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab1; z->bra = z->c; /* [, line 33 */ { int c3 = z->c; /* or, line 33 */ if (!(eq_s(z, 1, s_0))) goto lab3; z->ket = z->c; /* ], line 33 */ if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab3; { int ret = slice_from_s(z, 1, s_1); /* <-, line 33 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = c3; if (!(eq_s(z, 1, s_2))) goto lab1; z->ket = z->c; /* ], line 34 */ if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab1; { int ret = slice_from_s(z, 1, s_3); /* <-, line 34 */ if (ret < 0) return ret; } } lab2: z->c = c2; break; lab1: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* goto, line 32 */ } } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 44 */ { int c2 = z->c; /* or, line 46 */ if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab2; { int c3 = z->c; /* or, line 45 */ if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab4; { /* gopast */ /* grouping v, line 45 */ int ret = out_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab2; { /* gopast */ /* non v, line 45 */ int ret = in_grouping_U(z, g_v, 97, 259, 
1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab0; { int c4 = z->c; /* or, line 47 */ if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab6; { /* gopast */ /* grouping v, line 47 */ int ret = out_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab0; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 47 */ } } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 48 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 50 */ { /* gopast */ /* grouping v, line 51 */ int ret = out_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 51 */ int ret = in_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 51 */ { /* gopast */ /* grouping v, line 52 */ int ret = out_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 52 */ int ret = in_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 52 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 56 */ int c1 = z->c; z->bra = z->c; /* [, line 58 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else among_var = find_among(z, a_0, 3); /* substring, line 58 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 58 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_4); /* <-, line 59 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_5); /* <-, line 60 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 61 */ } break; } continue; lab0: z->c = c1; break; } return 1; } 
static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_step_0(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 73 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((266786 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_1, 16); /* substring, line 73 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 73 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 73 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 75 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_6); /* <-, line 77 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_7); /* <-, line 79 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_8); /* <-, line 81 */ if (ret < 0) return ret; } break; case 5: { int m1 = z->l - z->c; (void)m1; /* not, line 83 */ if (!(eq_s_b(z, 2, s_9))) goto lab0; return 0; lab0: z->c = z->l - m1; } { int ret = slice_from_s(z, 1, s_10); /* <-, line 83 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_11); /* <-, line 85 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 4, s_12); /* <-, line 87 */ if (ret < 0) return ret; } break; } return 1; } static int r_combo_suffix(struct SN_env * z) { int among_var; { int m_test = z->l - z->c; /* test, line 91 */ z->ket = z->c; /* [, line 92 */ among_var = find_among_b(z, a_2, 46); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 92 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_13); /* <-, line 101 */ if (ret < 0) return 
ret; } break; case 2: { int ret = slice_from_s(z, 4, s_14); /* <-, line 104 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_15); /* <-, line 107 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_16); /* <-, line 113 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 2, s_17); /* <-, line 118 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_18); /* <-, line 122 */ if (ret < 0) return ret; } break; } z->B[0] = 1; /* set standard_suffix_removed, line 125 */ z->c = z->l - m_test; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->B[0] = 0; /* unset standard_suffix_removed, line 130 */ while(1) { /* repeat, line 131 */ int m1 = z->l - z->c; (void)m1; { int ret = r_combo_suffix(z); if (ret == 0) goto lab0; /* call combo_suffix, line 131 */ if (ret < 0) return ret; } continue; lab0: z->c = z->l - m1; break; } z->ket = z->c; /* [, line 132 */ among_var = find_among_b(z, a_3, 62); /* substring, line 132 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 132 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 132 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 149 */ if (ret < 0) return ret; } break; case 2: if (!(eq_s_b(z, 2, s_19))) return 0; z->bra = z->c; /* ], line 152 */ { int ret = slice_from_s(z, 1, s_20); /* <-, line 152 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 3, s_21); /* <-, line 156 */ if (ret < 0) return ret; } break; } z->B[0] = 1; /* set standard_suffix_removed, line 160 */ return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 164 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 164 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 165 */ among_var = find_among_b(z, a_4, 94); /* 
substring, line 165 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 165 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int m2 = z->l - z->c; (void)m2; /* or, line 200 */ if (out_grouping_b_U(z, g_v, 97, 259, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_22))) { z->lb = mlimit; return 0; } } lab0: { int ret = slice_del(z); /* delete, line 200 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 214 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_vowel_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 219 */ among_var = find_among_b(z, a_5, 5); /* substring, line 219 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 219 */ { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 219 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 220 */ if (ret < 0) return ret; } break; } return 1; } extern int romanian_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 226 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 226 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 227 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 227 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 228 */ { int m3 = z->l - z->c; (void)m3; /* do, line 229 */ { int ret = r_step_0(z); if (ret == 0) goto lab2; /* call step_0, line 229 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 230 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab3; /* call standard_suffix, line 230 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 231 */ { int m6 = z->l - z->c; (void)m6; /* or, line 231 */ if (!(z->B[0])) goto lab6; 
/* Boolean test standard_suffix_removed, line 231 */ goto lab5; lab6: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ if (ret < 0) return ret; } } lab5: lab4: z->c = z->l - m5; } { int m7 = z->l - z->c; (void)m7; /* do, line 232 */ { int ret = r_vowel_suffix(z); if (ret == 0) goto lab7; /* call vowel_suffix, line 232 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } z->c = z->lb; { int c8 = z->c; /* do, line 234 */ { int ret = r_postlude(z); if (ret == 0) goto lab8; /* call postlude, line 234 */ if (ret < 0) return ret; } lab8: z->c = c8; } return 1; } extern struct SN_env * romanian_UTF_8_create_env(void) { return SN_create_env(0, 3, 1); } extern void romanian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_romanian.h000066400000000000000000000004741217574114600311000ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * romanian_UTF_8_create_env(void); extern void romanian_UTF_8_close_env(struct SN_env * z); extern int romanian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_russian.c000066400000000000000000000615401217574114600307540ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int russian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_tidy_up(struct SN_env * z); static int r_derivational(struct SN_env * z); static int r_noun(struct SN_env * z); static int r_verb(struct SN_env * z); static int r_reflexive(struct SN_env * z); static int r_adjectival(struct SN_env * z); static int r_adjective(struct SN_env * z); static int r_perfective_gerund(struct 
SN_env * z); static int r_R2(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * russian_UTF_8_create_env(void); extern void russian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[10] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; static const symbol s_0_1[12] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; static const symbol s_0_2[12] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; static const symbol s_0_3[2] = { 0xD0, 0xB2 }; static const symbol s_0_4[4] = { 0xD1, 0x8B, 0xD0, 0xB2 }; static const symbol s_0_5[4] = { 0xD0, 0xB8, 0xD0, 0xB2 }; static const symbol s_0_6[6] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; static const symbol s_0_7[8] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; static const symbol s_0_8[8] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; static const struct among a_0[9] = { /* 0 */ { 10, s_0_0, -1, 1, 0}, /* 1 */ { 12, s_0_1, 0, 2, 0}, /* 2 */ { 12, s_0_2, 0, 2, 0}, /* 3 */ { 2, s_0_3, -1, 1, 0}, /* 4 */ { 4, s_0_4, 3, 2, 0}, /* 5 */ { 4, s_0_5, 3, 2, 0}, /* 6 */ { 6, s_0_6, -1, 1, 0}, /* 7 */ { 8, s_0_7, 6, 2, 0}, /* 8 */ { 8, s_0_8, 6, 2, 0} }; static const symbol s_1_0[6] = { 0xD0, 0xB5, 0xD0, 0xBC, 0xD1, 0x83 }; static const symbol s_1_1[6] = { 0xD0, 0xBE, 0xD0, 0xBC, 0xD1, 0x83 }; static const symbol s_1_2[4] = { 0xD1, 0x8B, 0xD1, 0x85 }; static const symbol s_1_3[4] = { 0xD0, 0xB8, 0xD1, 0x85 }; static const symbol s_1_4[4] = { 0xD1, 0x83, 0xD1, 0x8E }; static const symbol s_1_5[4] = { 0xD1, 0x8E, 0xD1, 0x8E }; static const symbol s_1_6[4] = { 0xD0, 0xB5, 0xD1, 0x8E }; static const symbol s_1_7[4] = { 0xD0, 0xBE, 0xD1, 0x8E }; static const symbol s_1_8[4] = { 0xD1, 0x8F, 0xD1, 0x8F }; static const symbol s_1_9[4] = { 0xD0, 0xB0, 0xD1, 0x8F }; static const symbol s_1_10[4] = { 0xD1, 0x8B, 0xD0, 0xB5 }; static const symbol 
s_1_11[4] = { 0xD0, 0xB5, 0xD0, 0xB5 }; static const symbol s_1_12[4] = { 0xD0, 0xB8, 0xD0, 0xB5 }; static const symbol s_1_13[4] = { 0xD0, 0xBE, 0xD0, 0xB5 }; static const symbol s_1_14[6] = { 0xD1, 0x8B, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_1_15[6] = { 0xD0, 0xB8, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_1_16[4] = { 0xD1, 0x8B, 0xD0, 0xB9 }; static const symbol s_1_17[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; static const symbol s_1_18[4] = { 0xD0, 0xB8, 0xD0, 0xB9 }; static const symbol s_1_19[4] = { 0xD0, 0xBE, 0xD0, 0xB9 }; static const symbol s_1_20[4] = { 0xD1, 0x8B, 0xD0, 0xBC }; static const symbol s_1_21[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_1_22[4] = { 0xD0, 0xB8, 0xD0, 0xBC }; static const symbol s_1_23[4] = { 0xD0, 0xBE, 0xD0, 0xBC }; static const symbol s_1_24[6] = { 0xD0, 0xB5, 0xD0, 0xB3, 0xD0, 0xBE }; static const symbol s_1_25[6] = { 0xD0, 0xBE, 0xD0, 0xB3, 0xD0, 0xBE }; static const struct among a_1[26] = { /* 0 */ { 6, s_1_0, -1, 1, 0}, /* 1 */ { 6, s_1_1, -1, 1, 0}, /* 2 */ { 4, s_1_2, -1, 1, 0}, /* 3 */ { 4, s_1_3, -1, 1, 0}, /* 4 */ { 4, s_1_4, -1, 1, 0}, /* 5 */ { 4, s_1_5, -1, 1, 0}, /* 6 */ { 4, s_1_6, -1, 1, 0}, /* 7 */ { 4, s_1_7, -1, 1, 0}, /* 8 */ { 4, s_1_8, -1, 1, 0}, /* 9 */ { 4, s_1_9, -1, 1, 0}, /* 10 */ { 4, s_1_10, -1, 1, 0}, /* 11 */ { 4, s_1_11, -1, 1, 0}, /* 12 */ { 4, s_1_12, -1, 1, 0}, /* 13 */ { 4, s_1_13, -1, 1, 0}, /* 14 */ { 6, s_1_14, -1, 1, 0}, /* 15 */ { 6, s_1_15, -1, 1, 0}, /* 16 */ { 4, s_1_16, -1, 1, 0}, /* 17 */ { 4, s_1_17, -1, 1, 0}, /* 18 */ { 4, s_1_18, -1, 1, 0}, /* 19 */ { 4, s_1_19, -1, 1, 0}, /* 20 */ { 4, s_1_20, -1, 1, 0}, /* 21 */ { 4, s_1_21, -1, 1, 0}, /* 22 */ { 4, s_1_22, -1, 1, 0}, /* 23 */ { 4, s_1_23, -1, 1, 0}, /* 24 */ { 6, s_1_24, -1, 1, 0}, /* 25 */ { 6, s_1_25, -1, 1, 0} }; static const symbol s_2_0[4] = { 0xD0, 0xB2, 0xD1, 0x88 }; static const symbol s_2_1[6] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88 }; static const symbol s_2_2[6] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 
0x88 }; static const symbol s_2_3[2] = { 0xD1, 0x89 }; static const symbol s_2_4[4] = { 0xD1, 0x8E, 0xD1, 0x89 }; static const symbol s_2_5[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x89 }; static const symbol s_2_6[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_2_7[4] = { 0xD0, 0xBD, 0xD0, 0xBD }; static const struct among a_2[8] = { /* 0 */ { 4, s_2_0, -1, 1, 0}, /* 1 */ { 6, s_2_1, 0, 2, 0}, /* 2 */ { 6, s_2_2, 0, 2, 0}, /* 3 */ { 2, s_2_3, -1, 1, 0}, /* 4 */ { 4, s_2_4, 3, 1, 0}, /* 5 */ { 6, s_2_5, 4, 2, 0}, /* 6 */ { 4, s_2_6, -1, 1, 0}, /* 7 */ { 4, s_2_7, -1, 1, 0} }; static const symbol s_3_0[4] = { 0xD1, 0x81, 0xD1, 0x8C }; static const symbol s_3_1[4] = { 0xD1, 0x81, 0xD1, 0x8F }; static const struct among a_3[2] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0} }; static const symbol s_4_0[4] = { 0xD1, 0x8B, 0xD1, 0x82 }; static const symbol s_4_1[4] = { 0xD1, 0x8E, 0xD1, 0x82 }; static const symbol s_4_2[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x82 }; static const symbol s_4_3[4] = { 0xD1, 0x8F, 0xD1, 0x82 }; static const symbol s_4_4[4] = { 0xD0, 0xB5, 0xD1, 0x82 }; static const symbol s_4_5[6] = { 0xD1, 0x83, 0xD0, 0xB5, 0xD1, 0x82 }; static const symbol s_4_6[4] = { 0xD0, 0xB8, 0xD1, 0x82 }; static const symbol s_4_7[4] = { 0xD0, 0xBD, 0xD1, 0x8B }; static const symbol s_4_8[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD1, 0x8B }; static const symbol s_4_9[4] = { 0xD1, 0x82, 0xD1, 0x8C }; static const symbol s_4_10[6] = { 0xD1, 0x8B, 0xD1, 0x82, 0xD1, 0x8C }; static const symbol s_4_11[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD1, 0x8C }; static const symbol s_4_12[6] = { 0xD0, 0xB5, 0xD1, 0x88, 0xD1, 0x8C }; static const symbol s_4_13[6] = { 0xD0, 0xB8, 0xD1, 0x88, 0xD1, 0x8C }; static const symbol s_4_14[2] = { 0xD1, 0x8E }; static const symbol s_4_15[4] = { 0xD1, 0x83, 0xD1, 0x8E }; static const symbol s_4_16[4] = { 0xD0, 0xBB, 0xD0, 0xB0 }; static const symbol s_4_17[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB0 }; static const symbol s_4_18[6] = { 0xD0, 
0xB8, 0xD0, 0xBB, 0xD0, 0xB0 }; static const symbol s_4_19[4] = { 0xD0, 0xBD, 0xD0, 0xB0 }; static const symbol s_4_20[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xB0 }; static const symbol s_4_21[6] = { 0xD0, 0xB5, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_22[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_23[6] = { 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_24[8] = { 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_25[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_26[4] = { 0xD0, 0xBB, 0xD0, 0xB8 }; static const symbol s_4_27[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB8 }; static const symbol s_4_28[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB8 }; static const symbol s_4_29[2] = { 0xD0, 0xB9 }; static const symbol s_4_30[4] = { 0xD1, 0x83, 0xD0, 0xB9 }; static const symbol s_4_31[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; static const symbol s_4_32[2] = { 0xD0, 0xBB }; static const symbol s_4_33[4] = { 0xD1, 0x8B, 0xD0, 0xBB }; static const symbol s_4_34[4] = { 0xD0, 0xB8, 0xD0, 0xBB }; static const symbol s_4_35[4] = { 0xD1, 0x8B, 0xD0, 0xBC }; static const symbol s_4_36[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_4_37[4] = { 0xD0, 0xB8, 0xD0, 0xBC }; static const symbol s_4_38[2] = { 0xD0, 0xBD }; static const symbol s_4_39[4] = { 0xD0, 0xB5, 0xD0, 0xBD }; static const symbol s_4_40[4] = { 0xD0, 0xBB, 0xD0, 0xBE }; static const symbol s_4_41[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xBE }; static const symbol s_4_42[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xBE }; static const symbol s_4_43[4] = { 0xD0, 0xBD, 0xD0, 0xBE }; static const symbol s_4_44[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xBE }; static const symbol s_4_45[6] = { 0xD0, 0xBD, 0xD0, 0xBD, 0xD0, 0xBE }; static const struct among a_4[46] = { /* 0 */ { 4, s_4_0, -1, 2, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 6, s_4_2, 1, 2, 0}, /* 3 */ { 4, s_4_3, -1, 2, 0}, /* 4 */ { 4, s_4_4, -1, 1, 0}, /* 5 */ { 6, s_4_5, 4, 
2, 0}, /* 6 */ { 4, s_4_6, -1, 2, 0}, /* 7 */ { 4, s_4_7, -1, 1, 0}, /* 8 */ { 6, s_4_8, 7, 2, 0}, /* 9 */ { 4, s_4_9, -1, 1, 0}, /* 10 */ { 6, s_4_10, 9, 2, 0}, /* 11 */ { 6, s_4_11, 9, 2, 0}, /* 12 */ { 6, s_4_12, -1, 1, 0}, /* 13 */ { 6, s_4_13, -1, 2, 0}, /* 14 */ { 2, s_4_14, -1, 2, 0}, /* 15 */ { 4, s_4_15, 14, 2, 0}, /* 16 */ { 4, s_4_16, -1, 1, 0}, /* 17 */ { 6, s_4_17, 16, 2, 0}, /* 18 */ { 6, s_4_18, 16, 2, 0}, /* 19 */ { 4, s_4_19, -1, 1, 0}, /* 20 */ { 6, s_4_20, 19, 2, 0}, /* 21 */ { 6, s_4_21, -1, 1, 0}, /* 22 */ { 6, s_4_22, -1, 2, 0}, /* 23 */ { 6, s_4_23, -1, 1, 0}, /* 24 */ { 8, s_4_24, 23, 2, 0}, /* 25 */ { 8, s_4_25, 23, 2, 0}, /* 26 */ { 4, s_4_26, -1, 1, 0}, /* 27 */ { 6, s_4_27, 26, 2, 0}, /* 28 */ { 6, s_4_28, 26, 2, 0}, /* 29 */ { 2, s_4_29, -1, 1, 0}, /* 30 */ { 4, s_4_30, 29, 2, 0}, /* 31 */ { 4, s_4_31, 29, 2, 0}, /* 32 */ { 2, s_4_32, -1, 1, 0}, /* 33 */ { 4, s_4_33, 32, 2, 0}, /* 34 */ { 4, s_4_34, 32, 2, 0}, /* 35 */ { 4, s_4_35, -1, 2, 0}, /* 36 */ { 4, s_4_36, -1, 1, 0}, /* 37 */ { 4, s_4_37, -1, 2, 0}, /* 38 */ { 2, s_4_38, -1, 1, 0}, /* 39 */ { 4, s_4_39, 38, 2, 0}, /* 40 */ { 4, s_4_40, -1, 1, 0}, /* 41 */ { 6, s_4_41, 40, 2, 0}, /* 42 */ { 6, s_4_42, 40, 2, 0}, /* 43 */ { 4, s_4_43, -1, 1, 0}, /* 44 */ { 6, s_4_44, 43, 2, 0}, /* 45 */ { 6, s_4_45, 43, 1, 0} }; static const symbol s_5_0[2] = { 0xD1, 0x83 }; static const symbol s_5_1[4] = { 0xD1, 0x8F, 0xD1, 0x85 }; static const symbol s_5_2[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD1, 0x85 }; static const symbol s_5_3[4] = { 0xD0, 0xB0, 0xD1, 0x85 }; static const symbol s_5_4[2] = { 0xD1, 0x8B }; static const symbol s_5_5[2] = { 0xD1, 0x8C }; static const symbol s_5_6[2] = { 0xD1, 0x8E }; static const symbol s_5_7[4] = { 0xD1, 0x8C, 0xD1, 0x8E }; static const symbol s_5_8[4] = { 0xD0, 0xB8, 0xD1, 0x8E }; static const symbol s_5_9[2] = { 0xD1, 0x8F }; static const symbol s_5_10[4] = { 0xD1, 0x8C, 0xD1, 0x8F }; static const symbol s_5_11[4] = { 0xD0, 0xB8, 0xD1, 0x8F }; static const symbol 
s_5_12[2] = { 0xD0, 0xB0 }; static const symbol s_5_13[4] = { 0xD0, 0xB5, 0xD0, 0xB2 }; static const symbol s_5_14[4] = { 0xD0, 0xBE, 0xD0, 0xB2 }; static const symbol s_5_15[2] = { 0xD0, 0xB5 }; static const symbol s_5_16[4] = { 0xD1, 0x8C, 0xD0, 0xB5 }; static const symbol s_5_17[4] = { 0xD0, 0xB8, 0xD0, 0xB5 }; static const symbol s_5_18[2] = { 0xD0, 0xB8 }; static const symbol s_5_19[4] = { 0xD0, 0xB5, 0xD0, 0xB8 }; static const symbol s_5_20[4] = { 0xD0, 0xB8, 0xD0, 0xB8 }; static const symbol s_5_21[6] = { 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_5_22[8] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_5_23[6] = { 0xD0, 0xB0, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_5_24[2] = { 0xD0, 0xB9 }; static const symbol s_5_25[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; static const symbol s_5_26[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xB9 }; static const symbol s_5_27[4] = { 0xD0, 0xB8, 0xD0, 0xB9 }; static const symbol s_5_28[4] = { 0xD0, 0xBE, 0xD0, 0xB9 }; static const symbol s_5_29[4] = { 0xD1, 0x8F, 0xD0, 0xBC }; static const symbol s_5_30[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC }; static const symbol s_5_31[4] = { 0xD0, 0xB0, 0xD0, 0xBC }; static const symbol s_5_32[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_5_33[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_5_34[4] = { 0xD0, 0xBE, 0xD0, 0xBC }; static const symbol s_5_35[2] = { 0xD0, 0xBE }; static const struct among a_5[36] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 6, s_5_2, 1, 1, 0}, /* 3 */ { 4, s_5_3, -1, 1, 0}, /* 4 */ { 2, s_5_4, -1, 1, 0}, /* 5 */ { 2, s_5_5, -1, 1, 0}, /* 6 */ { 2, s_5_6, -1, 1, 0}, /* 7 */ { 4, s_5_7, 6, 1, 0}, /* 8 */ { 4, s_5_8, 6, 1, 0}, /* 9 */ { 2, s_5_9, -1, 1, 0}, /* 10 */ { 4, s_5_10, 9, 1, 0}, /* 11 */ { 4, s_5_11, 9, 1, 0}, /* 12 */ { 2, s_5_12, -1, 1, 0}, /* 13 */ { 4, s_5_13, -1, 1, 0}, /* 14 */ { 4, s_5_14, -1, 1, 0}, /* 15 */ { 2, s_5_15, -1, 1, 0}, /* 16 */ { 4, 
s_5_16, 15, 1, 0}, /* 17 */ { 4, s_5_17, 15, 1, 0}, /* 18 */ { 2, s_5_18, -1, 1, 0}, /* 19 */ { 4, s_5_19, 18, 1, 0}, /* 20 */ { 4, s_5_20, 18, 1, 0}, /* 21 */ { 6, s_5_21, 18, 1, 0}, /* 22 */ { 8, s_5_22, 21, 1, 0}, /* 23 */ { 6, s_5_23, 18, 1, 0}, /* 24 */ { 2, s_5_24, -1, 1, 0}, /* 25 */ { 4, s_5_25, 24, 1, 0}, /* 26 */ { 6, s_5_26, 25, 1, 0}, /* 27 */ { 4, s_5_27, 24, 1, 0}, /* 28 */ { 4, s_5_28, 24, 1, 0}, /* 29 */ { 4, s_5_29, -1, 1, 0}, /* 30 */ { 6, s_5_30, 29, 1, 0}, /* 31 */ { 4, s_5_31, -1, 1, 0}, /* 32 */ { 4, s_5_32, -1, 1, 0}, /* 33 */ { 6, s_5_33, 32, 1, 0}, /* 34 */ { 4, s_5_34, -1, 1, 0}, /* 35 */ { 2, s_5_35, -1, 1, 0} }; static const symbol s_6_0[6] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82 }; static const symbol s_6_1[8] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82, 0xD1, 0x8C }; static const struct among a_6[2] = { /* 0 */ { 6, s_6_0, -1, 1, 0}, /* 1 */ { 8, s_6_1, -1, 1, 0} }; static const symbol s_7_0[6] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88 }; static const symbol s_7_1[2] = { 0xD1, 0x8C }; static const symbol s_7_2[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88, 0xD0, 0xB5 }; static const symbol s_7_3[2] = { 0xD0, 0xBD }; static const struct among a_7[4] = { /* 0 */ { 6, s_7_0, -1, 1, 0}, /* 1 */ { 2, s_7_1, -1, 3, 0}, /* 2 */ { 8, s_7_2, -1, 1, 0}, /* 3 */ { 2, s_7_3, -1, 2, 0} }; static const unsigned char g_v[] = { 33, 65, 8, 232 }; static const symbol s_0[] = { 0xD0, 0xB0 }; static const symbol s_1[] = { 0xD1, 0x8F }; static const symbol s_2[] = { 0xD0, 0xB0 }; static const symbol s_3[] = { 0xD1, 0x8F }; static const symbol s_4[] = { 0xD0, 0xB0 }; static const symbol s_5[] = { 0xD1, 0x8F }; static const symbol s_6[] = { 0xD0, 0xBD }; static const symbol s_7[] = { 0xD0, 0xBD }; static const symbol s_8[] = { 0xD0, 0xBD }; static const symbol s_9[] = { 0xD0, 0xB8 }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c1 = z->c; /* do, line 61 */ { /* gopast */ /* grouping v, line 62 */ int ret = out_grouping_U(z, g_v, 
1072, 1103, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[0] = z->c; /* setmark pV, line 62 */ { /* gopast */ /* non v, line 62 */ int ret = in_grouping_U(z, g_v, 1072, 1103, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* grouping v, line 63 */ int ret = out_grouping_U(z, g_v, 1072, 1103, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 63 */ int ret = in_grouping_U(z, g_v, 1072, 1103, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 63 */ lab0: z->c = c1; } return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_perfective_gerund(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 72 */ among_var = find_among_b(z, a_0, 9); /* substring, line 72 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 72 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 76 */ if (!(eq_s_b(z, 2, s_0))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_1))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 76 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 83 */ if (ret < 0) return ret; } break; } return 1; } static int r_adjective(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 88 */ among_var = find_among_b(z, a_1, 26); /* substring, line 88 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 88 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 97 */ if (ret < 0) return ret; } break; } return 1; } static int r_adjectival(struct SN_env * z) { int among_var; { int ret = r_adjective(z); if (ret == 0) return 0; /* call adjective, line 102 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 109 */ z->ket = z->c; /* [, line 110 */ among_var = find_among_b(z, a_2, 8); /* substring, line 110 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra 
= z->c; /* ], line 110 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 115 */ if (!(eq_s_b(z, 2, s_2))) goto lab2; goto lab1; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_3))) { z->c = z->l - m_keep; goto lab0; } } lab1: { int ret = slice_del(z); /* delete, line 115 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } break; } lab0: ; } return 1; } static int r_reflexive(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 129 */ if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 140 && z->p[z->c - 1] != 143)) return 0; among_var = find_among_b(z, a_3, 2); /* substring, line 129 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 129 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 132 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 137 */ among_var = find_among_b(z, a_4, 46); /* substring, line 137 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 137 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 143 */ if (!(eq_s_b(z, 2, s_4))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_5))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } break; } return 1; } static int r_noun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 160 */ among_var = find_among_b(z, a_5, 36); /* substring, line 160 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 160 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 167 */ if (ret < 0) return ret; } break; } return 1; } static int r_derivational(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 176 
*/ if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 130 && z->p[z->c - 1] != 140)) return 0; among_var = find_among_b(z, a_6, 2); /* substring, line 176 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 176 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 176 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; } return 1; } static int r_tidy_up(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 184 */ among_var = find_among_b(z, a_7, 4); /* substring, line 184 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 184 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 188 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 189 */ if (!(eq_s_b(z, 2, s_6))) return 0; z->bra = z->c; /* ], line 189 */ if (!(eq_s_b(z, 2, s_7))) return 0; { int ret = slice_del(z); /* delete, line 189 */ if (ret < 0) return ret; } break; case 2: if (!(eq_s_b(z, 2, s_8))) return 0; { int ret = slice_del(z); /* delete, line 192 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } break; } return 1; } extern int russian_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 201 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 201 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 202 */ { int mlimit; /* setlimit, line 202 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 202 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; { int m3 = z->l - z->c; (void)m3; /* do, line 203 */ { int m4 = z->l - z->c; (void)m4; /* or, line 204 */ { int ret = r_perfective_gerund(z); if (ret == 0) goto lab3; /* call perfective_gerund, line 204 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = z->l - m4; { int m_keep = z->l - 
z->c;/* (void) m_keep;*/ /* try, line 205 */ { int ret = r_reflexive(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call reflexive, line 205 */ if (ret < 0) return ret; } lab4: ; } { int m5 = z->l - z->c; (void)m5; /* or, line 206 */ { int ret = r_adjectival(z); if (ret == 0) goto lab6; /* call adjectival, line 206 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m5; { int ret = r_verb(z); if (ret == 0) goto lab7; /* call verb, line 206 */ if (ret < 0) return ret; } goto lab5; lab7: z->c = z->l - m5; { int ret = r_noun(z); if (ret == 0) goto lab1; /* call noun, line 206 */ if (ret < 0) return ret; } } lab5: ; } lab2: lab1: z->c = z->l - m3; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 209 */ z->ket = z->c; /* [, line 209 */ if (!(eq_s_b(z, 2, s_9))) { z->c = z->l - m_keep; goto lab8; } z->bra = z->c; /* ], line 209 */ { int ret = slice_del(z); /* delete, line 209 */ if (ret < 0) return ret; } lab8: ; } { int m6 = z->l - z->c; (void)m6; /* do, line 212 */ { int ret = r_derivational(z); if (ret == 0) goto lab9; /* call derivational, line 212 */ if (ret < 0) return ret; } lab9: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 213 */ { int ret = r_tidy_up(z); if (ret == 0) goto lab10; /* call tidy_up, line 213 */ if (ret < 0) return ret; } lab10: z->c = z->l - m7; } z->lb = mlimit; } z->c = z->lb; return 1; } extern struct SN_env * russian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } extern void russian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_russian.h000066400000000000000000000004711217574114600307550ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * russian_UTF_8_create_env(void); extern void russian_UTF_8_close_env(struct SN_env * z); extern int russian_UTF_8_stem(struct SN_env * z); 
#ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_spanish.c000066400000000000000000001212241217574114600307310ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int spanish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_y_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_attached_pronoun(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * spanish_UTF_8_create_env(void); extern void spanish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 0xC3, 0xA1 }; static const symbol s_0_2[2] = { 0xC3, 0xA9 }; static const symbol s_0_3[2] = { 0xC3, 0xAD }; static const symbol s_0_4[2] = { 0xC3, 0xB3 }; static const symbol s_0_5[2] = { 0xC3, 0xBA }; static const struct among a_0[6] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 2, s_0_1, 0, 1, 0}, /* 2 */ { 2, s_0_2, 0, 2, 0}, /* 3 */ { 2, s_0_3, 0, 3, 0}, /* 4 */ { 2, s_0_4, 0, 4, 0}, /* 5 */ { 2, s_0_5, 0, 5, 0} }; static const symbol s_1_0[2] = { 'l', 'a' }; static const symbol s_1_1[4] = { 's', 'e', 'l', 'a' }; static const symbol s_1_2[2] = { 'l', 'e' }; static const symbol s_1_3[2] = { 'm', 'e' }; static const symbol s_1_4[2] = { 's', 'e' }; static const symbol s_1_5[2] = { 'l', 'o' }; static const symbol s_1_6[4] = { 's', 'e', 'l', 'o' }; static const symbol s_1_7[3] = { 'l', 'a', 's' }; static const symbol s_1_8[5] = { 's', 'e', 'l', 'a', 's' }; static const symbol s_1_9[3] = { 'l', 'e', 's' }; static const 
symbol s_1_10[3] = { 'l', 'o', 's' }; static const symbol s_1_11[5] = { 's', 'e', 'l', 'o', 's' }; static const symbol s_1_12[3] = { 'n', 'o', 's' }; static const struct among a_1[13] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 4, s_1_1, 0, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 2, s_1_4, -1, -1, 0}, /* 5 */ { 2, s_1_5, -1, -1, 0}, /* 6 */ { 4, s_1_6, 5, -1, 0}, /* 7 */ { 3, s_1_7, -1, -1, 0}, /* 8 */ { 5, s_1_8, 7, -1, 0}, /* 9 */ { 3, s_1_9, -1, -1, 0}, /* 10 */ { 3, s_1_10, -1, -1, 0}, /* 11 */ { 5, s_1_11, 10, -1, 0}, /* 12 */ { 3, s_1_12, -1, -1, 0} }; static const symbol s_2_0[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_2_1[5] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_2_2[5] = { 'y', 'e', 'n', 'd', 'o' }; static const symbol s_2_3[5] = { 0xC3, 0xA1, 'n', 'd', 'o' }; static const symbol s_2_4[6] = { 'i', 0xC3, 0xA9, 'n', 'd', 'o' }; static const symbol s_2_5[2] = { 'a', 'r' }; static const symbol s_2_6[2] = { 'e', 'r' }; static const symbol s_2_7[2] = { 'i', 'r' }; static const symbol s_2_8[3] = { 0xC3, 0xA1, 'r' }; static const symbol s_2_9[3] = { 0xC3, 0xA9, 'r' }; static const symbol s_2_10[3] = { 0xC3, 0xAD, 'r' }; static const struct among a_2[11] = { /* 0 */ { 4, s_2_0, -1, 6, 0}, /* 1 */ { 5, s_2_1, -1, 6, 0}, /* 2 */ { 5, s_2_2, -1, 7, 0}, /* 3 */ { 5, s_2_3, -1, 2, 0}, /* 4 */ { 6, s_2_4, -1, 1, 0}, /* 5 */ { 2, s_2_5, -1, 6, 0}, /* 6 */ { 2, s_2_6, -1, 6, 0}, /* 7 */ { 2, s_2_7, -1, 6, 0}, /* 8 */ { 3, s_2_8, -1, 3, 0}, /* 9 */ { 3, s_2_9, -1, 4, 0}, /* 10 */ { 3, s_2_10, -1, 5, 0} }; static const symbol s_3_0[2] = { 'i', 'c' }; static const symbol s_3_1[2] = { 'a', 'd' }; static const symbol s_3_2[2] = { 'o', 's' }; static const symbol s_3_3[2] = { 'i', 'v' }; static const struct among a_3[4] = { /* 0 */ { 2, s_3_0, -1, -1, 0}, /* 1 */ { 2, s_3_1, -1, -1, 0}, /* 2 */ { 2, s_3_2, -1, -1, 0}, /* 3 */ { 2, s_3_3, -1, 1, 0} }; static const symbol s_4_0[4] = { 'a', 'b', 'l', 'e' }; static 
const symbol s_4_1[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_4_2[4] = { 'a', 'n', 't', 'e' }; static const struct among a_4[3] = { /* 0 */ { 4, s_4_0, -1, 1, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 4, s_4_2, -1, 1, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_5_2[2] = { 'i', 'v' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, -1, 1, 0} }; static const symbol s_6_0[3] = { 'i', 'c', 'a' }; static const symbol s_6_1[5] = { 'a', 'n', 'c', 'i', 'a' }; static const symbol s_6_2[5] = { 'e', 'n', 'c', 'i', 'a' }; static const symbol s_6_3[5] = { 'a', 'd', 'o', 'r', 'a' }; static const symbol s_6_4[3] = { 'o', 's', 'a' }; static const symbol s_6_5[4] = { 'i', 's', 't', 'a' }; static const symbol s_6_6[3] = { 'i', 'v', 'a' }; static const symbol s_6_7[4] = { 'a', 'n', 'z', 'a' }; static const symbol s_6_8[6] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a' }; static const symbol s_6_9[4] = { 'i', 'd', 'a', 'd' }; static const symbol s_6_10[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_6_11[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_6_12[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_13[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_14[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_15[6] = { 'a', 'c', 'i', 0xC3, 0xB3, 'n' }; static const symbol s_6_16[6] = { 'u', 'c', 'i', 0xC3, 0xB3, 'n' }; static const symbol s_6_17[3] = { 'i', 'c', 'o' }; static const symbol s_6_18[4] = { 'i', 's', 'm', 'o' }; static const symbol s_6_19[3] = { 'o', 's', 'o' }; static const symbol s_6_20[7] = { 'a', 'm', 'i', 'e', 'n', 't', 'o' }; static const symbol s_6_21[7] = { 'i', 'm', 'i', 'e', 'n', 't', 'o' }; static const symbol s_6_22[3] = { 'i', 'v', 'o' }; static const symbol s_6_23[4] = { 'a', 'd', 'o', 'r' }; static const symbol s_6_24[4] = { 'i', 'c', 'a', 's' }; static const symbol s_6_25[6] = 
{ 'a', 'n', 'c', 'i', 'a', 's' }; static const symbol s_6_26[6] = { 'e', 'n', 'c', 'i', 'a', 's' }; static const symbol s_6_27[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; static const symbol s_6_28[4] = { 'o', 's', 'a', 's' }; static const symbol s_6_29[5] = { 'i', 's', 't', 'a', 's' }; static const symbol s_6_30[4] = { 'i', 'v', 'a', 's' }; static const symbol s_6_31[5] = { 'a', 'n', 'z', 'a', 's' }; static const symbol s_6_32[7] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a', 's' }; static const symbol s_6_33[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; static const symbol s_6_34[5] = { 'a', 'b', 'l', 'e', 's' }; static const symbol s_6_35[5] = { 'i', 'b', 'l', 'e', 's' }; static const symbol s_6_36[7] = { 'a', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_6_37[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_6_38[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; static const symbol s_6_39[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_6_40[4] = { 'i', 'c', 'o', 's' }; static const symbol s_6_41[5] = { 'i', 's', 'm', 'o', 's' }; static const symbol s_6_42[4] = { 'o', 's', 'o', 's' }; static const symbol s_6_43[8] = { 'a', 'm', 'i', 'e', 'n', 't', 'o', 's' }; static const symbol s_6_44[8] = { 'i', 'm', 'i', 'e', 'n', 't', 'o', 's' }; static const symbol s_6_45[4] = { 'i', 'v', 'o', 's' }; static const struct among a_6[46] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 5, s_6_1, -1, 2, 0}, /* 2 */ { 5, s_6_2, -1, 5, 0}, /* 3 */ { 5, s_6_3, -1, 2, 0}, /* 4 */ { 3, s_6_4, -1, 1, 0}, /* 5 */ { 4, s_6_5, -1, 1, 0}, /* 6 */ { 3, s_6_6, -1, 9, 0}, /* 7 */ { 4, s_6_7, -1, 1, 0}, /* 8 */ { 6, s_6_8, -1, 3, 0}, /* 9 */ { 4, s_6_9, -1, 8, 0}, /* 10 */ { 4, s_6_10, -1, 1, 0}, /* 11 */ { 4, s_6_11, -1, 1, 0}, /* 12 */ { 4, s_6_12, -1, 2, 0}, /* 13 */ { 5, s_6_13, -1, 7, 0}, /* 14 */ { 6, s_6_14, 13, 6, 0}, /* 15 */ { 6, s_6_15, -1, 2, 0}, /* 16 */ { 6, s_6_16, -1, 4, 0}, /* 17 */ { 3, s_6_17, -1, 1, 0}, /* 18 */ { 4, s_6_18, -1, 1, 0}, /* 19 */ { 3, s_6_19, -1, 1, 0}, /* 20 */ { 
7, s_6_20, -1, 1, 0}, /* 21 */ { 7, s_6_21, -1, 1, 0}, /* 22 */ { 3, s_6_22, -1, 9, 0}, /* 23 */ { 4, s_6_23, -1, 2, 0}, /* 24 */ { 4, s_6_24, -1, 1, 0}, /* 25 */ { 6, s_6_25, -1, 2, 0}, /* 26 */ { 6, s_6_26, -1, 5, 0}, /* 27 */ { 6, s_6_27, -1, 2, 0}, /* 28 */ { 4, s_6_28, -1, 1, 0}, /* 29 */ { 5, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, -1, 9, 0}, /* 31 */ { 5, s_6_31, -1, 1, 0}, /* 32 */ { 7, s_6_32, -1, 3, 0}, /* 33 */ { 6, s_6_33, -1, 8, 0}, /* 34 */ { 5, s_6_34, -1, 1, 0}, /* 35 */ { 5, s_6_35, -1, 1, 0}, /* 36 */ { 7, s_6_36, -1, 2, 0}, /* 37 */ { 7, s_6_37, -1, 4, 0}, /* 38 */ { 6, s_6_38, -1, 2, 0}, /* 39 */ { 5, s_6_39, -1, 2, 0}, /* 40 */ { 4, s_6_40, -1, 1, 0}, /* 41 */ { 5, s_6_41, -1, 1, 0}, /* 42 */ { 4, s_6_42, -1, 1, 0}, /* 43 */ { 8, s_6_43, -1, 1, 0}, /* 44 */ { 8, s_6_44, -1, 1, 0}, /* 45 */ { 4, s_6_45, -1, 9, 0} }; static const symbol s_7_0[2] = { 'y', 'a' }; static const symbol s_7_1[2] = { 'y', 'e' }; static const symbol s_7_2[3] = { 'y', 'a', 'n' }; static const symbol s_7_3[3] = { 'y', 'e', 'n' }; static const symbol s_7_4[5] = { 'y', 'e', 'r', 'o', 'n' }; static const symbol s_7_5[5] = { 'y', 'e', 'n', 'd', 'o' }; static const symbol s_7_6[2] = { 'y', 'o' }; static const symbol s_7_7[3] = { 'y', 'a', 's' }; static const symbol s_7_8[3] = { 'y', 'e', 's' }; static const symbol s_7_9[4] = { 'y', 'a', 'i', 's' }; static const symbol s_7_10[5] = { 'y', 'a', 'm', 'o', 's' }; static const symbol s_7_11[3] = { 'y', 0xC3, 0xB3 }; static const struct among a_7[12] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 2, s_7_1, -1, 1, 0}, /* 2 */ { 3, s_7_2, -1, 1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 5, s_7_4, -1, 1, 0}, /* 5 */ { 5, s_7_5, -1, 1, 0}, /* 6 */ { 2, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 3, s_7_8, -1, 1, 0}, /* 9 */ { 4, s_7_9, -1, 1, 0}, /* 10 */ { 5, s_7_10, -1, 1, 0}, /* 11 */ { 3, s_7_11, -1, 1, 0} }; static const symbol s_8_0[3] = { 'a', 'b', 'a' }; static const symbol s_8_1[3] = { 'a', 'd', 'a' }; static 
const symbol s_8_2[3] = { 'i', 'd', 'a' }; static const symbol s_8_3[3] = { 'a', 'r', 'a' }; static const symbol s_8_4[4] = { 'i', 'e', 'r', 'a' }; static const symbol s_8_5[3] = { 0xC3, 0xAD, 'a' }; static const symbol s_8_6[5] = { 'a', 'r', 0xC3, 0xAD, 'a' }; static const symbol s_8_7[5] = { 'e', 'r', 0xC3, 0xAD, 'a' }; static const symbol s_8_8[5] = { 'i', 'r', 0xC3, 0xAD, 'a' }; static const symbol s_8_9[2] = { 'a', 'd' }; static const symbol s_8_10[2] = { 'e', 'd' }; static const symbol s_8_11[2] = { 'i', 'd' }; static const symbol s_8_12[3] = { 'a', 's', 'e' }; static const symbol s_8_13[4] = { 'i', 'e', 's', 'e' }; static const symbol s_8_14[4] = { 'a', 's', 't', 'e' }; static const symbol s_8_15[4] = { 'i', 's', 't', 'e' }; static const symbol s_8_16[2] = { 'a', 'n' }; static const symbol s_8_17[4] = { 'a', 'b', 'a', 'n' }; static const symbol s_8_18[4] = { 'a', 'r', 'a', 'n' }; static const symbol s_8_19[5] = { 'i', 'e', 'r', 'a', 'n' }; static const symbol s_8_20[4] = { 0xC3, 0xAD, 'a', 'n' }; static const symbol s_8_21[6] = { 'a', 'r', 0xC3, 0xAD, 'a', 'n' }; static const symbol s_8_22[6] = { 'e', 'r', 0xC3, 0xAD, 'a', 'n' }; static const symbol s_8_23[6] = { 'i', 'r', 0xC3, 0xAD, 'a', 'n' }; static const symbol s_8_24[2] = { 'e', 'n' }; static const symbol s_8_25[4] = { 'a', 's', 'e', 'n' }; static const symbol s_8_26[5] = { 'i', 'e', 's', 'e', 'n' }; static const symbol s_8_27[4] = { 'a', 'r', 'o', 'n' }; static const symbol s_8_28[5] = { 'i', 'e', 'r', 'o', 'n' }; static const symbol s_8_29[5] = { 'a', 'r', 0xC3, 0xA1, 'n' }; static const symbol s_8_30[5] = { 'e', 'r', 0xC3, 0xA1, 'n' }; static const symbol s_8_31[5] = { 'i', 'r', 0xC3, 0xA1, 'n' }; static const symbol s_8_32[3] = { 'a', 'd', 'o' }; static const symbol s_8_33[3] = { 'i', 'd', 'o' }; static const symbol s_8_34[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_8_35[5] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_8_36[2] = { 'a', 'r' }; static const symbol s_8_37[2] = { 'e', 'r' 
}; static const symbol s_8_38[2] = { 'i', 'r' }; static const symbol s_8_39[2] = { 'a', 's' }; static const symbol s_8_40[4] = { 'a', 'b', 'a', 's' }; static const symbol s_8_41[4] = { 'a', 'd', 'a', 's' }; static const symbol s_8_42[4] = { 'i', 'd', 'a', 's' }; static const symbol s_8_43[4] = { 'a', 'r', 'a', 's' }; static const symbol s_8_44[5] = { 'i', 'e', 'r', 'a', 's' }; static const symbol s_8_45[4] = { 0xC3, 0xAD, 'a', 's' }; static const symbol s_8_46[6] = { 'a', 'r', 0xC3, 0xAD, 'a', 's' }; static const symbol s_8_47[6] = { 'e', 'r', 0xC3, 0xAD, 'a', 's' }; static const symbol s_8_48[6] = { 'i', 'r', 0xC3, 0xAD, 'a', 's' }; static const symbol s_8_49[2] = { 'e', 's' }; static const symbol s_8_50[4] = { 'a', 's', 'e', 's' }; static const symbol s_8_51[5] = { 'i', 'e', 's', 'e', 's' }; static const symbol s_8_52[5] = { 'a', 'b', 'a', 'i', 's' }; static const symbol s_8_53[5] = { 'a', 'r', 'a', 'i', 's' }; static const symbol s_8_54[6] = { 'i', 'e', 'r', 'a', 'i', 's' }; static const symbol s_8_55[5] = { 0xC3, 0xAD, 'a', 'i', 's' }; static const symbol s_8_56[7] = { 'a', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; static const symbol s_8_57[7] = { 'e', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; static const symbol s_8_58[7] = { 'i', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; static const symbol s_8_59[5] = { 'a', 's', 'e', 'i', 's' }; static const symbol s_8_60[6] = { 'i', 'e', 's', 'e', 'i', 's' }; static const symbol s_8_61[6] = { 'a', 's', 't', 'e', 'i', 's' }; static const symbol s_8_62[6] = { 'i', 's', 't', 'e', 'i', 's' }; static const symbol s_8_63[4] = { 0xC3, 0xA1, 'i', 's' }; static const symbol s_8_64[4] = { 0xC3, 0xA9, 'i', 's' }; static const symbol s_8_65[6] = { 'a', 'r', 0xC3, 0xA9, 'i', 's' }; static const symbol s_8_66[6] = { 'e', 'r', 0xC3, 0xA9, 'i', 's' }; static const symbol s_8_67[6] = { 'i', 'r', 0xC3, 0xA9, 'i', 's' }; static const symbol s_8_68[4] = { 'a', 'd', 'o', 's' }; static const symbol s_8_69[4] = { 'i', 'd', 'o', 's' }; static const symbol s_8_70[4] = { 
'a', 'm', 'o', 's' }; static const symbol s_8_71[7] = { 0xC3, 0xA1, 'b', 'a', 'm', 'o', 's' }; static const symbol s_8_72[7] = { 0xC3, 0xA1, 'r', 'a', 'm', 'o', 's' }; static const symbol s_8_73[8] = { 'i', 0xC3, 0xA9, 'r', 'a', 'm', 'o', 's' }; static const symbol s_8_74[6] = { 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_8_75[8] = { 'a', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_8_76[8] = { 'e', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_8_77[8] = { 'i', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_8_78[4] = { 'e', 'm', 'o', 's' }; static const symbol s_8_79[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_80[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_81[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_82[7] = { 0xC3, 0xA1, 's', 'e', 'm', 'o', 's' }; static const symbol s_8_83[8] = { 'i', 0xC3, 0xA9, 's', 'e', 'm', 'o', 's' }; static const symbol s_8_84[4] = { 'i', 'm', 'o', 's' }; static const symbol s_8_85[5] = { 'a', 'r', 0xC3, 0xA1, 's' }; static const symbol s_8_86[5] = { 'e', 'r', 0xC3, 0xA1, 's' }; static const symbol s_8_87[5] = { 'i', 'r', 0xC3, 0xA1, 's' }; static const symbol s_8_88[3] = { 0xC3, 0xAD, 's' }; static const symbol s_8_89[4] = { 'a', 'r', 0xC3, 0xA1 }; static const symbol s_8_90[4] = { 'e', 'r', 0xC3, 0xA1 }; static const symbol s_8_91[4] = { 'i', 'r', 0xC3, 0xA1 }; static const symbol s_8_92[4] = { 'a', 'r', 0xC3, 0xA9 }; static const symbol s_8_93[4] = { 'e', 'r', 0xC3, 0xA9 }; static const symbol s_8_94[4] = { 'i', 'r', 0xC3, 0xA9 }; static const symbol s_8_95[3] = { 'i', 0xC3, 0xB3 }; static const struct among a_8[96] = { /* 0 */ { 3, s_8_0, -1, 2, 0}, /* 1 */ { 3, s_8_1, -1, 2, 0}, /* 2 */ { 3, s_8_2, -1, 2, 0}, /* 3 */ { 3, s_8_3, -1, 2, 0}, /* 4 */ { 4, s_8_4, -1, 2, 0}, /* 5 */ { 3, s_8_5, -1, 2, 0}, /* 6 */ { 5, s_8_6, 5, 2, 0}, /* 7 */ { 5, s_8_7, 5, 2, 0}, /* 8 */ { 5, s_8_8, 5, 2, 0}, /* 9 */ { 2, s_8_9, -1, 2, 0}, /* 
10 */ { 2, s_8_10, -1, 2, 0}, /* 11 */ { 2, s_8_11, -1, 2, 0}, /* 12 */ { 3, s_8_12, -1, 2, 0}, /* 13 */ { 4, s_8_13, -1, 2, 0}, /* 14 */ { 4, s_8_14, -1, 2, 0}, /* 15 */ { 4, s_8_15, -1, 2, 0}, /* 16 */ { 2, s_8_16, -1, 2, 0}, /* 17 */ { 4, s_8_17, 16, 2, 0}, /* 18 */ { 4, s_8_18, 16, 2, 0}, /* 19 */ { 5, s_8_19, 16, 2, 0}, /* 20 */ { 4, s_8_20, 16, 2, 0}, /* 21 */ { 6, s_8_21, 20, 2, 0}, /* 22 */ { 6, s_8_22, 20, 2, 0}, /* 23 */ { 6, s_8_23, 20, 2, 0}, /* 24 */ { 2, s_8_24, -1, 1, 0}, /* 25 */ { 4, s_8_25, 24, 2, 0}, /* 26 */ { 5, s_8_26, 24, 2, 0}, /* 27 */ { 4, s_8_27, -1, 2, 0}, /* 28 */ { 5, s_8_28, -1, 2, 0}, /* 29 */ { 5, s_8_29, -1, 2, 0}, /* 30 */ { 5, s_8_30, -1, 2, 0}, /* 31 */ { 5, s_8_31, -1, 2, 0}, /* 32 */ { 3, s_8_32, -1, 2, 0}, /* 33 */ { 3, s_8_33, -1, 2, 0}, /* 34 */ { 4, s_8_34, -1, 2, 0}, /* 35 */ { 5, s_8_35, -1, 2, 0}, /* 36 */ { 2, s_8_36, -1, 2, 0}, /* 37 */ { 2, s_8_37, -1, 2, 0}, /* 38 */ { 2, s_8_38, -1, 2, 0}, /* 39 */ { 2, s_8_39, -1, 2, 0}, /* 40 */ { 4, s_8_40, 39, 2, 0}, /* 41 */ { 4, s_8_41, 39, 2, 0}, /* 42 */ { 4, s_8_42, 39, 2, 0}, /* 43 */ { 4, s_8_43, 39, 2, 0}, /* 44 */ { 5, s_8_44, 39, 2, 0}, /* 45 */ { 4, s_8_45, 39, 2, 0}, /* 46 */ { 6, s_8_46, 45, 2, 0}, /* 47 */ { 6, s_8_47, 45, 2, 0}, /* 48 */ { 6, s_8_48, 45, 2, 0}, /* 49 */ { 2, s_8_49, -1, 1, 0}, /* 50 */ { 4, s_8_50, 49, 2, 0}, /* 51 */ { 5, s_8_51, 49, 2, 0}, /* 52 */ { 5, s_8_52, -1, 2, 0}, /* 53 */ { 5, s_8_53, -1, 2, 0}, /* 54 */ { 6, s_8_54, -1, 2, 0}, /* 55 */ { 5, s_8_55, -1, 2, 0}, /* 56 */ { 7, s_8_56, 55, 2, 0}, /* 57 */ { 7, s_8_57, 55, 2, 0}, /* 58 */ { 7, s_8_58, 55, 2, 0}, /* 59 */ { 5, s_8_59, -1, 2, 0}, /* 60 */ { 6, s_8_60, -1, 2, 0}, /* 61 */ { 6, s_8_61, -1, 2, 0}, /* 62 */ { 6, s_8_62, -1, 2, 0}, /* 63 */ { 4, s_8_63, -1, 2, 0}, /* 64 */ { 4, s_8_64, -1, 1, 0}, /* 65 */ { 6, s_8_65, 64, 2, 0}, /* 66 */ { 6, s_8_66, 64, 2, 0}, /* 67 */ { 6, s_8_67, 64, 2, 0}, /* 68 */ { 4, s_8_68, -1, 2, 0}, /* 69 */ { 4, s_8_69, -1, 2, 0}, /* 70 */ { 4, s_8_70, 
-1, 2, 0}, /* 71 */ { 7, s_8_71, 70, 2, 0}, /* 72 */ { 7, s_8_72, 70, 2, 0}, /* 73 */ { 8, s_8_73, 70, 2, 0}, /* 74 */ { 6, s_8_74, 70, 2, 0}, /* 75 */ { 8, s_8_75, 74, 2, 0}, /* 76 */ { 8, s_8_76, 74, 2, 0}, /* 77 */ { 8, s_8_77, 74, 2, 0}, /* 78 */ { 4, s_8_78, -1, 1, 0}, /* 79 */ { 6, s_8_79, 78, 2, 0}, /* 80 */ { 6, s_8_80, 78, 2, 0}, /* 81 */ { 6, s_8_81, 78, 2, 0}, /* 82 */ { 7, s_8_82, 78, 2, 0}, /* 83 */ { 8, s_8_83, 78, 2, 0}, /* 84 */ { 4, s_8_84, -1, 2, 0}, /* 85 */ { 5, s_8_85, -1, 2, 0}, /* 86 */ { 5, s_8_86, -1, 2, 0}, /* 87 */ { 5, s_8_87, -1, 2, 0}, /* 88 */ { 3, s_8_88, -1, 2, 0}, /* 89 */ { 4, s_8_89, -1, 2, 0}, /* 90 */ { 4, s_8_90, -1, 2, 0}, /* 91 */ { 4, s_8_91, -1, 2, 0}, /* 92 */ { 4, s_8_92, -1, 2, 0}, /* 93 */ { 4, s_8_93, -1, 2, 0}, /* 94 */ { 4, s_8_94, -1, 2, 0}, /* 95 */ { 3, s_8_95, -1, 2, 0} }; static const symbol s_9_0[1] = { 'a' }; static const symbol s_9_1[1] = { 'e' }; static const symbol s_9_2[1] = { 'o' }; static const symbol s_9_3[2] = { 'o', 's' }; static const symbol s_9_4[2] = { 0xC3, 0xA1 }; static const symbol s_9_5[2] = { 0xC3, 0xA9 }; static const symbol s_9_6[2] = { 0xC3, 0xAD }; static const symbol s_9_7[2] = { 0xC3, 0xB3 }; static const struct among a_9[8] = { /* 0 */ { 1, s_9_0, -1, 1, 0}, /* 1 */ { 1, s_9_1, -1, 2, 0}, /* 2 */ { 1, s_9_2, -1, 1, 0}, /* 3 */ { 2, s_9_3, -1, 1, 0}, /* 4 */ { 2, s_9_4, -1, 1, 0}, /* 5 */ { 2, s_9_5, -1, 2, 0}, /* 6 */ { 2, s_9_6, -1, 1, 0}, /* 7 */ { 2, s_9_7, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'o' }; static const symbol s_4[] = { 'u' }; static const symbol s_5[] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_6[] = { 'a', 'n', 'd', 'o' }; static const symbol s_7[] = { 'a', 'r' }; static const symbol s_8[] = { 'e', 'r' }; static const symbol s_9[] = { 'i', 'r' 
}; static const symbol s_10[] = { 'u' }; static const symbol s_11[] = { 'i', 'c' }; static const symbol s_12[] = { 'l', 'o', 'g' }; static const symbol s_13[] = { 'u' }; static const symbol s_14[] = { 'e', 'n', 't', 'e' }; static const symbol s_15[] = { 'a', 't' }; static const symbol s_16[] = { 'a', 't' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'u' }; static const symbol s_19[] = { 'g' }; static const symbol s_20[] = { 'u' }; static const symbol s_21[] = { 'g' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 37 */ { int c2 = z->c; /* or, line 39 */ if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab2; { int c3 = z->c; /* or, line 38 */ if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab4; { /* gopast */ /* grouping v, line 38 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab2; { /* gopast */ /* non v, line 38 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab0; { int c4 = z->c; /* or, line 40 */ if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab6; { /* gopast */ /* grouping v, line 40 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab0; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 40 */ } } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 41 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 43 */ { /* gopast */ /* grouping v, line 44 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 44 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, 
line 44 */ { /* gopast */ /* grouping v, line 45 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 45 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 45 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 49 */ int c1 = z->c; z->bra = z->c; /* [, line 50 */ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((67641858 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else among_var = find_among(z, a_0, 6); /* substring, line 50 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 50 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 51 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 52 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 53 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 54 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 55 */ if (ret < 0) return ret; } break; case 6: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 57 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_attached_pronoun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 68 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((557090 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_1, 13))) return 0; /* substring, line 68 */ z->bra = z->c; /* ], line 68 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 
111 && z->p[z->c - 1] != 114)) return 0; among_var = find_among_b(z, a_2, 11); /* substring, line 72 */ if (!(among_var)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 72 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: z->bra = z->c; /* ], line 73 */ { int ret = slice_from_s(z, 5, s_5); /* <-, line 73 */ if (ret < 0) return ret; } break; case 2: z->bra = z->c; /* ], line 74 */ { int ret = slice_from_s(z, 4, s_6); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: z->bra = z->c; /* ], line 75 */ { int ret = slice_from_s(z, 2, s_7); /* <-, line 75 */ if (ret < 0) return ret; } break; case 4: z->bra = z->c; /* ], line 76 */ { int ret = slice_from_s(z, 2, s_8); /* <-, line 76 */ if (ret < 0) return ret; } break; case 5: z->bra = z->c; /* ], line 77 */ { int ret = slice_from_s(z, 2, s_9); /* <-, line 77 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } break; case 7: if (!(eq_s_b(z, 1, s_10))) return 0; { int ret = slice_del(z); /* delete, line 82 */ if (ret < 0) return ret; } break; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 87 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((835634 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_6, 46); /* substring, line 87 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 87 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 99 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 105 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 105 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 106 */ z->ket = z->c; /* [, line 106 */ if (!(eq_s_b(z, 2, s_11))) { 
z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 106 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 106 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 106 */ if (ret < 0) return ret; } lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 111 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_12); /* <-, line 111 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 115 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_13); /* <-, line 115 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 119 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_14); /* <-, line 119 */ if (ret < 0) return ret; } break; case 6: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 123 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 124 */ z->ket = z->c; /* [, line 125 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_3, 4); /* substring, line 125 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 125 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 125 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: z->ket = z->c; /* [, line 126 */ if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 126 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 126 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, 
line 126 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 135 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 135 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 136 */ z->ket = z->c; /* [, line 137 */ if (z->c - 3 <= z->lb || z->p[z->c - 1] != 101) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_4, 3); /* substring, line 137 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 137 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 140 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 140 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 147 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 148 */ z->ket = z->c; /* [, line 149 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } among_var = find_among_b(z, a_5, 3); /* substring, line 149 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 149 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab3; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 152 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } break; } lab3: ; } break; case 9: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 159 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 159 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 160 */ z->ket = 
z->c; /* [, line 161 */ if (!(eq_s_b(z, 2, s_16))) { z->c = z->l - m_keep; goto lab4; } z->bra = z->c; /* ], line 161 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 161 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 161 */ if (ret < 0) return ret; } lab4: ; } break; } return 1; } static int r_y_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 168 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 168 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 168 */ among_var = find_among_b(z, a_7, 12); /* substring, line 168 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 168 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (!(eq_s_b(z, 1, s_17))) return 0; { int ret = slice_del(z); /* delete, line 171 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 176 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 176 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 176 */ among_var = find_among_b(z, a_8, 96); /* substring, line 176 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 176 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 179 */ if (!(eq_s_b(z, 1, s_18))) { z->c = z->l - m_keep; goto lab0; } { int m_test = z->l - z->c; /* test, line 179 */ if (!(eq_s_b(z, 1, s_19))) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } lab0: ; } z->bra = z->c; /* ], line 179 */ { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 200 */ if (ret < 0) return ret; } break; } return 1; } static int 
r_residual_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 205 */ among_var = find_among_b(z, a_9, 8); /* substring, line 205 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 205 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 208 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 208 */ if (ret < 0) return ret; } break; case 2: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 210 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 210 */ z->ket = z->c; /* [, line 210 */ if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 210 */ { int m_test = z->l - z->c; /* test, line 210 */ if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 210 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } lab0: ; } break; } return 1; } extern int spanish_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 216 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 216 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 217 */ { int m2 = z->l - z->c; (void)m2; /* do, line 218 */ { int ret = r_attached_pronoun(z); if (ret == 0) goto lab1; /* call attached_pronoun, line 218 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 219 */ { int m4 = z->l - z->c; (void)m4; /* or, line 219 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab4; /* call standard_suffix, line 219 */ if (ret < 0) return ret; } goto lab3; lab4: z->c = z->l - m4; { int ret = r_y_verb_suffix(z); if (ret == 0) goto lab5; /* call 
y_verb_suffix, line 220 */ if (ret < 0) return ret; } goto lab3; lab5: z->c = z->l - m4; { int ret = r_verb_suffix(z); if (ret == 0) goto lab2; /* call verb_suffix, line 221 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m5 = z->l - z->c; (void)m5; /* do, line 223 */ { int ret = r_residual_suffix(z); if (ret == 0) goto lab6; /* call residual_suffix, line 223 */ if (ret < 0) return ret; } lab6: z->c = z->l - m5; } z->c = z->lb; { int c6 = z->c; /* do, line 225 */ { int ret = r_postlude(z); if (ret == 0) goto lab7; /* call postlude, line 225 */ if (ret < 0) return ret; } lab7: z->c = c6; } return 1; } extern struct SN_env * spanish_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void spanish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_spanish.h000066400000000000000000000004711217574114600307360ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * spanish_UTF_8_create_env(void); extern void spanish_UTF_8_close_env(struct SN_env * z); extern int spanish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_swedish.c000066400000000000000000000246741217574114600307450ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int swedish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * swedish_UTF_8_create_env(void); extern void swedish_UTF_8_close_env(struct SN_env * z); 
#ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'a' }; static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; static const symbol s_0_5[2] = { 'a', 'd' }; static const symbol s_0_6[1] = { 'e' }; static const symbol s_0_7[3] = { 'a', 'd', 'e' }; static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; static const symbol s_0_10[3] = { 'a', 'r', 'e' }; static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; static const symbol s_0_12[2] = { 'e', 'n' }; static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; static const symbol s_0_16[3] = { 'e', 'r', 'n' }; static const symbol s_0_17[2] = { 'a', 'r' }; static const symbol s_0_18[2] = { 'e', 'r' }; static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; static const symbol s_0_20[2] = { 'o', 'r' }; static const symbol s_0_21[1] = { 's' }; static const symbol s_0_22[2] = { 'a', 's' }; static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; static const symbol s_0_26[2] = { 'e', 's' }; static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; static const symbol s_0_29[3] = { 'e', 'n', 's' }; static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; static const symbol s_0_33[2] = { 'a', 't' }; static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; static const symbol s_0_35[3] = { 'h', 'e', 't' }; static const symbol s_0_36[3] = { 'a', 's', 
't' }; static const struct among a_0[37] = { /* 0 */ { 1, s_0_0, -1, 1, 0}, /* 1 */ { 4, s_0_1, 0, 1, 0}, /* 2 */ { 4, s_0_2, 0, 1, 0}, /* 3 */ { 7, s_0_3, 2, 1, 0}, /* 4 */ { 4, s_0_4, 0, 1, 0}, /* 5 */ { 2, s_0_5, -1, 1, 0}, /* 6 */ { 1, s_0_6, -1, 1, 0}, /* 7 */ { 3, s_0_7, 6, 1, 0}, /* 8 */ { 4, s_0_8, 6, 1, 0}, /* 9 */ { 4, s_0_9, 6, 1, 0}, /* 10 */ { 3, s_0_10, 6, 1, 0}, /* 11 */ { 4, s_0_11, 6, 1, 0}, /* 12 */ { 2, s_0_12, -1, 1, 0}, /* 13 */ { 5, s_0_13, 12, 1, 0}, /* 14 */ { 4, s_0_14, 12, 1, 0}, /* 15 */ { 5, s_0_15, 12, 1, 0}, /* 16 */ { 3, s_0_16, -1, 1, 0}, /* 17 */ { 2, s_0_17, -1, 1, 0}, /* 18 */ { 2, s_0_18, -1, 1, 0}, /* 19 */ { 5, s_0_19, 18, 1, 0}, /* 20 */ { 2, s_0_20, -1, 1, 0}, /* 21 */ { 1, s_0_21, -1, 2, 0}, /* 22 */ { 2, s_0_22, 21, 1, 0}, /* 23 */ { 5, s_0_23, 22, 1, 0}, /* 24 */ { 5, s_0_24, 22, 1, 0}, /* 25 */ { 5, s_0_25, 22, 1, 0}, /* 26 */ { 2, s_0_26, 21, 1, 0}, /* 27 */ { 4, s_0_27, 26, 1, 0}, /* 28 */ { 5, s_0_28, 26, 1, 0}, /* 29 */ { 3, s_0_29, 21, 1, 0}, /* 30 */ { 5, s_0_30, 29, 1, 0}, /* 31 */ { 6, s_0_31, 29, 1, 0}, /* 32 */ { 4, s_0_32, 21, 1, 0}, /* 33 */ { 2, s_0_33, -1, 1, 0}, /* 34 */ { 5, s_0_34, -1, 1, 0}, /* 35 */ { 3, s_0_35, -1, 1, 0}, /* 36 */ { 3, s_0_36, -1, 1, 0} }; static const symbol s_1_0[2] = { 'd', 'd' }; static const symbol s_1_1[2] = { 'g', 'd' }; static const symbol s_1_2[2] = { 'n', 'n' }; static const symbol s_1_3[2] = { 'd', 't' }; static const symbol s_1_4[2] = { 'g', 't' }; static const symbol s_1_5[2] = { 'k', 't' }; static const symbol s_1_6[2] = { 't', 't' }; static const struct among a_1[7] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 2, s_1_4, -1, -1, 0}, /* 5 */ { 2, s_1_5, -1, -1, 0}, /* 6 */ { 2, s_1_6, -1, -1, 0} }; static const symbol s_2_0[2] = { 'i', 'g' }; static const symbol s_2_1[3] = { 'l', 'i', 'g' }; static const symbol s_2_2[3] = { 'e', 'l', 's' }; static const symbol s_2_3[5] = { 'f', 
'u', 'l', 'l', 't' }; static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' }; static const struct among a_2[5] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 3, s_2_2, -1, 1, 0}, /* 3 */ { 5, s_2_3, -1, 3, 0}, /* 4 */ { 5, s_2_4, -1, 2, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; static const unsigned char g_s_ending[] = { 119, 127, 149 }; static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' }; static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 29 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) return 0; z->c = ret; /* hop, line 29 */ } z->I[1] = z->c; /* setmark x, line 29 */ z->c = c_test; } if (out_grouping_U(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ { /* gopast */ /* non v, line 30 */ int ret = in_grouping_U(z, g_v, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 30 */ /* try, line 31 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 37 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 37 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 37 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 37 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b_U(z, g_s_ending, 98, 121, 0)) return 0; { int ret = slice_del(z); /* delete, line 46 */ if (ret < 0) return ret; } break; } 
return 1; } static int r_consonant_pair(struct SN_env * z) { { int mlimit; /* setlimit, line 50 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 50 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* and, line 52 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ z->c = z->l - m2; z->ket = z->c; /* [, line 52 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) { z->lb = mlimit; return 0; } z->c = ret; /* next, line 52 */ } z->bra = z->c; /* ], line 52 */ { int ret = slice_del(z); /* delete, line 52 */ if (ret < 0) return ret; } } z->lb = mlimit; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 55 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 55 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 56 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 56 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 57 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_0); /* <-, line 58 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } extern int swedish_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 66 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 66 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; 
/* backwards, line 67 */ { int m2 = z->l - z->c; (void)m2; /* do, line 68 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 68 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 70 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } z->c = z->lb; return 1; } extern struct SN_env * swedish_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } extern void swedish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_swedish.h000066400000000000000000000004711217574114600307370ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * swedish_UTF_8_create_env(void); extern void swedish_UTF_8_close_env(struct SN_env * z); extern int swedish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_turkish.c000066400000000000000000002360321217574114600307610ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int turkish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_stem_suffix_chain_before_ki(struct SN_env * z); static int r_stem_noun_suffixes(struct SN_env * z); static int r_stem_nominal_verb_suffixes(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_post_process_last_consonants(struct SN_env * z); static int r_more_than_one_syllable_word(struct 
SN_env * z); static int r_mark_suffix_with_optional_s_consonant(struct SN_env * z); static int r_mark_suffix_with_optional_n_consonant(struct SN_env * z); static int r_mark_suffix_with_optional_U_vowel(struct SN_env * z); static int r_mark_suffix_with_optional_y_consonant(struct SN_env * z); static int r_mark_ysA(struct SN_env * z); static int r_mark_ymUs_(struct SN_env * z); static int r_mark_yken(struct SN_env * z); static int r_mark_yDU(struct SN_env * z); static int r_mark_yUz(struct SN_env * z); static int r_mark_yUm(struct SN_env * z); static int r_mark_yU(struct SN_env * z); static int r_mark_ylA(struct SN_env * z); static int r_mark_yA(struct SN_env * z); static int r_mark_possessives(struct SN_env * z); static int r_mark_sUnUz(struct SN_env * z); static int r_mark_sUn(struct SN_env * z); static int r_mark_sU(struct SN_env * z); static int r_mark_nUz(struct SN_env * z); static int r_mark_nUn(struct SN_env * z); static int r_mark_nU(struct SN_env * z); static int r_mark_ndAn(struct SN_env * z); static int r_mark_ndA(struct SN_env * z); static int r_mark_ncA(struct SN_env * z); static int r_mark_nA(struct SN_env * z); static int r_mark_lArI(struct SN_env * z); static int r_mark_lAr(struct SN_env * z); static int r_mark_ki(struct SN_env * z); static int r_mark_DUr(struct SN_env * z); static int r_mark_DAn(struct SN_env * z); static int r_mark_DA(struct SN_env * z); static int r_mark_cAsInA(struct SN_env * z); static int r_is_reserved_word(struct SN_env * z); static int r_check_vowel_harmony(struct SN_env * z); static int r_append_U_to_stems_ending_with_d_or_g(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * turkish_UTF_8_create_env(void); extern void turkish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'm' }; static const symbol s_0_1[1] = { 'n' }; static const symbol s_0_2[3] = { 'm', 'i', 'z' }; static const symbol s_0_3[3] = { 'n', 'i', 'z' }; static const symbol s_0_4[3] = 
{ 'm', 'u', 'z' }; static const symbol s_0_5[3] = { 'n', 'u', 'z' }; static const symbol s_0_6[4] = { 'm', 0xC4, 0xB1, 'z' }; static const symbol s_0_7[4] = { 'n', 0xC4, 0xB1, 'z' }; static const symbol s_0_8[4] = { 'm', 0xC3, 0xBC, 'z' }; static const symbol s_0_9[4] = { 'n', 0xC3, 0xBC, 'z' }; static const struct among a_0[10] = { /* 0 */ { 1, s_0_0, -1, -1, 0}, /* 1 */ { 1, s_0_1, -1, -1, 0}, /* 2 */ { 3, s_0_2, -1, -1, 0}, /* 3 */ { 3, s_0_3, -1, -1, 0}, /* 4 */ { 3, s_0_4, -1, -1, 0}, /* 5 */ { 3, s_0_5, -1, -1, 0}, /* 6 */ { 4, s_0_6, -1, -1, 0}, /* 7 */ { 4, s_0_7, -1, -1, 0}, /* 8 */ { 4, s_0_8, -1, -1, 0}, /* 9 */ { 4, s_0_9, -1, -1, 0} }; static const symbol s_1_0[4] = { 'l', 'e', 'r', 'i' }; static const symbol s_1_1[5] = { 'l', 'a', 'r', 0xC4, 0xB1 }; static const struct among a_1[2] = { /* 0 */ { 4, s_1_0, -1, -1, 0}, /* 1 */ { 5, s_1_1, -1, -1, 0} }; static const symbol s_2_0[2] = { 'n', 'i' }; static const symbol s_2_1[2] = { 'n', 'u' }; static const symbol s_2_2[3] = { 'n', 0xC4, 0xB1 }; static const symbol s_2_3[3] = { 'n', 0xC3, 0xBC }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 3, s_2_2, -1, -1, 0}, /* 3 */ { 3, s_2_3, -1, -1, 0} }; static const symbol s_3_0[2] = { 'i', 'n' }; static const symbol s_3_1[2] = { 'u', 'n' }; static const symbol s_3_2[3] = { 0xC4, 0xB1, 'n' }; static const symbol s_3_3[3] = { 0xC3, 0xBC, 'n' }; static const struct among a_3[4] = { /* 0 */ { 2, s_3_0, -1, -1, 0}, /* 1 */ { 2, s_3_1, -1, -1, 0}, /* 2 */ { 3, s_3_2, -1, -1, 0}, /* 3 */ { 3, s_3_3, -1, -1, 0} }; static const symbol s_4_0[1] = { 'a' }; static const symbol s_4_1[1] = { 'e' }; static const struct among a_4[2] = { /* 0 */ { 1, s_4_0, -1, -1, 0}, /* 1 */ { 1, s_4_1, -1, -1, 0} }; static const symbol s_5_0[2] = { 'n', 'a' }; static const symbol s_5_1[2] = { 'n', 'e' }; static const struct among a_5[2] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0} }; static const symbol 
s_6_0[2] = { 'd', 'a' }; static const symbol s_6_1[2] = { 't', 'a' }; static const symbol s_6_2[2] = { 'd', 'e' }; static const symbol s_6_3[2] = { 't', 'e' }; static const struct among a_6[4] = { /* 0 */ { 2, s_6_0, -1, -1, 0}, /* 1 */ { 2, s_6_1, -1, -1, 0}, /* 2 */ { 2, s_6_2, -1, -1, 0}, /* 3 */ { 2, s_6_3, -1, -1, 0} }; static const symbol s_7_0[3] = { 'n', 'd', 'a' }; static const symbol s_7_1[3] = { 'n', 'd', 'e' }; static const struct among a_7[2] = { /* 0 */ { 3, s_7_0, -1, -1, 0}, /* 1 */ { 3, s_7_1, -1, -1, 0} }; static const symbol s_8_0[3] = { 'd', 'a', 'n' }; static const symbol s_8_1[3] = { 't', 'a', 'n' }; static const symbol s_8_2[3] = { 'd', 'e', 'n' }; static const symbol s_8_3[3] = { 't', 'e', 'n' }; static const struct among a_8[4] = { /* 0 */ { 3, s_8_0, -1, -1, 0}, /* 1 */ { 3, s_8_1, -1, -1, 0}, /* 2 */ { 3, s_8_2, -1, -1, 0}, /* 3 */ { 3, s_8_3, -1, -1, 0} }; static const symbol s_9_0[4] = { 'n', 'd', 'a', 'n' }; static const symbol s_9_1[4] = { 'n', 'd', 'e', 'n' }; static const struct among a_9[2] = { /* 0 */ { 4, s_9_0, -1, -1, 0}, /* 1 */ { 4, s_9_1, -1, -1, 0} }; static const symbol s_10_0[2] = { 'l', 'a' }; static const symbol s_10_1[2] = { 'l', 'e' }; static const struct among a_10[2] = { /* 0 */ { 2, s_10_0, -1, -1, 0}, /* 1 */ { 2, s_10_1, -1, -1, 0} }; static const symbol s_11_0[2] = { 'c', 'a' }; static const symbol s_11_1[2] = { 'c', 'e' }; static const struct among a_11[2] = { /* 0 */ { 2, s_11_0, -1, -1, 0}, /* 1 */ { 2, s_11_1, -1, -1, 0} }; static const symbol s_12_0[2] = { 'i', 'm' }; static const symbol s_12_1[2] = { 'u', 'm' }; static const symbol s_12_2[3] = { 0xC4, 0xB1, 'm' }; static const symbol s_12_3[3] = { 0xC3, 0xBC, 'm' }; static const struct among a_12[4] = { /* 0 */ { 2, s_12_0, -1, -1, 0}, /* 1 */ { 2, s_12_1, -1, -1, 0}, /* 2 */ { 3, s_12_2, -1, -1, 0}, /* 3 */ { 3, s_12_3, -1, -1, 0} }; static const symbol s_13_0[3] = { 's', 'i', 'n' }; static const symbol s_13_1[3] = { 's', 'u', 'n' }; static const symbol 
s_13_2[4] = { 's', 0xC4, 0xB1, 'n' }; static const symbol s_13_3[4] = { 's', 0xC3, 0xBC, 'n' }; static const struct among a_13[4] = { /* 0 */ { 3, s_13_0, -1, -1, 0}, /* 1 */ { 3, s_13_1, -1, -1, 0}, /* 2 */ { 4, s_13_2, -1, -1, 0}, /* 3 */ { 4, s_13_3, -1, -1, 0} }; static const symbol s_14_0[2] = { 'i', 'z' }; static const symbol s_14_1[2] = { 'u', 'z' }; static const symbol s_14_2[3] = { 0xC4, 0xB1, 'z' }; static const symbol s_14_3[3] = { 0xC3, 0xBC, 'z' }; static const struct among a_14[4] = { /* 0 */ { 2, s_14_0, -1, -1, 0}, /* 1 */ { 2, s_14_1, -1, -1, 0}, /* 2 */ { 3, s_14_2, -1, -1, 0}, /* 3 */ { 3, s_14_3, -1, -1, 0} }; static const symbol s_15_0[5] = { 's', 'i', 'n', 'i', 'z' }; static const symbol s_15_1[5] = { 's', 'u', 'n', 'u', 'z' }; static const symbol s_15_2[7] = { 's', 0xC4, 0xB1, 'n', 0xC4, 0xB1, 'z' }; static const symbol s_15_3[7] = { 's', 0xC3, 0xBC, 'n', 0xC3, 0xBC, 'z' }; static const struct among a_15[4] = { /* 0 */ { 5, s_15_0, -1, -1, 0}, /* 1 */ { 5, s_15_1, -1, -1, 0}, /* 2 */ { 7, s_15_2, -1, -1, 0}, /* 3 */ { 7, s_15_3, -1, -1, 0} }; static const symbol s_16_0[3] = { 'l', 'a', 'r' }; static const symbol s_16_1[3] = { 'l', 'e', 'r' }; static const struct among a_16[2] = { /* 0 */ { 3, s_16_0, -1, -1, 0}, /* 1 */ { 3, s_16_1, -1, -1, 0} }; static const symbol s_17_0[3] = { 'n', 'i', 'z' }; static const symbol s_17_1[3] = { 'n', 'u', 'z' }; static const symbol s_17_2[4] = { 'n', 0xC4, 0xB1, 'z' }; static const symbol s_17_3[4] = { 'n', 0xC3, 0xBC, 'z' }; static const struct among a_17[4] = { /* 0 */ { 3, s_17_0, -1, -1, 0}, /* 1 */ { 3, s_17_1, -1, -1, 0}, /* 2 */ { 4, s_17_2, -1, -1, 0}, /* 3 */ { 4, s_17_3, -1, -1, 0} }; static const symbol s_18_0[3] = { 'd', 'i', 'r' }; static const symbol s_18_1[3] = { 't', 'i', 'r' }; static const symbol s_18_2[3] = { 'd', 'u', 'r' }; static const symbol s_18_3[3] = { 't', 'u', 'r' }; static const symbol s_18_4[4] = { 'd', 0xC4, 0xB1, 'r' }; static const symbol s_18_5[4] = { 't', 0xC4, 0xB1, 'r' }; 
static const symbol s_18_6[4] = { 'd', 0xC3, 0xBC, 'r' }; static const symbol s_18_7[4] = { 't', 0xC3, 0xBC, 'r' }; static const struct among a_18[8] = { /* 0 */ { 3, s_18_0, -1, -1, 0}, /* 1 */ { 3, s_18_1, -1, -1, 0}, /* 2 */ { 3, s_18_2, -1, -1, 0}, /* 3 */ { 3, s_18_3, -1, -1, 0}, /* 4 */ { 4, s_18_4, -1, -1, 0}, /* 5 */ { 4, s_18_5, -1, -1, 0}, /* 6 */ { 4, s_18_6, -1, -1, 0}, /* 7 */ { 4, s_18_7, -1, -1, 0} }; static const symbol s_19_0[7] = { 'c', 'a', 's', 0xC4, 0xB1, 'n', 'a' }; static const symbol s_19_1[6] = { 'c', 'e', 's', 'i', 'n', 'e' }; static const struct among a_19[2] = { /* 0 */ { 7, s_19_0, -1, -1, 0}, /* 1 */ { 6, s_19_1, -1, -1, 0} }; static const symbol s_20_0[2] = { 'd', 'i' }; static const symbol s_20_1[2] = { 't', 'i' }; static const symbol s_20_2[3] = { 'd', 'i', 'k' }; static const symbol s_20_3[3] = { 't', 'i', 'k' }; static const symbol s_20_4[3] = { 'd', 'u', 'k' }; static const symbol s_20_5[3] = { 't', 'u', 'k' }; static const symbol s_20_6[4] = { 'd', 0xC4, 0xB1, 'k' }; static const symbol s_20_7[4] = { 't', 0xC4, 0xB1, 'k' }; static const symbol s_20_8[4] = { 'd', 0xC3, 0xBC, 'k' }; static const symbol s_20_9[4] = { 't', 0xC3, 0xBC, 'k' }; static const symbol s_20_10[3] = { 'd', 'i', 'm' }; static const symbol s_20_11[3] = { 't', 'i', 'm' }; static const symbol s_20_12[3] = { 'd', 'u', 'm' }; static const symbol s_20_13[3] = { 't', 'u', 'm' }; static const symbol s_20_14[4] = { 'd', 0xC4, 0xB1, 'm' }; static const symbol s_20_15[4] = { 't', 0xC4, 0xB1, 'm' }; static const symbol s_20_16[4] = { 'd', 0xC3, 0xBC, 'm' }; static const symbol s_20_17[4] = { 't', 0xC3, 0xBC, 'm' }; static const symbol s_20_18[3] = { 'd', 'i', 'n' }; static const symbol s_20_19[3] = { 't', 'i', 'n' }; static const symbol s_20_20[3] = { 'd', 'u', 'n' }; static const symbol s_20_21[3] = { 't', 'u', 'n' }; static const symbol s_20_22[4] = { 'd', 0xC4, 0xB1, 'n' }; static const symbol s_20_23[4] = { 't', 0xC4, 0xB1, 'n' }; static const symbol s_20_24[4] = { 
'd', 0xC3, 0xBC, 'n' }; static const symbol s_20_25[4] = { 't', 0xC3, 0xBC, 'n' }; static const symbol s_20_26[2] = { 'd', 'u' }; static const symbol s_20_27[2] = { 't', 'u' }; static const symbol s_20_28[3] = { 'd', 0xC4, 0xB1 }; static const symbol s_20_29[3] = { 't', 0xC4, 0xB1 }; static const symbol s_20_30[3] = { 'd', 0xC3, 0xBC }; static const symbol s_20_31[3] = { 't', 0xC3, 0xBC }; static const struct among a_20[32] = { /* 0 */ { 2, s_20_0, -1, -1, 0}, /* 1 */ { 2, s_20_1, -1, -1, 0}, /* 2 */ { 3, s_20_2, -1, -1, 0}, /* 3 */ { 3, s_20_3, -1, -1, 0}, /* 4 */ { 3, s_20_4, -1, -1, 0}, /* 5 */ { 3, s_20_5, -1, -1, 0}, /* 6 */ { 4, s_20_6, -1, -1, 0}, /* 7 */ { 4, s_20_7, -1, -1, 0}, /* 8 */ { 4, s_20_8, -1, -1, 0}, /* 9 */ { 4, s_20_9, -1, -1, 0}, /* 10 */ { 3, s_20_10, -1, -1, 0}, /* 11 */ { 3, s_20_11, -1, -1, 0}, /* 12 */ { 3, s_20_12, -1, -1, 0}, /* 13 */ { 3, s_20_13, -1, -1, 0}, /* 14 */ { 4, s_20_14, -1, -1, 0}, /* 15 */ { 4, s_20_15, -1, -1, 0}, /* 16 */ { 4, s_20_16, -1, -1, 0}, /* 17 */ { 4, s_20_17, -1, -1, 0}, /* 18 */ { 3, s_20_18, -1, -1, 0}, /* 19 */ { 3, s_20_19, -1, -1, 0}, /* 20 */ { 3, s_20_20, -1, -1, 0}, /* 21 */ { 3, s_20_21, -1, -1, 0}, /* 22 */ { 4, s_20_22, -1, -1, 0}, /* 23 */ { 4, s_20_23, -1, -1, 0}, /* 24 */ { 4, s_20_24, -1, -1, 0}, /* 25 */ { 4, s_20_25, -1, -1, 0}, /* 26 */ { 2, s_20_26, -1, -1, 0}, /* 27 */ { 2, s_20_27, -1, -1, 0}, /* 28 */ { 3, s_20_28, -1, -1, 0}, /* 29 */ { 3, s_20_29, -1, -1, 0}, /* 30 */ { 3, s_20_30, -1, -1, 0}, /* 31 */ { 3, s_20_31, -1, -1, 0} }; static const symbol s_21_0[2] = { 's', 'a' }; static const symbol s_21_1[2] = { 's', 'e' }; static const symbol s_21_2[3] = { 's', 'a', 'k' }; static const symbol s_21_3[3] = { 's', 'e', 'k' }; static const symbol s_21_4[3] = { 's', 'a', 'm' }; static const symbol s_21_5[3] = { 's', 'e', 'm' }; static const symbol s_21_6[3] = { 's', 'a', 'n' }; static const symbol s_21_7[3] = { 's', 'e', 'n' }; static const struct among a_21[8] = { /* 0 */ { 2, s_21_0, -1, -1, 
0}, /* 1 */ { 2, s_21_1, -1, -1, 0}, /* 2 */ { 3, s_21_2, -1, -1, 0}, /* 3 */ { 3, s_21_3, -1, -1, 0}, /* 4 */ { 3, s_21_4, -1, -1, 0}, /* 5 */ { 3, s_21_5, -1, -1, 0}, /* 6 */ { 3, s_21_6, -1, -1, 0}, /* 7 */ { 3, s_21_7, -1, -1, 0} }; static const symbol s_22_0[4] = { 'm', 'i', 0xC5, 0x9F }; static const symbol s_22_1[4] = { 'm', 'u', 0xC5, 0x9F }; static const symbol s_22_2[5] = { 'm', 0xC4, 0xB1, 0xC5, 0x9F }; static const symbol s_22_3[5] = { 'm', 0xC3, 0xBC, 0xC5, 0x9F }; static const struct among a_22[4] = { /* 0 */ { 4, s_22_0, -1, -1, 0}, /* 1 */ { 4, s_22_1, -1, -1, 0}, /* 2 */ { 5, s_22_2, -1, -1, 0}, /* 3 */ { 5, s_22_3, -1, -1, 0} }; static const symbol s_23_0[1] = { 'b' }; static const symbol s_23_1[1] = { 'c' }; static const symbol s_23_2[1] = { 'd' }; static const symbol s_23_3[2] = { 0xC4, 0x9F }; static const struct among a_23[4] = { /* 0 */ { 1, s_23_0, -1, 1, 0}, /* 1 */ { 1, s_23_1, -1, 2, 0}, /* 2 */ { 1, s_23_2, -1, 3, 0}, /* 3 */ { 2, s_23_3, -1, 4, 0} }; static const unsigned char g_vowel[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 8, 0, 0, 0, 0, 0, 0, 1 }; static const unsigned char g_U[] = { 1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 1 }; static const unsigned char g_vowel1[] = { 1, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; static const unsigned char g_vowel2[] = { 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 }; static const unsigned char g_vowel3[] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; static const unsigned char g_vowel4[] = { 17 }; static const unsigned char g_vowel5[] = { 65 }; static const unsigned char g_vowel6[] = { 65 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 0xC4, 0xB1 }; static const symbol s_3[] = { 'i' }; static const symbol s_4[] = { 'o' }; static const symbol s_5[] = { 0xC3, 0xB6 }; static const symbol 
s_6[] = { 'u' }; static const symbol s_7[] = { 0xC3, 0xBC }; static const symbol s_8[] = { 'n' }; static const symbol s_9[] = { 'n' }; static const symbol s_10[] = { 's' }; static const symbol s_11[] = { 's' }; static const symbol s_12[] = { 'y' }; static const symbol s_13[] = { 'y' }; static const symbol s_14[] = { 'k', 'i' }; static const symbol s_15[] = { 'k', 'e', 'n' }; static const symbol s_16[] = { 'p' }; static const symbol s_17[] = { 0xC3, 0xA7 }; static const symbol s_18[] = { 't' }; static const symbol s_19[] = { 'k' }; static const symbol s_20[] = { 'd' }; static const symbol s_21[] = { 'g' }; static const symbol s_22[] = { 'a' }; static const symbol s_23[] = { 0xC4, 0xB1 }; static const symbol s_24[] = { 0xC4, 0xB1 }; static const symbol s_25[] = { 'e' }; static const symbol s_26[] = { 'i' }; static const symbol s_27[] = { 'i' }; static const symbol s_28[] = { 'o' }; static const symbol s_29[] = { 'u' }; static const symbol s_30[] = { 'u' }; static const symbol s_31[] = { 0xC3, 0xB6 }; static const symbol s_32[] = { 0xC3, 0xBC }; static const symbol s_33[] = { 0xC3, 0xBC }; static const symbol s_34[] = { 'a', 'd' }; static const symbol s_35[] = { 's', 'o', 'y', 'a', 'd' }; static int r_check_vowel_harmony(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 112 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0; /* goto */ /* grouping vowel, line 114 */ { int m1 = z->l - z->c; (void)m1; /* or, line 116 */ if (!(eq_s_b(z, 1, s_0))) goto lab1; if (out_grouping_b_U(z, g_vowel1, 97, 305, 1) < 0) goto lab1; /* goto */ /* grouping vowel1, line 116 */ goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_1))) goto lab2; if (out_grouping_b_U(z, g_vowel2, 101, 252, 1) < 0) goto lab2; /* goto */ /* grouping vowel2, line 117 */ goto lab0; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_2))) goto lab3; if (out_grouping_b_U(z, g_vowel3, 97, 305, 1) < 0) goto lab3; /* goto */ /* grouping vowel3, line 118 */ goto lab0; lab3: z->c = z->l - m1; if 
(!(eq_s_b(z, 1, s_3))) goto lab4; if (out_grouping_b_U(z, g_vowel4, 101, 105, 1) < 0) goto lab4; /* goto */ /* grouping vowel4, line 119 */ goto lab0; lab4: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_4))) goto lab5; if (out_grouping_b_U(z, g_vowel5, 111, 117, 1) < 0) goto lab5; /* goto */ /* grouping vowel5, line 120 */ goto lab0; lab5: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_5))) goto lab6; if (out_grouping_b_U(z, g_vowel6, 246, 252, 1) < 0) goto lab6; /* goto */ /* grouping vowel6, line 121 */ goto lab0; lab6: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_6))) goto lab7; if (out_grouping_b_U(z, g_vowel5, 111, 117, 1) < 0) goto lab7; /* goto */ /* grouping vowel5, line 122 */ goto lab0; lab7: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_7))) return 0; if (out_grouping_b_U(z, g_vowel6, 246, 252, 1) < 0) return 0; /* goto */ /* grouping vowel6, line 123 */ } lab0: z->c = z->l - m_test; } return 1; } static int r_mark_suffix_with_optional_n_consonant(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 134 */ { int m_test = z->l - z->c; /* test, line 133 */ if (!(eq_s_b(z, 1, s_8))) goto lab1; z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab1; z->c = ret; /* next, line 133 */ } { int m_test = z->l - z->c; /* test, line 133 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 135 */ { int m_test = z->l - z->c; /* test, line 135 */ if (!(eq_s_b(z, 1, s_9))) goto lab2; z->c = z->l - m_test; } return 0; lab2: z->c = z->l - m2; } { int m_test = z->l - z->c; /* test, line 135 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 135 */ } { int m_test = z->l - z->c; /* test, line 135 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; z->c = z->l - m_test; } z->c = z->l - m_test; } } lab0: return 1; } static int r_mark_suffix_with_optional_s_consonant(struct SN_env * z) 
{ { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ { int m_test = z->l - z->c; /* test, line 144 */ if (!(eq_s_b(z, 1, s_10))) goto lab1; z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab1; z->c = ret; /* next, line 144 */ } { int m_test = z->l - z->c; /* test, line 144 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ { int m_test = z->l - z->c; /* test, line 146 */ if (!(eq_s_b(z, 1, s_11))) goto lab2; z->c = z->l - m_test; } return 0; lab2: z->c = z->l - m2; } { int m_test = z->l - z->c; /* test, line 146 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 146 */ } { int m_test = z->l - z->c; /* test, line 146 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; z->c = z->l - m_test; } z->c = z->l - m_test; } } lab0: return 1; } static int r_mark_suffix_with_optional_y_consonant(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 155 */ { int m_test = z->l - z->c; /* test, line 154 */ if (!(eq_s_b(z, 1, s_12))) goto lab1; z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab1; z->c = ret; /* next, line 154 */ } { int m_test = z->l - z->c; /* test, line 154 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 156 */ { int m_test = z->l - z->c; /* test, line 156 */ if (!(eq_s_b(z, 1, s_13))) goto lab2; z->c = z->l - m_test; } return 0; lab2: z->c = z->l - m2; } { int m_test = z->l - z->c; /* test, line 156 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 156 */ } { int m_test = z->l - z->c; /* test, line 156 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; z->c = z->l - m_test; } z->c = z->l - m_test; } } lab0: return 
1; } static int r_mark_suffix_with_optional_U_vowel(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 161 */ { int m_test = z->l - z->c; /* test, line 160 */ if (in_grouping_b_U(z, g_U, 105, 305, 0)) goto lab1; z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab1; z->c = ret; /* next, line 160 */ } { int m_test = z->l - z->c; /* test, line 160 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 162 */ { int m_test = z->l - z->c; /* test, line 162 */ if (in_grouping_b_U(z, g_U, 105, 305, 0)) goto lab2; z->c = z->l - m_test; } return 0; lab2: z->c = z->l - m2; } { int m_test = z->l - z->c; /* test, line 162 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 162 */ } { int m_test = z->l - z->c; /* test, line 162 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; z->c = z->l - m_test; } z->c = z->l - m_test; } } lab0: return 1; } static int r_mark_possessives(struct SN_env * z) { if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((67133440 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_0, 10))) return 0; /* among, line 167 */ { int ret = r_mark_suffix_with_optional_U_vowel(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_U_vowel, line 169 */ if (ret < 0) return ret; } return 1; } static int r_mark_sU(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 173 */ if (ret < 0) return ret; } if (in_grouping_b_U(z, g_U, 105, 305, 0)) return 0; { int ret = r_mark_suffix_with_optional_s_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_s_consonant, line 175 */ if (ret < 0) return ret; } return 1; } static int r_mark_lArI(struct SN_env * z) { if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 177)) return 0; if 
(!(find_among_b(z, a_1, 2))) return 0; /* among, line 179 */ return 1; } static int r_mark_yU(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 183 */ if (ret < 0) return ret; } if (in_grouping_b_U(z, g_U, 105, 305, 0)) return 0; { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 185 */ if (ret < 0) return ret; } return 1; } static int r_mark_nU(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 189 */ if (ret < 0) return ret; } if (!(find_among_b(z, a_2, 4))) return 0; /* among, line 190 */ return 1; } static int r_mark_nUn(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 194 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || z->p[z->c - 1] != 110) return 0; if (!(find_among_b(z, a_3, 4))) return 0; /* among, line 195 */ { int ret = r_mark_suffix_with_optional_n_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_n_consonant, line 196 */ if (ret < 0) return ret; } return 1; } static int r_mark_yA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 200 */ if (ret < 0) return ret; } if (z->c <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_4, 2))) return 0; /* among, line 201 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 202 */ if (ret < 0) return ret; } return 1; } static int r_mark_nA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 206 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_5, 2))) return 0; /* among, 
line 207 */ return 1; } static int r_mark_DA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 211 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_6, 4))) return 0; /* among, line 212 */ return 1; } static int r_mark_ndA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 216 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_7, 2))) return 0; /* among, line 217 */ return 1; } static int r_mark_DAn(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 221 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 110) return 0; if (!(find_among_b(z, a_8, 4))) return 0; /* among, line 222 */ return 1; } static int r_mark_ndAn(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 226 */ if (ret < 0) return ret; } if (z->c - 3 <= z->lb || z->p[z->c - 1] != 110) return 0; if (!(find_among_b(z, a_9, 2))) return 0; /* among, line 227 */ return 1; } static int r_mark_ylA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 231 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_10, 2))) return 0; /* among, line 232 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 233 */ if (ret < 0) return ret; } return 1; } static int r_mark_ki(struct SN_env * z) { if (!(eq_s_b(z, 2, s_14))) return 0; return 1; } static int r_mark_ncA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; 
/* call check_vowel_harmony, line 241 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_11, 2))) return 0; /* among, line 242 */ { int ret = r_mark_suffix_with_optional_n_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_n_consonant, line 243 */ if (ret < 0) return ret; } return 1; } static int r_mark_yUm(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 247 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || z->p[z->c - 1] != 109) return 0; if (!(find_among_b(z, a_12, 4))) return 0; /* among, line 248 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 249 */ if (ret < 0) return ret; } return 1; } static int r_mark_sUn(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 253 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 110) return 0; if (!(find_among_b(z, a_13, 4))) return 0; /* among, line 254 */ return 1; } static int r_mark_yUz(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 258 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || z->p[z->c - 1] != 122) return 0; if (!(find_among_b(z, a_14, 4))) return 0; /* among, line 259 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 260 */ if (ret < 0) return ret; } return 1; } static int r_mark_sUnUz(struct SN_env * z) { if (z->c - 4 <= z->lb || z->p[z->c - 1] != 122) return 0; if (!(find_among_b(z, a_15, 4))) return 0; /* among, line 264 */ return 1; } static int r_mark_lAr(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 268 */ if (ret < 0) return 
ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 114) return 0; if (!(find_among_b(z, a_16, 2))) return 0; /* among, line 269 */ return 1; } static int r_mark_nUz(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 273 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 122) return 0; if (!(find_among_b(z, a_17, 4))) return 0; /* among, line 274 */ return 1; } static int r_mark_DUr(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 278 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 114) return 0; if (!(find_among_b(z, a_18, 8))) return 0; /* among, line 279 */ return 1; } static int r_mark_cAsInA(struct SN_env * z) { if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_19, 2))) return 0; /* among, line 283 */ return 1; } static int r_mark_yDU(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 287 */ if (ret < 0) return ret; } if (!(find_among_b(z, a_20, 32))) return 0; /* among, line 288 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 292 */ if (ret < 0) return ret; } return 1; } static int r_mark_ysA(struct SN_env * z) { if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((26658 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_21, 8))) return 0; /* among, line 297 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 298 */ if (ret < 0) return ret; } return 1; } static int r_mark_ymUs_(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 302 */ if (ret < 0) return ret; } if (z->c - 3 <= z->lb || z->p[z->c - 1] != 159) 
return 0; if (!(find_among_b(z, a_22, 4))) return 0; /* among, line 303 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 304 */ if (ret < 0) return ret; } return 1; } static int r_mark_yken(struct SN_env * z) { if (!(eq_s_b(z, 3, s_15))) return 0; { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 308 */ if (ret < 0) return ret; } return 1; } static int r_stem_nominal_verb_suffixes(struct SN_env * z) { z->ket = z->c; /* [, line 312 */ z->B[0] = 1; /* set continue_stemming_noun_suffixes, line 313 */ { int m1 = z->l - z->c; (void)m1; /* or, line 315 */ { int m2 = z->l - z->c; (void)m2; /* or, line 314 */ { int ret = r_mark_ymUs_(z); if (ret == 0) goto lab3; /* call mark_ymUs_, line 314 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = z->l - m2; { int ret = r_mark_yDU(z); if (ret == 0) goto lab4; /* call mark_yDU, line 314 */ if (ret < 0) return ret; } goto lab2; lab4: z->c = z->l - m2; { int ret = r_mark_ysA(z); if (ret == 0) goto lab5; /* call mark_ysA, line 314 */ if (ret < 0) return ret; } goto lab2; lab5: z->c = z->l - m2; { int ret = r_mark_yken(z); if (ret == 0) goto lab1; /* call mark_yken, line 314 */ if (ret < 0) return ret; } } lab2: goto lab0; lab1: z->c = z->l - m1; { int ret = r_mark_cAsInA(z); if (ret == 0) goto lab6; /* call mark_cAsInA, line 316 */ if (ret < 0) return ret; } { int m3 = z->l - z->c; (void)m3; /* or, line 316 */ { int ret = r_mark_sUnUz(z); if (ret == 0) goto lab8; /* call mark_sUnUz, line 316 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m3; { int ret = r_mark_lAr(z); if (ret == 0) goto lab9; /* call mark_lAr, line 316 */ if (ret < 0) return ret; } goto lab7; lab9: z->c = z->l - m3; { int ret = r_mark_yUm(z); if (ret == 0) goto lab10; /* call mark_yUm, line 316 */ if (ret < 0) return ret; } goto lab7; lab10: z->c = z->l - m3; { int ret = r_mark_sUn(z); 
if (ret == 0) goto lab11; /* call mark_sUn, line 316 */ if (ret < 0) return ret; } goto lab7; lab11: z->c = z->l - m3; { int ret = r_mark_yUz(z); if (ret == 0) goto lab12; /* call mark_yUz, line 316 */ if (ret < 0) return ret; } goto lab7; lab12: z->c = z->l - m3; } lab7: { int ret = r_mark_ymUs_(z); if (ret == 0) goto lab6; /* call mark_ymUs_, line 316 */ if (ret < 0) return ret; } goto lab0; lab6: z->c = z->l - m1; { int ret = r_mark_lAr(z); if (ret == 0) goto lab13; /* call mark_lAr, line 319 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 319 */ { int ret = slice_del(z); /* delete, line 319 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 319 */ z->ket = z->c; /* [, line 319 */ { int m4 = z->l - z->c; (void)m4; /* or, line 319 */ { int ret = r_mark_DUr(z); if (ret == 0) goto lab16; /* call mark_DUr, line 319 */ if (ret < 0) return ret; } goto lab15; lab16: z->c = z->l - m4; { int ret = r_mark_yDU(z); if (ret == 0) goto lab17; /* call mark_yDU, line 319 */ if (ret < 0) return ret; } goto lab15; lab17: z->c = z->l - m4; { int ret = r_mark_ysA(z); if (ret == 0) goto lab18; /* call mark_ysA, line 319 */ if (ret < 0) return ret; } goto lab15; lab18: z->c = z->l - m4; { int ret = r_mark_ymUs_(z); if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call mark_ymUs_, line 319 */ if (ret < 0) return ret; } } lab15: lab14: ; } z->B[0] = 0; /* unset continue_stemming_noun_suffixes, line 320 */ goto lab0; lab13: z->c = z->l - m1; { int ret = r_mark_nUz(z); if (ret == 0) goto lab19; /* call mark_nUz, line 323 */ if (ret < 0) return ret; } { int m5 = z->l - z->c; (void)m5; /* or, line 323 */ { int ret = r_mark_yDU(z); if (ret == 0) goto lab21; /* call mark_yDU, line 323 */ if (ret < 0) return ret; } goto lab20; lab21: z->c = z->l - m5; { int ret = r_mark_ysA(z); if (ret == 0) goto lab19; /* call mark_ysA, line 323 */ if (ret < 0) return ret; } } lab20: goto lab0; lab19: z->c = z->l - m1; { int m6 = z->l - z->c; (void)m6; /* 
or, line 325 */ { int ret = r_mark_sUnUz(z); if (ret == 0) goto lab24; /* call mark_sUnUz, line 325 */ if (ret < 0) return ret; } goto lab23; lab24: z->c = z->l - m6; { int ret = r_mark_yUz(z); if (ret == 0) goto lab25; /* call mark_yUz, line 325 */ if (ret < 0) return ret; } goto lab23; lab25: z->c = z->l - m6; { int ret = r_mark_sUn(z); if (ret == 0) goto lab26; /* call mark_sUn, line 325 */ if (ret < 0) return ret; } goto lab23; lab26: z->c = z->l - m6; { int ret = r_mark_yUm(z); if (ret == 0) goto lab22; /* call mark_yUm, line 325 */ if (ret < 0) return ret; } } lab23: z->bra = z->c; /* ], line 325 */ { int ret = slice_del(z); /* delete, line 325 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 325 */ z->ket = z->c; /* [, line 325 */ { int ret = r_mark_ymUs_(z); if (ret == 0) { z->c = z->l - m_keep; goto lab27; } /* call mark_ymUs_, line 325 */ if (ret < 0) return ret; } lab27: ; } goto lab0; lab22: z->c = z->l - m1; { int ret = r_mark_DUr(z); if (ret == 0) return 0; /* call mark_DUr, line 327 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 327 */ { int ret = slice_del(z); /* delete, line 327 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 327 */ z->ket = z->c; /* [, line 327 */ { int m7 = z->l - z->c; (void)m7; /* or, line 327 */ { int ret = r_mark_sUnUz(z); if (ret == 0) goto lab30; /* call mark_sUnUz, line 327 */ if (ret < 0) return ret; } goto lab29; lab30: z->c = z->l - m7; { int ret = r_mark_lAr(z); if (ret == 0) goto lab31; /* call mark_lAr, line 327 */ if (ret < 0) return ret; } goto lab29; lab31: z->c = z->l - m7; { int ret = r_mark_yUm(z); if (ret == 0) goto lab32; /* call mark_yUm, line 327 */ if (ret < 0) return ret; } goto lab29; lab32: z->c = z->l - m7; { int ret = r_mark_sUn(z); if (ret == 0) goto lab33; /* call mark_sUn, line 327 */ if (ret < 0) return ret; } goto lab29; lab33: z->c = z->l - m7; { int ret = r_mark_yUz(z); if (ret == 0) goto lab34; /* call 
mark_yUz, line 327 */ if (ret < 0) return ret; } goto lab29; lab34: z->c = z->l - m7; } lab29: { int ret = r_mark_ymUs_(z); if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call mark_ymUs_, line 327 */ if (ret < 0) return ret; } lab28: ; } } lab0: z->bra = z->c; /* ], line 328 */ { int ret = slice_del(z); /* delete, line 328 */ if (ret < 0) return ret; } return 1; } static int r_stem_suffix_chain_before_ki(struct SN_env * z) { z->ket = z->c; /* [, line 333 */ { int ret = r_mark_ki(z); if (ret == 0) return 0; /* call mark_ki, line 334 */ if (ret < 0) return ret; } { int m1 = z->l - z->c; (void)m1; /* or, line 342 */ { int ret = r_mark_DA(z); if (ret == 0) goto lab1; /* call mark_DA, line 336 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 336 */ { int ret = slice_del(z); /* delete, line 336 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 336 */ z->ket = z->c; /* [, line 336 */ { int m2 = z->l - z->c; (void)m2; /* or, line 338 */ { int ret = r_mark_lAr(z); if (ret == 0) goto lab4; /* call mark_lAr, line 337 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 337 */ { int ret = slice_del(z); /* delete, line 337 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 337 */ { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab5; } /* call stem_suffix_chain_before_ki, line 337 */ if (ret < 0) return ret; } lab5: ; } goto lab3; lab4: z->c = z->l - m2; { int ret = r_mark_possessives(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call mark_possessives, line 339 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 339 */ { int ret = slice_del(z); /* delete, line 339 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 339 */ z->ket = z->c; /* [, line 339 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call mark_lAr, line 339 */ if (ret < 0) return ret; } 
z->bra = z->c; /* ], line 339 */ { int ret = slice_del(z); /* delete, line 339 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call stem_suffix_chain_before_ki, line 339 */ if (ret < 0) return ret; } lab6: ; } } lab3: lab2: ; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_mark_nUn(z); if (ret == 0) goto lab7; /* call mark_nUn, line 343 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 343 */ { int ret = slice_del(z); /* delete, line 343 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 343 */ z->ket = z->c; /* [, line 343 */ { int m3 = z->l - z->c; (void)m3; /* or, line 345 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab10; /* call mark_lArI, line 344 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 344 */ { int ret = slice_del(z); /* delete, line 344 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m3; z->ket = z->c; /* [, line 346 */ { int m4 = z->l - z->c; (void)m4; /* or, line 346 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab13; /* call mark_possessives, line 346 */ if (ret < 0) return ret; } goto lab12; lab13: z->c = z->l - m4; { int ret = r_mark_sU(z); if (ret == 0) goto lab11; /* call mark_sU, line 346 */ if (ret < 0) return ret; } } lab12: z->bra = z->c; /* ], line 346 */ { int ret = slice_del(z); /* delete, line 346 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 346 */ z->ket = z->c; /* [, line 346 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call mark_lAr, line 346 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 346 */ { int ret = slice_del(z); /* delete, line 346 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call stem_suffix_chain_before_ki, line 346 */ if (ret < 0) return ret; } lab14: ; } goto lab9; lab11: 
z->c = z->l - m3; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab8; } /* call stem_suffix_chain_before_ki, line 348 */ if (ret < 0) return ret; } } lab9: lab8: ; } goto lab0; lab7: z->c = z->l - m1; { int ret = r_mark_ndA(z); if (ret == 0) return 0; /* call mark_ndA, line 351 */ if (ret < 0) return ret; } { int m5 = z->l - z->c; (void)m5; /* or, line 353 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab16; /* call mark_lArI, line 352 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 352 */ { int ret = slice_del(z); /* delete, line 352 */ if (ret < 0) return ret; } goto lab15; lab16: z->c = z->l - m5; { int ret = r_mark_sU(z); if (ret == 0) goto lab17; /* call mark_sU, line 354 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 354 */ { int ret = slice_del(z); /* delete, line 354 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 354 */ z->ket = z->c; /* [, line 354 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab18; } /* call mark_lAr, line 354 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 354 */ { int ret = slice_del(z); /* delete, line 354 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab18; } /* call stem_suffix_chain_before_ki, line 354 */ if (ret < 0) return ret; } lab18: ; } goto lab15; lab17: z->c = z->l - m5; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) return 0; /* call stem_suffix_chain_before_ki, line 356 */ if (ret < 0) return ret; } } lab15: ; } lab0: return 1; } static int r_stem_noun_suffixes(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 363 */ z->ket = z->c; /* [, line 362 */ { int ret = r_mark_lAr(z); if (ret == 0) goto lab1; /* call mark_lAr, line 362 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 362 */ { int ret = slice_del(z); /* delete, line 362 */ if (ret < 0) return ret; } { int m_keep = 
z->l - z->c;/* (void) m_keep;*/ /* try, line 362 */ { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call stem_suffix_chain_before_ki, line 362 */ if (ret < 0) return ret; } lab2: ; } goto lab0; lab1: z->c = z->l - m1; z->ket = z->c; /* [, line 364 */ { int ret = r_mark_ncA(z); if (ret == 0) goto lab3; /* call mark_ncA, line 364 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 364 */ { int ret = slice_del(z); /* delete, line 364 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 365 */ { int m2 = z->l - z->c; (void)m2; /* or, line 367 */ z->ket = z->c; /* [, line 366 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab6; /* call mark_lArI, line 366 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 366 */ { int ret = slice_del(z); /* delete, line 366 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m2; z->ket = z->c; /* [, line 368 */ { int m3 = z->l - z->c; (void)m3; /* or, line 368 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab9; /* call mark_possessives, line 368 */ if (ret < 0) return ret; } goto lab8; lab9: z->c = z->l - m3; { int ret = r_mark_sU(z); if (ret == 0) goto lab7; /* call mark_sU, line 368 */ if (ret < 0) return ret; } } lab8: z->bra = z->c; /* ], line 368 */ { int ret = slice_del(z); /* delete, line 368 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 368 */ z->ket = z->c; /* [, line 368 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call mark_lAr, line 368 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 368 */ { int ret = slice_del(z); /* delete, line 368 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call stem_suffix_chain_before_ki, line 368 */ if (ret < 0) return ret; } lab10: ; } goto lab5; lab7: z->c = z->l - m2; z->ket = z->c; /* [, line 370 */ { 
int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call mark_lAr, line 370 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 370 */ { int ret = slice_del(z); /* delete, line 370 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call stem_suffix_chain_before_ki, line 370 */ if (ret < 0) return ret; } } lab5: lab4: ; } goto lab0; lab3: z->c = z->l - m1; z->ket = z->c; /* [, line 374 */ { int m4 = z->l - z->c; (void)m4; /* or, line 374 */ { int ret = r_mark_ndA(z); if (ret == 0) goto lab13; /* call mark_ndA, line 374 */ if (ret < 0) return ret; } goto lab12; lab13: z->c = z->l - m4; { int ret = r_mark_nA(z); if (ret == 0) goto lab11; /* call mark_nA, line 374 */ if (ret < 0) return ret; } } lab12: { int m5 = z->l - z->c; (void)m5; /* or, line 377 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab15; /* call mark_lArI, line 376 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 376 */ { int ret = slice_del(z); /* delete, line 376 */ if (ret < 0) return ret; } goto lab14; lab15: z->c = z->l - m5; { int ret = r_mark_sU(z); if (ret == 0) goto lab16; /* call mark_sU, line 378 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 378 */ { int ret = slice_del(z); /* delete, line 378 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 378 */ z->ket = z->c; /* [, line 378 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab17; } /* call mark_lAr, line 378 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 378 */ { int ret = slice_del(z); /* delete, line 378 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab17; } /* call stem_suffix_chain_before_ki, line 378 */ if (ret < 0) return ret; } lab17: ; } goto lab14; lab16: z->c = z->l - m5; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) goto lab11; /* call 
stem_suffix_chain_before_ki, line 380 */ if (ret < 0) return ret; } } lab14: goto lab0; lab11: z->c = z->l - m1; z->ket = z->c; /* [, line 384 */ { int m6 = z->l - z->c; (void)m6; /* or, line 384 */ { int ret = r_mark_ndAn(z); if (ret == 0) goto lab20; /* call mark_ndAn, line 384 */ if (ret < 0) return ret; } goto lab19; lab20: z->c = z->l - m6; { int ret = r_mark_nU(z); if (ret == 0) goto lab18; /* call mark_nU, line 384 */ if (ret < 0) return ret; } } lab19: { int m7 = z->l - z->c; (void)m7; /* or, line 384 */ { int ret = r_mark_sU(z); if (ret == 0) goto lab22; /* call mark_sU, line 384 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 384 */ { int ret = slice_del(z); /* delete, line 384 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 384 */ z->ket = z->c; /* [, line 384 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab23; } /* call mark_lAr, line 384 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 384 */ { int ret = slice_del(z); /* delete, line 384 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab23; } /* call stem_suffix_chain_before_ki, line 384 */ if (ret < 0) return ret; } lab23: ; } goto lab21; lab22: z->c = z->l - m7; { int ret = r_mark_lArI(z); if (ret == 0) goto lab18; /* call mark_lArI, line 384 */ if (ret < 0) return ret; } } lab21: goto lab0; lab18: z->c = z->l - m1; z->ket = z->c; /* [, line 386 */ { int ret = r_mark_DAn(z); if (ret == 0) goto lab24; /* call mark_DAn, line 386 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 386 */ { int ret = slice_del(z); /* delete, line 386 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 386 */ z->ket = z->c; /* [, line 386 */ { int m8 = z->l - z->c; (void)m8; /* or, line 389 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab27; /* call mark_possessives, line 388 */ if (ret < 0) return ret; } z->bra = 
z->c; /* ], line 388 */ { int ret = slice_del(z); /* delete, line 388 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 388 */ z->ket = z->c; /* [, line 388 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call mark_lAr, line 388 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 388 */ { int ret = slice_del(z); /* delete, line 388 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call stem_suffix_chain_before_ki, line 388 */ if (ret < 0) return ret; } lab28: ; } goto lab26; lab27: z->c = z->l - m8; { int ret = r_mark_lAr(z); if (ret == 0) goto lab29; /* call mark_lAr, line 390 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 390 */ { int ret = slice_del(z); /* delete, line 390 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 390 */ { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab30; } /* call stem_suffix_chain_before_ki, line 390 */ if (ret < 0) return ret; } lab30: ; } goto lab26; lab29: z->c = z->l - m8; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab25; } /* call stem_suffix_chain_before_ki, line 392 */ if (ret < 0) return ret; } } lab26: lab25: ; } goto lab0; lab24: z->c = z->l - m1; z->ket = z->c; /* [, line 396 */ { int m9 = z->l - z->c; (void)m9; /* or, line 396 */ { int ret = r_mark_nUn(z); if (ret == 0) goto lab33; /* call mark_nUn, line 396 */ if (ret < 0) return ret; } goto lab32; lab33: z->c = z->l - m9; { int ret = r_mark_ylA(z); if (ret == 0) goto lab31; /* call mark_ylA, line 396 */ if (ret < 0) return ret; } } lab32: z->bra = z->c; /* ], line 396 */ { int ret = slice_del(z); /* delete, line 396 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 397 */ { int m10 = z->l - z->c; (void)m10; /* or, line 399 */ z->ket = 
z->c; /* [, line 398 */ { int ret = r_mark_lAr(z); if (ret == 0) goto lab36; /* call mark_lAr, line 398 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 398 */ { int ret = slice_del(z); /* delete, line 398 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) goto lab36; /* call stem_suffix_chain_before_ki, line 398 */ if (ret < 0) return ret; } goto lab35; lab36: z->c = z->l - m10; z->ket = z->c; /* [, line 400 */ { int m11 = z->l - z->c; (void)m11; /* or, line 400 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab39; /* call mark_possessives, line 400 */ if (ret < 0) return ret; } goto lab38; lab39: z->c = z->l - m11; { int ret = r_mark_sU(z); if (ret == 0) goto lab37; /* call mark_sU, line 400 */ if (ret < 0) return ret; } } lab38: z->bra = z->c; /* ], line 400 */ { int ret = slice_del(z); /* delete, line 400 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 400 */ z->ket = z->c; /* [, line 400 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab40; } /* call mark_lAr, line 400 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 400 */ { int ret = slice_del(z); /* delete, line 400 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab40; } /* call stem_suffix_chain_before_ki, line 400 */ if (ret < 0) return ret; } lab40: ; } goto lab35; lab37: z->c = z->l - m10; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab34; } /* call stem_suffix_chain_before_ki, line 402 */ if (ret < 0) return ret; } } lab35: lab34: ; } goto lab0; lab31: z->c = z->l - m1; z->ket = z->c; /* [, line 406 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab41; /* call mark_lArI, line 406 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 406 */ { int ret = slice_del(z); /* delete, line 406 */ if (ret < 0) return ret; } goto lab0; lab41: z->c = z->l - m1; { 
int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) goto lab42; /* call stem_suffix_chain_before_ki, line 408 */ if (ret < 0) return ret; } goto lab0; lab42: z->c = z->l - m1; z->ket = z->c; /* [, line 410 */ { int m12 = z->l - z->c; (void)m12; /* or, line 410 */ { int ret = r_mark_DA(z); if (ret == 0) goto lab45; /* call mark_DA, line 410 */ if (ret < 0) return ret; } goto lab44; lab45: z->c = z->l - m12; { int ret = r_mark_yU(z); if (ret == 0) goto lab46; /* call mark_yU, line 410 */ if (ret < 0) return ret; } goto lab44; lab46: z->c = z->l - m12; { int ret = r_mark_yA(z); if (ret == 0) goto lab43; /* call mark_yA, line 410 */ if (ret < 0) return ret; } } lab44: z->bra = z->c; /* ], line 410 */ { int ret = slice_del(z); /* delete, line 410 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 410 */ z->ket = z->c; /* [, line 410 */ { int m13 = z->l - z->c; (void)m13; /* or, line 410 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab49; /* call mark_possessives, line 410 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 410 */ { int ret = slice_del(z); /* delete, line 410 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 410 */ z->ket = z->c; /* [, line 410 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab50; } /* call mark_lAr, line 410 */ if (ret < 0) return ret; } lab50: ; } goto lab48; lab49: z->c = z->l - m13; { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab47; } /* call mark_lAr, line 410 */ if (ret < 0) return ret; } } lab48: z->bra = z->c; /* ], line 410 */ { int ret = slice_del(z); /* delete, line 410 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 410 */ { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab47; } /* call stem_suffix_chain_before_ki, line 410 */ if (ret < 0) return ret; } lab47: ; } goto lab0; lab43: z->c = z->l - m1; z->ket = z->c; /* [, line 
412 */ { int m14 = z->l - z->c; (void)m14; /* or, line 412 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab52; /* call mark_possessives, line 412 */ if (ret < 0) return ret; } goto lab51; lab52: z->c = z->l - m14; { int ret = r_mark_sU(z); if (ret == 0) return 0; /* call mark_sU, line 412 */ if (ret < 0) return ret; } } lab51: z->bra = z->c; /* ], line 412 */ { int ret = slice_del(z); /* delete, line 412 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 412 */ z->ket = z->c; /* [, line 412 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab53; } /* call mark_lAr, line 412 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 412 */ { int ret = slice_del(z); /* delete, line 412 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab53; } /* call stem_suffix_chain_before_ki, line 412 */ if (ret < 0) return ret; } lab53: ; } } lab0: return 1; } static int r_post_process_last_consonants(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 416 */ among_var = find_among_b(z, a_23, 4); /* substring, line 416 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 416 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_16); /* <-, line 417 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_17); /* <-, line 418 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_18); /* <-, line 419 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_19); /* <-, line 420 */ if (ret < 0) return ret; } break; } return 1; } static int r_append_U_to_stems_ending_with_d_or_g(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 431 */ { int m1 = z->l - z->c; (void)m1; /* or, line 431 */ if (!(eq_s_b(z, 1, s_20))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_21))) return 0; } lab0: z->c = z->l - m_test; } 
{ int m2 = z->l - z->c; (void)m2; /* or, line 433 */ { int m_test = z->l - z->c; /* test, line 432 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab3; /* goto */ /* grouping vowel, line 432 */ { int m3 = z->l - z->c; (void)m3; /* or, line 432 */ if (!(eq_s_b(z, 1, s_22))) goto lab5; goto lab4; lab5: z->c = z->l - m3; if (!(eq_s_b(z, 2, s_23))) goto lab3; } lab4: z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 2, s_24); /* <+, line 432 */ z->c = c_keep; if (ret < 0) return ret; } goto lab2; lab3: z->c = z->l - m2; { int m_test = z->l - z->c; /* test, line 434 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab6; /* goto */ /* grouping vowel, line 434 */ { int m4 = z->l - z->c; (void)m4; /* or, line 434 */ if (!(eq_s_b(z, 1, s_25))) goto lab8; goto lab7; lab8: z->c = z->l - m4; if (!(eq_s_b(z, 1, s_26))) goto lab6; } lab7: z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_27); /* <+, line 434 */ z->c = c_keep; if (ret < 0) return ret; } goto lab2; lab6: z->c = z->l - m2; { int m_test = z->l - z->c; /* test, line 436 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab9; /* goto */ /* grouping vowel, line 436 */ { int m5 = z->l - z->c; (void)m5; /* or, line 436 */ if (!(eq_s_b(z, 1, s_28))) goto lab11; goto lab10; lab11: z->c = z->l - m5; if (!(eq_s_b(z, 1, s_29))) goto lab9; } lab10: z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_30); /* <+, line 436 */ z->c = c_keep; if (ret < 0) return ret; } goto lab2; lab9: z->c = z->l - m2; { int m_test = z->l - z->c; /* test, line 438 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0; /* goto */ /* grouping vowel, line 438 */ { int m6 = z->l - z->c; (void)m6; /* or, line 438 */ if (!(eq_s_b(z, 2, s_31))) goto lab13; goto lab12; lab13: z->c = z->l - m6; if (!(eq_s_b(z, 2, s_32))) return 0; } lab12: z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 2, s_33); 
/* <+, line 438 */ z->c = c_keep; if (ret < 0) return ret; } } lab2: return 1; } static int r_more_than_one_syllable_word(struct SN_env * z) { { int c_test = z->c; /* test, line 446 */ { int i = 2; while(1) { /* atleast, line 446 */ int c1 = z->c; { /* gopast */ /* grouping vowel, line 446 */ int ret = out_grouping_U(z, g_vowel, 97, 305, 1); if (ret < 0) goto lab0; z->c += ret; } i--; continue; lab0: z->c = c1; break; } if (i > 0) return 0; } z->c = c_test; } return 1; } static int r_is_reserved_word(struct SN_env * z) { { int c1 = z->c; /* or, line 451 */ { int c_test = z->c; /* test, line 450 */ while(1) { /* gopast, line 450 */ if (!(eq_s(z, 2, s_34))) goto lab2; break; lab2: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab1; z->c = ret; /* gopast, line 450 */ } } z->I[0] = 2; if (!(z->I[0] == z->l)) goto lab1; z->c = c_test; } goto lab0; lab1: z->c = c1; { int c_test = z->c; /* test, line 452 */ while(1) { /* gopast, line 452 */ if (!(eq_s(z, 5, s_35))) goto lab3; break; lab3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) return 0; z->c = ret; /* gopast, line 452 */ } } z->I[0] = 5; if (!(z->I[0] == z->l)) return 0; z->c = c_test; } } lab0: return 1; } static int r_postlude(struct SN_env * z) { { int c1 = z->c; /* not, line 456 */ { int ret = r_is_reserved_word(z); if (ret == 0) goto lab0; /* call is_reserved_word, line 456 */ if (ret < 0) return ret; } return 0; lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 457 */ { int m2 = z->l - z->c; (void)m2; /* do, line 458 */ { int ret = r_append_U_to_stems_ending_with_d_or_g(z); if (ret == 0) goto lab1; /* call append_U_to_stems_ending_with_d_or_g, line 458 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 459 */ { int ret = r_post_process_last_consonants(z); if (ret == 0) goto lab2; /* call post_process_last_consonants, line 459 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; return 1; } 
extern int turkish_UTF_8_stem(struct SN_env * z) { { int ret = r_more_than_one_syllable_word(z); if (ret == 0) return 0; /* call more_than_one_syllable_word, line 465 */ if (ret < 0) return ret; } z->lb = z->c; z->c = z->l; /* backwards, line 467 */ { int m1 = z->l - z->c; (void)m1; /* do, line 468 */ { int ret = r_stem_nominal_verb_suffixes(z); if (ret == 0) goto lab0; /* call stem_nominal_verb_suffixes, line 468 */ if (ret < 0) return ret; } lab0: z->c = z->l - m1; } if (!(z->B[0])) return 0; /* Boolean test continue_stemming_noun_suffixes, line 469 */ { int m2 = z->l - z->c; (void)m2; /* do, line 470 */ { int ret = r_stem_noun_suffixes(z); if (ret == 0) goto lab1; /* call stem_noun_suffixes, line 470 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } z->c = z->lb; { int ret = r_postlude(z); if (ret == 0) return 0; /* call postlude, line 473 */ if (ret < 0) return ret; } return 1; } extern struct SN_env * turkish_UTF_8_create_env(void) { return SN_create_env(0, 1, 1); } extern void turkish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.4/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_turkish.h000066400000000000000000000004711217574114600307620ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env * turkish_UTF_8_create_env(void); extern void turkish_UTF_8_close_env(struct SN_env * z); extern int turkish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.4/src/core/000077500000000000000000000000001217574114600170325ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/CMakeLists.txt000066400000000000000000000044311217574114600215740ustar00rootroot00000000000000PROJECT (lucene++-lib) #################################### # THE lucene++ library #################################### file(GLOB_RECURSE lucene_sources ${lucene++-lib_SOURCE_DIR}/search/*.cpp 
${lucene++-lib_SOURCE_DIR}/analysis/*.cpp ${lucene++-lib_SOURCE_DIR}/document/*.cpp ${lucene++-lib_SOURCE_DIR}/index/*.cpp ${lucene++-lib_SOURCE_DIR}/queryparser/*.cpp ${lucene++-lib_SOURCE_DIR}/store/*.cpp ${lucene++-lib_SOURCE_DIR}/util/*.cpp) file(GLOB_RECURSE INTERN_HEADERS ${lucene++-libs_SOURCE_DIR}/include/*.h) file(GLOB_RECURSE HEADERS ${lucene++-base_SOURCE_DIR}/include/*.h ${lucene++-base_BINARY_DIR}/include/*.h) #C sources... file(GLOB_RECURSE lucene_c_sources ${lucene++-lib_SOURCE_DIR}/util/*.c) IF ( ENABLE_NEDMALLOC ) ADD_DEFINITIONS(-DLPP_USE_NEDMALLOC) ENDIF() LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) INCLUDE_DIRECTORIES(${lucene++-base_SOURCE_DIR}/include) INCLUDE_DIRECTORIES(${lucene++-lib_SOURCE_DIR}/include) ADD_DEFINITIONS(-DLPP_BUILDING_LIB) ADD_LIBRARY(lucene++-c STATIC ${lucene_c_sources} ) install(FILES ${HEADERS} DESTINATION include/lucene++ COMPONENT development) ################################# # lucene++ shared library ################################# ADD_LIBRARY(lucene++ SHARED ${lucene_sources} ${HEADERS} ${INTERN_HEADERS} ) SET(PCH_ADDITIONAL_COMPILER_FLAGS_lucene++ -DLPP_HAVE_DLL) ADD_PRECOMPILED_HEADER(lucene++ ${lucene++-lib_SOURCE_DIR}/include/LuceneInc.h) #set properties on the libraries SET_TARGET_PROPERTIES(lucene++ PROPERTIES VERSION ${LUCENE++_VERSION} SOVERSION ${LUCENE++_SOVERSION} COMPILE_FLAGS -DLPP_HAVE_DLL ) TARGET_LINK_LIBRARIES(lucene++ lucene++-c ${CMAKE_THREAD_LIBS_INIT} ${LUCENE_BOOST_LIBS} ) install(TARGETS lucene++ DESTINATION ${LIB_DESTINATION} COMPONENT runtime) ################################# # lucene++ static library ################################# ADD_LIBRARY(lucene++-static STATIC EXCLUDE_FROM_ALL ${lucene_sources} ${HEADERS} ${INTERN_HEADERS} ) ADD_PRECOMPILED_HEADER(lucene++-static ${lucene++-lib_SOURCE_DIR}/include/LuceneInc.h) #set properties on the libraries SET_TARGET_PROPERTIES(lucene++-static PROPERTIES VERSION ${LUCENE++_VERSION} SOVERSION 
${LUCENE++_SOVERSION} ) LucenePlusPlus-rel_3.0.4/src/core/analysis/000077500000000000000000000000001217574114600206555ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/analysis/ASCIIFoldingFilter.cpp000066400000000000000000003556131217574114600246770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ASCIIFoldingFilter.h" #include "TermAttribute.h" #include "MiscUtils.h" namespace Lucene { ASCIIFoldingFilter::ASCIIFoldingFilter(TokenStreamPtr input) : TokenFilter(input) { output = CharArray::newInstance(512); outputPos = 0; termAtt = addAttribute(); } ASCIIFoldingFilter::~ASCIIFoldingFilter() { } bool ASCIIFoldingFilter::incrementToken() { if (input->incrementToken()) { wchar_t* buffer = termAtt->termBufferArray(); int32_t length = termAtt->termLength(); // If no characters actually require rewriting then we just return token as-is for (int32_t i = 0; i < length; ++i) { wchar_t c = buffer[i]; if (c >= 0x0080) { foldToASCII(buffer, length); termAtt->setTermBuffer(output.get(), 0, outputPos); break; } } return true; } else return false; } void ASCIIFoldingFilter::foldToASCII(const wchar_t* input, int32_t length) { // Worst-case length required int32_t maxSizeNeeded = 4 * length; if (output.size() < maxSizeNeeded) output.resize(MiscUtils::getNextSize(maxSizeNeeded)); outputPos = 0; wchar_t* output = this->output.get(); for (int32_t pos = 0; pos < length; ++pos) { wchar_t c = input[pos]; // Quick test: if it's not in range then just keep current character if (c < 0x0080) output[outputPos++] = c; else { switch (c) { case 0x00C0: // [LATIN CAPITAL LETTER A WITH GRAVE] case 0x00C1: // [LATIN CAPITAL LETTER A WITH 
ACUTE] case 0x00C2: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] case 0x00C3: // [LATIN CAPITAL LETTER A WITH TILDE] case 0x00C4: // [LATIN CAPITAL LETTER A WITH DIAERESIS] case 0x00C5: // [LATIN CAPITAL LETTER A WITH RING ABOVE] case 0x0100: // [LATIN CAPITAL LETTER A WITH MACRON] case 0x0102: // [LATIN CAPITAL LETTER A WITH BREVE] case 0x0104: // [LATIN CAPITAL LETTER A WITH OGONEK] case 0x018F: // [LATIN CAPITAL LETTER SCHWA] case 0x01CD: // [LATIN CAPITAL LETTER A WITH CARON] case 0x01DE: // [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] case 0x01E0: // [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] case 0x01FA: // [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] case 0x0200: // [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] case 0x0202: // [LATIN CAPITAL LETTER A WITH INVERTED BREVE] case 0x0226: // [LATIN CAPITAL LETTER A WITH DOT ABOVE] case 0x023A: // [LATIN CAPITAL LETTER A WITH STROKE] case 0x1D00: // [LATIN LETTER SMALL CAPITAL A] case 0x1E00: // [LATIN CAPITAL LETTER A WITH RING BELOW] case 0x1EA0: // [LATIN CAPITAL LETTER A WITH DOT BELOW] case 0x1EA2: // [LATIN CAPITAL LETTER A WITH HOOK ABOVE] case 0x1EA4: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] case 0x1EA6: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] case 0x1EA8: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1EAA: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] case 0x1EAC: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] case 0x1EAE: // [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] case 0x1EB0: // [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] case 0x1EB2: // [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] case 0x1EB4: // [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] case 0x1EB6: // [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] case 0x24B6: // [CIRCLED LATIN CAPITAL LETTER A] case 0xFF21: // [FULLWIDTH LATIN CAPITAL LETTER A] output[outputPos++] = L'A'; break; case 0x00E0: // [LATIN SMALL LETTER A WITH GRAVE] case 0x00E1: // 
[LATIN SMALL LETTER A WITH ACUTE] case 0x00E2: // [LATIN SMALL LETTER A WITH CIRCUMFLEX] case 0x00E3: // [LATIN SMALL LETTER A WITH TILDE] case 0x00E4: // [LATIN SMALL LETTER A WITH DIAERESIS] case 0x00E5: // [LATIN SMALL LETTER A WITH RING ABOVE] case 0x0101: // [LATIN SMALL LETTER A WITH MACRON] case 0x0103: // [LATIN SMALL LETTER A WITH BREVE] case 0x0105: // [LATIN SMALL LETTER A WITH OGONEK] case 0x01CE: // [LATIN SMALL LETTER A WITH CARON] case 0x01DF: // [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] case 0x01E1: // [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] case 0x01FB: // [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] case 0x0201: // [LATIN SMALL LETTER A WITH DOUBLE GRAVE] case 0x0203: // [LATIN SMALL LETTER A WITH INVERTED BREVE] case 0x0227: // [LATIN SMALL LETTER A WITH DOT ABOVE] case 0x0250: // [LATIN SMALL LETTER TURNED A] case 0x0259: // [LATIN SMALL LETTER SCHWA] case 0x025A: // [LATIN SMALL LETTER SCHWA WITH HOOK] case 0x1D8F: // [LATIN SMALL LETTER A WITH RETROFLEX HOOK] case 0x1D95: // [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] case 0x1E01: // [LATIN SMALL LETTER A WITH RING BELOW] case 0x1E9A: // [LATIN SMALL LETTER A WITH RIGHT HALF RING] case 0x1EA1: // [LATIN SMALL LETTER A WITH DOT BELOW] case 0x1EA3: // [LATIN SMALL LETTER A WITH HOOK ABOVE] case 0x1EA5: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] case 0x1EA7: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] case 0x1EA9: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1EAB: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] case 0x1EAD: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] case 0x1EAF: // [LATIN SMALL LETTER A WITH BREVE AND ACUTE] case 0x1EB1: // [LATIN SMALL LETTER A WITH BREVE AND GRAVE] case 0x1EB3: // [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] case 0x1EB5: // [LATIN SMALL LETTER A WITH BREVE AND TILDE] case 0x1EB7: // [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] case 0x2090: // [LATIN SUBSCRIPT SMALL LETTER A] case 
0x2094: // [LATIN SUBSCRIPT SMALL LETTER SCHWA] case 0x24D0: // [CIRCLED LATIN SMALL LETTER A] case 0x2C65: // [LATIN SMALL LETTER A WITH STROKE] case 0x2C6F: // [LATIN CAPITAL LETTER TURNED A] case 0xFF41: // [FULLWIDTH LATIN SMALL LETTER A] output[outputPos++] = L'a'; break; case 0xA732: // [LATIN CAPITAL LETTER AA] output[outputPos++] = L'A'; output[outputPos++] = L'A'; break; case 0x00C6: // [LATIN CAPITAL LETTER AE] case 0x01E2: // [LATIN CAPITAL LETTER AE WITH MACRON] case 0x01FC: // [LATIN CAPITAL LETTER AE WITH ACUTE] case 0x1D01: // [LATIN LETTER SMALL CAPITAL AE] output[outputPos++] = L'A'; output[outputPos++] = L'E'; break; case 0xA734: // [LATIN CAPITAL LETTER AO] output[outputPos++] = L'A'; output[outputPos++] = L'O'; break; case 0xA736: // [LATIN CAPITAL LETTER AU] output[outputPos++] = L'A'; output[outputPos++] = L'U'; break; case 0xA738: // [LATIN CAPITAL LETTER AV] case 0xA73A: // [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] output[outputPos++] = L'A'; output[outputPos++] = L'V'; break; case 0xA73C: // [LATIN CAPITAL LETTER AY] output[outputPos++] = L'A'; output[outputPos++] = L'Y'; break; case 0x249C: // [PARENTHESIZED LATIN SMALL LETTER A] output[outputPos++] = L'('; output[outputPos++] = L'a'; output[outputPos++] = L')'; break; case 0xA733: // [LATIN SMALL LETTER AA] output[outputPos++] = L'a'; output[outputPos++] = L'a'; break; case 0x00E6: // [LATIN SMALL LETTER AE] case 0x01E3: // [LATIN SMALL LETTER AE WITH MACRON] case 0x01FD: // [LATIN SMALL LETTER AE WITH ACUTE] case 0x1D02: // [LATIN SMALL LETTER TURNED AE] output[outputPos++] = L'a'; output[outputPos++] = L'e'; break; case 0xA735: // [LATIN SMALL LETTER AO] output[outputPos++] = L'a'; output[outputPos++] = L'o'; break; case 0xA737: // [LATIN SMALL LETTER AU] output[outputPos++] = L'a'; output[outputPos++] = L'u'; break; case 0xA739: // [LATIN SMALL LETTER AV] case 0xA73B: // [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] output[outputPos++] = L'a'; output[outputPos++] = L'v'; break; case 
0xA73D: // [LATIN SMALL LETTER AY] output[outputPos++] = L'a'; output[outputPos++] = L'y'; break; case 0x0181: // [LATIN CAPITAL LETTER B WITH HOOK] case 0x0182: // [LATIN CAPITAL LETTER B WITH TOPBAR] case 0x0243: // [LATIN CAPITAL LETTER B WITH STROKE] case 0x0299: // [LATIN LETTER SMALL CAPITAL B] case 0x1D03: // [LATIN LETTER SMALL CAPITAL BARRED B] case 0x1E02: // [LATIN CAPITAL LETTER B WITH DOT ABOVE] case 0x1E04: // [LATIN CAPITAL LETTER B WITH DOT BELOW] case 0x1E06: // [LATIN CAPITAL LETTER B WITH LINE BELOW] case 0x24B7: // [CIRCLED LATIN CAPITAL LETTER B] case 0xFF22: // [FULLWIDTH LATIN CAPITAL LETTER B] output[outputPos++] = L'B'; break; case 0x0180: // [LATIN SMALL LETTER B WITH STROKE] case 0x0183: // [LATIN SMALL LETTER B WITH TOPBAR] case 0x0253: // [LATIN SMALL LETTER B WITH HOOK] case 0x1D6C: // [LATIN SMALL LETTER B WITH MIDDLE TILDE] case 0x1D80: // [LATIN SMALL LETTER B WITH PALATAL HOOK] case 0x1E03: // [LATIN SMALL LETTER B WITH DOT ABOVE] case 0x1E05: // [LATIN SMALL LETTER B WITH DOT BELOW] case 0x1E07: // [LATIN SMALL LETTER B WITH LINE BELOW] case 0x24D1: // [CIRCLED LATIN SMALL LETTER B] case 0xFF42: // [FULLWIDTH LATIN SMALL LETTER B] output[outputPos++] = L'b'; break; case 0x249D: // [PARENTHESIZED LATIN SMALL LETTER B] output[outputPos++] = L'('; output[outputPos++] = L'b'; output[outputPos++] = L')'; break; case 0x00C7: // [LATIN CAPITAL LETTER C WITH CEDILLA] case 0x0106: // [LATIN CAPITAL LETTER C WITH ACUTE] case 0x0108: // [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] case 0x010A: // [LATIN CAPITAL LETTER C WITH DOT ABOVE] case 0x010C: // [LATIN CAPITAL LETTER C WITH CARON] case 0x0187: // [LATIN CAPITAL LETTER C WITH HOOK] case 0x023B: // [LATIN CAPITAL LETTER C WITH STROKE] case 0x0297: // [LATIN LETTER STRETCHED C] case 0x1D04: // [LATIN LETTER SMALL CAPITAL C] case 0x1E08: // [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] case 0x24B8: // [CIRCLED LATIN CAPITAL LETTER C] case 0xFF23: // [FULLWIDTH LATIN CAPITAL LETTER C] 
output[outputPos++] = L'C'; break; case 0x00E7: // [LATIN SMALL LETTER C WITH CEDILLA] case 0x0107: // [LATIN SMALL LETTER C WITH ACUTE] case 0x0109: // [LATIN SMALL LETTER C WITH CIRCUMFLEX] case 0x010B: // [LATIN SMALL LETTER C WITH DOT ABOVE] case 0x010D: // [LATIN SMALL LETTER C WITH CARON] case 0x0188: // [LATIN SMALL LETTER C WITH HOOK] case 0x023C: // [LATIN SMALL LETTER C WITH STROKE] case 0x0255: // [LATIN SMALL LETTER C WITH CURL] case 0x1E09: // [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] case 0x2184: // [LATIN SMALL LETTER REVERSED C] case 0x24D2: // [CIRCLED LATIN SMALL LETTER C] case 0xA73E: // [LATIN CAPITAL LETTER REVERSED C WITH DOT] case 0xA73F: // [LATIN SMALL LETTER REVERSED C WITH DOT] case 0xFF43: // [FULLWIDTH LATIN SMALL LETTER C] output[outputPos++] = L'c'; break; case 0x249E: // [PARENTHESIZED LATIN SMALL LETTER C] output[outputPos++] = L'('; output[outputPos++] = L'c'; output[outputPos++] = L')'; break; case 0x00D0: // [LATIN CAPITAL LETTER ETH] case 0x010E: // [LATIN CAPITAL LETTER D WITH CARON] case 0x0110: // [LATIN CAPITAL LETTER D WITH STROKE] case 0x0189: // [LATIN CAPITAL LETTER AFRICAN D] case 0x018A: // [LATIN CAPITAL LETTER D WITH HOOK] case 0x018B: // [LATIN CAPITAL LETTER D WITH TOPBAR] case 0x1D05: // [LATIN LETTER SMALL CAPITAL D] case 0x1D06: // [LATIN LETTER SMALL CAPITAL ETH] case 0x1E0A: // [LATIN CAPITAL LETTER D WITH DOT ABOVE] case 0x1E0C: // [LATIN CAPITAL LETTER D WITH DOT BELOW] case 0x1E0E: // [LATIN CAPITAL LETTER D WITH LINE BELOW] case 0x1E10: // [LATIN CAPITAL LETTER D WITH CEDILLA] case 0x1E12: // [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] case 0x24B9: // [CIRCLED LATIN CAPITAL LETTER D] case 0xA779: // [LATIN CAPITAL LETTER INSULAR D] case 0xFF24: // [FULLWIDTH LATIN CAPITAL LETTER D] output[outputPos++] = L'D'; break; case 0x00F0: // [LATIN SMALL LETTER ETH] case 0x010F: // [LATIN SMALL LETTER D WITH CARON] case 0x0111: // [LATIN SMALL LETTER D WITH STROKE] case 0x018C: // [LATIN SMALL LETTER D WITH 
TOPBAR] case 0x0221: // [LATIN SMALL LETTER D WITH CURL] case 0x0256: // [LATIN SMALL LETTER D WITH TAIL] case 0x0257: // [LATIN SMALL LETTER D WITH HOOK] case 0x1D6D: // [LATIN SMALL LETTER D WITH MIDDLE TILDE] case 0x1D81: // [LATIN SMALL LETTER D WITH PALATAL HOOK] case 0x1D91: // [LATIN SMALL LETTER D WITH HOOK AND TAIL] case 0x1E0B: // [LATIN SMALL LETTER D WITH DOT ABOVE] case 0x1E0D: // [LATIN SMALL LETTER D WITH DOT BELOW] case 0x1E0F: // [LATIN SMALL LETTER D WITH LINE BELOW] case 0x1E11: // [LATIN SMALL LETTER D WITH CEDILLA] case 0x1E13: // [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] case 0x24D3: // [CIRCLED LATIN SMALL LETTER D] case 0xA77A: // [LATIN SMALL LETTER INSULAR D] case 0xFF44: // [FULLWIDTH LATIN SMALL LETTER D] output[outputPos++] = L'd'; break; case 0x01C4: // [LATIN CAPITAL LETTER DZ WITH CARON] case 0x01F1: // [LATIN CAPITAL LETTER DZ] output[outputPos++] = L'D'; output[outputPos++] = L'Z'; break; case 0x01C5: // [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] case 0x01F2: // [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] output[outputPos++] = L'D'; output[outputPos++] = L'z'; break; case 0x249F: // [PARENTHESIZED LATIN SMALL LETTER D] output[outputPos++] = L'('; output[outputPos++] = L'd'; output[outputPos++] = L')'; break; case 0x0238: // [LATIN SMALL LETTER DB DIGRAPH] output[outputPos++] = L'd'; output[outputPos++] = L'b'; break; case 0x01C6: // [LATIN SMALL LETTER DZ WITH CARON] case 0x01F3: // [LATIN SMALL LETTER DZ] case 0x02A3: // [LATIN SMALL LETTER DZ DIGRAPH] case 0x02A5: // [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] output[outputPos++] = L'd'; output[outputPos++] = L'z'; break; case 0x00C8: // [LATIN CAPITAL LETTER E WITH GRAVE] case 0x00C9: // [LATIN CAPITAL LETTER E WITH ACUTE] case 0x00CA: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] case 0x00CB: // [LATIN CAPITAL LETTER E WITH DIAERESIS] case 0x0112: // [LATIN CAPITAL LETTER E WITH MACRON] case 0x0114: // [LATIN CAPITAL LETTER E WITH BREVE] case 0x0116: // [LATIN 
CAPITAL LETTER E WITH DOT ABOVE] case 0x0118: // [LATIN CAPITAL LETTER E WITH OGONEK] case 0x011A: // [LATIN CAPITAL LETTER E WITH CARON] case 0x018E: // [LATIN CAPITAL LETTER REVERSED E] case 0x0190: // [LATIN CAPITAL LETTER OPEN E] case 0x0204: // [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] case 0x0206: // [LATIN CAPITAL LETTER E WITH INVERTED BREVE] case 0x0228: // [LATIN CAPITAL LETTER E WITH CEDILLA] case 0x0246: // [LATIN CAPITAL LETTER E WITH STROKE] case 0x1D07: // [LATIN LETTER SMALL CAPITAL E] case 0x1E14: // [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] case 0x1E16: // [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] case 0x1E18: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] case 0x1E1A: // [LATIN CAPITAL LETTER E WITH TILDE BELOW] case 0x1E1C: // [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] case 0x1EB8: // [LATIN CAPITAL LETTER E WITH DOT BELOW] case 0x1EBA: // [LATIN CAPITAL LETTER E WITH HOOK ABOVE] case 0x1EBC: // [LATIN CAPITAL LETTER E WITH TILDE] case 0x1EBE: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] case 0x1EC0: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] case 0x1EC2: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1EC4: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] case 0x1EC6: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] case 0x24BA: // [CIRCLED LATIN CAPITAL LETTER E] case 0x2C7B: // [LATIN LETTER SMALL CAPITAL TURNED E] case 0xFF25: // [FULLWIDTH LATIN CAPITAL LETTER E] output[outputPos++] = L'E'; break; case 0x00E8: // [LATIN SMALL LETTER E WITH GRAVE] case 0x00E9: // [LATIN SMALL LETTER E WITH ACUTE] case 0x00EA: // [LATIN SMALL LETTER E WITH CIRCUMFLEX] case 0x00EB: // [LATIN SMALL LETTER E WITH DIAERESIS] case 0x0113: // [LATIN SMALL LETTER E WITH MACRON] case 0x0115: // [LATIN SMALL LETTER E WITH BREVE] case 0x0117: // [LATIN SMALL LETTER E WITH DOT ABOVE] case 0x0119: // [LATIN SMALL LETTER E WITH OGONEK] case 0x011B: // [LATIN SMALL LETTER E WITH CARON] case 0x01DD: // 
[LATIN SMALL LETTER TURNED E] case 0x0205: // [LATIN SMALL LETTER E WITH DOUBLE GRAVE] case 0x0207: // [LATIN SMALL LETTER E WITH INVERTED BREVE] case 0x0229: // [LATIN SMALL LETTER E WITH CEDILLA] case 0x0247: // [LATIN SMALL LETTER E WITH STROKE] case 0x0258: // [LATIN SMALL LETTER REVERSED E] case 0x025B: // [LATIN SMALL LETTER OPEN E] case 0x025C: // [LATIN SMALL LETTER REVERSED OPEN E] case 0x025D: // [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] case 0x025E: // [LATIN SMALL LETTER CLOSED REVERSED OPEN E] case 0x029A: // [LATIN SMALL LETTER CLOSED OPEN E] case 0x1D08: // [LATIN SMALL LETTER TURNED OPEN E] case 0x1D92: // [LATIN SMALL LETTER E WITH RETROFLEX HOOK] case 0x1D93: // [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] case 0x1D94: // [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] case 0x1E15: // [LATIN SMALL LETTER E WITH MACRON AND GRAVE] case 0x1E17: // [LATIN SMALL LETTER E WITH MACRON AND ACUTE] case 0x1E19: // [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] case 0x1E1B: // [LATIN SMALL LETTER E WITH TILDE BELOW] case 0x1E1D: // [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] case 0x1EB9: // [LATIN SMALL LETTER E WITH DOT BELOW] case 0x1EBB: // [LATIN SMALL LETTER E WITH HOOK ABOVE] case 0x1EBD: // [LATIN SMALL LETTER E WITH TILDE] case 0x1EBF: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] case 0x1EC1: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] case 0x1EC3: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1EC5: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] case 0x1EC7: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] case 0x2091: // [LATIN SUBSCRIPT SMALL LETTER E] case 0x24D4: // [CIRCLED LATIN SMALL LETTER E] case 0x2C78: // [LATIN SMALL LETTER E WITH NOTCH] case 0xFF45: // [FULLWIDTH LATIN SMALL LETTER E] output[outputPos++] = L'e'; break; case 0x24A0: // [PARENTHESIZED LATIN SMALL LETTER E] output[outputPos++] = L'('; output[outputPos++] = L'e'; output[outputPos++] = L')'; break; case 0x0191: // 
[LATIN CAPITAL LETTER F WITH HOOK] case 0x1E1E: // [LATIN CAPITAL LETTER F WITH DOT ABOVE] case 0x24BB: // [CIRCLED LATIN CAPITAL LETTER F] case 0xA730: // [LATIN LETTER SMALL CAPITAL F] case 0xA77B: // [LATIN CAPITAL LETTER INSULAR F] case 0xA7FB: // [LATIN EPIGRAPHIC LETTER REVERSED F] case 0xFF26: // [FULLWIDTH LATIN CAPITAL LETTER F] output[outputPos++] = L'F'; break; case 0x0192: // [LATIN SMALL LETTER F WITH HOOK] case 0x1D6E: // [LATIN SMALL LETTER F WITH MIDDLE TILDE] case 0x1D82: // [LATIN SMALL LETTER F WITH PALATAL HOOK] case 0x1E1F: // [LATIN SMALL LETTER F WITH DOT ABOVE] case 0x1E9B: // [LATIN SMALL LETTER LONG S WITH DOT ABOVE] case 0x24D5: // [CIRCLED LATIN SMALL LETTER F] case 0xA77C: // [LATIN SMALL LETTER INSULAR F] case 0xFF46: // [FULLWIDTH LATIN SMALL LETTER F] output[outputPos++] = L'f'; break; case 0x24A1: // [PARENTHESIZED LATIN SMALL LETTER F] output[outputPos++] = L'('; output[outputPos++] = L'f'; output[outputPos++] = L')'; break; case 0xFB00: // [LATIN SMALL LIGATURE FF] output[outputPos++] = L'f'; output[outputPos++] = L'f'; break; case 0xFB03: // [LATIN SMALL LIGATURE FFI] output[outputPos++] = L'f'; output[outputPos++] = L'f'; output[outputPos++] = L'i'; break; case 0xFB04: // [LATIN SMALL LIGATURE FFL] output[outputPos++] = L'f'; output[outputPos++] = L'f'; output[outputPos++] = L'l'; break; case 0xFB01: // [LATIN SMALL LIGATURE FI] output[outputPos++] = L'f'; output[outputPos++] = L'i'; break; case 0xFB02: // [LATIN SMALL LIGATURE FL] output[outputPos++] = L'f'; output[outputPos++] = L'l'; break; case 0x011C: // [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] case 0x011E: // [LATIN CAPITAL LETTER G WITH BREVE] case 0x0120: // [LATIN CAPITAL LETTER G WITH DOT ABOVE] case 0x0122: // [LATIN CAPITAL LETTER G WITH CEDILLA] case 0x0193: // [LATIN CAPITAL LETTER G WITH HOOK] case 0x01E4: // [LATIN CAPITAL LETTER G WITH STROKE] case 0x01E5: // [LATIN SMALL LETTER G WITH STROKE] case 0x01E6: // [LATIN CAPITAL LETTER G WITH CARON] case 0x01E7: // 
[LATIN SMALL LETTER G WITH CARON] case 0x01F4: // [LATIN CAPITAL LETTER G WITH ACUTE] case 0x0262: // [LATIN LETTER SMALL CAPITAL G] case 0x029B: // [LATIN LETTER SMALL CAPITAL G WITH HOOK] case 0x1E20: // [LATIN CAPITAL LETTER G WITH MACRON] case 0x24BC: // [CIRCLED LATIN CAPITAL LETTER G] case 0xA77D: // [LATIN CAPITAL LETTER INSULAR G] case 0xA77E: // [LATIN CAPITAL LETTER TURNED INSULAR G] case 0xFF27: // [FULLWIDTH LATIN CAPITAL LETTER G] output[outputPos++] = L'G'; break; case 0x011D: // [LATIN SMALL LETTER G WITH CIRCUMFLEX] case 0x011F: // [LATIN SMALL LETTER G WITH BREVE] case 0x0121: // [LATIN SMALL LETTER G WITH DOT ABOVE] case 0x0123: // [LATIN SMALL LETTER G WITH CEDILLA] case 0x01F5: // [LATIN SMALL LETTER G WITH ACUTE] case 0x0260: // [LATIN SMALL LETTER G WITH HOOK] case 0x0261: // [LATIN SMALL LETTER SCRIPT G] case 0x1D77: // [LATIN SMALL LETTER TURNED G] case 0x1D79: // [LATIN SMALL LETTER INSULAR G] case 0x1D83: // [LATIN SMALL LETTER G WITH PALATAL HOOK] case 0x1E21: // [LATIN SMALL LETTER G WITH MACRON] case 0x24D6: // [CIRCLED LATIN SMALL LETTER G] case 0xA77F: // [LATIN SMALL LETTER TURNED INSULAR G] case 0xFF47: // [FULLWIDTH LATIN SMALL LETTER G] output[outputPos++] = L'g'; break; case 0x24A2: // [PARENTHESIZED LATIN SMALL LETTER G] output[outputPos++] = L'('; output[outputPos++] = L'g'; output[outputPos++] = L')'; break; case 0x0124: // [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] case 0x0126: // [LATIN CAPITAL LETTER H WITH STROKE] case 0x021E: // [LATIN CAPITAL LETTER H WITH CARON] case 0x029C: // [LATIN LETTER SMALL CAPITAL H] case 0x1E22: // [LATIN CAPITAL LETTER H WITH DOT ABOVE] case 0x1E24: // [LATIN CAPITAL LETTER H WITH DOT BELOW] case 0x1E26: // [LATIN CAPITAL LETTER H WITH DIAERESIS] case 0x1E28: // [LATIN CAPITAL LETTER H WITH CEDILLA] case 0x1E2A: // [LATIN CAPITAL LETTER H WITH BREVE BELOW] case 0x24BD: // [CIRCLED LATIN CAPITAL LETTER H] case 0x2C67: // [LATIN CAPITAL LETTER H WITH DESCENDER] case 0x2C75: // [LATIN CAPITAL 
LETTER HALF H] case 0xFF28: // [FULLWIDTH LATIN CAPITAL LETTER H] output[outputPos++] = L'H'; break; case 0x0125: // [LATIN SMALL LETTER H WITH CIRCUMFLEX] case 0x0127: // [LATIN SMALL LETTER H WITH STROKE] case 0x021F: // [LATIN SMALL LETTER H WITH CARON] case 0x0265: // [LATIN SMALL LETTER TURNED H] case 0x0266: // [LATIN SMALL LETTER H WITH HOOK] case 0x02AE: // [LATIN SMALL LETTER TURNED H WITH FISHHOOK] case 0x02AF: // [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] case 0x1E23: // [LATIN SMALL LETTER H WITH DOT ABOVE] case 0x1E25: // [LATIN SMALL LETTER H WITH DOT BELOW] case 0x1E27: // [LATIN SMALL LETTER H WITH DIAERESIS] case 0x1E29: // [LATIN SMALL LETTER H WITH CEDILLA] case 0x1E2B: // [LATIN SMALL LETTER H WITH BREVE BELOW] case 0x1E96: // [LATIN SMALL LETTER H WITH LINE BELOW] case 0x24D7: // [CIRCLED LATIN SMALL LETTER H] case 0x2C68: // [LATIN SMALL LETTER H WITH DESCENDER] case 0x2C76: // [LATIN SMALL LETTER HALF H] case 0xFF48: // [FULLWIDTH LATIN SMALL LETTER H] output[outputPos++] = L'h'; break; case 0x01F6: // [LATIN CAPITAL LETTER HWAIR] output[outputPos++] = L'H'; output[outputPos++] = L'V'; break; case 0x24A3: // [PARENTHESIZED LATIN SMALL LETTER H] output[outputPos++] = L'('; output[outputPos++] = L'h'; output[outputPos++] = L')'; break; case 0x0195: // [LATIN SMALL LETTER HV] output[outputPos++] = L'h'; output[outputPos++] = L'v'; break; case 0x00CC: // [LATIN CAPITAL LETTER I WITH GRAVE] case 0x00CD: // [LATIN CAPITAL LETTER I WITH ACUTE] case 0x00CE: // [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] case 0x00CF: // [LATIN CAPITAL LETTER I WITH DIAERESIS] case 0x0128: // [LATIN CAPITAL LETTER I WITH TILDE] case 0x012A: // [LATIN CAPITAL LETTER I WITH MACRON] case 0x012C: // [LATIN CAPITAL LETTER I WITH BREVE] case 0x012E: // [LATIN CAPITAL LETTER I WITH OGONEK] case 0x0130: // [LATIN CAPITAL LETTER I WITH DOT ABOVE] case 0x0196: // [LATIN CAPITAL LETTER IOTA] case 0x0197: // [LATIN CAPITAL LETTER I WITH STROKE] case 0x01CF: // [LATIN 
CAPITAL LETTER I WITH CARON] case 0x0208: // [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] case 0x020A: // [LATIN CAPITAL LETTER I WITH INVERTED BREVE] case 0x026A: // [LATIN LETTER SMALL CAPITAL I] case 0x1D7B: // [LATIN SMALL CAPITAL LETTER I WITH STROKE] case 0x1E2C: // [LATIN CAPITAL LETTER I WITH TILDE BELOW] case 0x1E2E: // [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] case 0x1EC8: // [LATIN CAPITAL LETTER I WITH HOOK ABOVE] case 0x1ECA: // [LATIN CAPITAL LETTER I WITH DOT BELOW] case 0x24BE: // [CIRCLED LATIN CAPITAL LETTER I] case 0xA7FE: // [LATIN EPIGRAPHIC LETTER I LONGA] case 0xFF29: // [FULLWIDTH LATIN CAPITAL LETTER I] output[outputPos++] = L'I'; break; case 0x00EC: // [LATIN SMALL LETTER I WITH GRAVE] case 0x00ED: // [LATIN SMALL LETTER I WITH ACUTE] case 0x00EE: // [LATIN SMALL LETTER I WITH CIRCUMFLEX] case 0x00EF: // [LATIN SMALL LETTER I WITH DIAERESIS] case 0x0129: // [LATIN SMALL LETTER I WITH TILDE] case 0x012B: // [LATIN SMALL LETTER I WITH MACRON] case 0x012D: // [LATIN SMALL LETTER I WITH BREVE] case 0x012F: // [LATIN SMALL LETTER I WITH OGONEK] case 0x0131: // [LATIN SMALL LETTER DOTLESS I] case 0x01D0: // [LATIN SMALL LETTER I WITH CARON] case 0x0209: // [LATIN SMALL LETTER I WITH DOUBLE GRAVE] case 0x020B: // [LATIN SMALL LETTER I WITH INVERTED BREVE] case 0x0268: // [LATIN SMALL LETTER I WITH STROKE] case 0x1D09: // [LATIN SMALL LETTER TURNED I] case 0x1D62: // [LATIN SUBSCRIPT SMALL LETTER I] case 0x1D7C: // [LATIN SMALL LETTER IOTA WITH STROKE] case 0x1D96: // [LATIN SMALL LETTER I WITH RETROFLEX HOOK] case 0x1E2D: // [LATIN SMALL LETTER I WITH TILDE BELOW] case 0x1E2F: // [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] case 0x1EC9: // [LATIN SMALL LETTER I WITH HOOK ABOVE] case 0x1ECB: // [LATIN SMALL LETTER I WITH DOT BELOW] case 0x2071: // [SUPERSCRIPT LATIN SMALL LETTER I] case 0x24D8: // [CIRCLED LATIN SMALL LETTER I] case 0xFF49: // [FULLWIDTH LATIN SMALL LETTER I] output[outputPos++] = L'i'; break; case 0x0132: // [LATIN 
CAPITAL LIGATURE IJ] output[outputPos++] = L'I'; output[outputPos++] = L'J'; break; case 0x24A4: // [PARENTHESIZED LATIN SMALL LETTER I] output[outputPos++] = L'('; output[outputPos++] = L'i'; output[outputPos++] = L')'; break; case 0x0133: // [LATIN SMALL LIGATURE IJ] output[outputPos++] = L'i'; output[outputPos++] = L'j'; break; case 0x0134: // [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] case 0x0248: // [LATIN CAPITAL LETTER J WITH STROKE] case 0x1D0A: // [LATIN LETTER SMALL CAPITAL J] case 0x24BF: // [CIRCLED LATIN CAPITAL LETTER J] case 0xFF2A: // [FULLWIDTH LATIN CAPITAL LETTER J] output[outputPos++] = L'J'; break; case 0x0135: // [LATIN SMALL LETTER J WITH CIRCUMFLEX] case 0x01F0: // [LATIN SMALL LETTER J WITH CARON] case 0x0237: // [LATIN SMALL LETTER DOTLESS J] case 0x0249: // [LATIN SMALL LETTER J WITH STROKE] case 0x025F: // [LATIN SMALL LETTER DOTLESS J WITH STROKE] case 0x0284: // [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] case 0x029D: // [LATIN SMALL LETTER J WITH CROSSED-TAIL] case 0x24D9: // [CIRCLED LATIN SMALL LETTER J] case 0x2C7C: // [LATIN SUBSCRIPT SMALL LETTER J] case 0xFF4A: // [FULLWIDTH LATIN SMALL LETTER J] output[outputPos++] = L'j'; break; case 0x24A5: // [PARENTHESIZED LATIN SMALL LETTER J] output[outputPos++] = L'('; output[outputPos++] = L'j'; output[outputPos++] = L')'; break; case 0x0136: // [LATIN CAPITAL LETTER K WITH CEDILLA] case 0x0198: // [LATIN CAPITAL LETTER K WITH HOOK] case 0x01E8: // [LATIN CAPITAL LETTER K WITH CARON] case 0x1D0B: // [LATIN LETTER SMALL CAPITAL K] case 0x1E30: // [LATIN CAPITAL LETTER K WITH ACUTE] case 0x1E32: // [LATIN CAPITAL LETTER K WITH DOT BELOW] case 0x1E34: // [LATIN CAPITAL LETTER K WITH LINE BELOW] case 0x24C0: // [CIRCLED LATIN CAPITAL LETTER K] case 0x2C69: // [LATIN CAPITAL LETTER K WITH DESCENDER] case 0xA740: // [LATIN CAPITAL LETTER K WITH STROKE] case 0xA742: // [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] case 0xA744: // [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] 
case 0xFF2B: // [FULLWIDTH LATIN CAPITAL LETTER K] output[outputPos++] = L'K'; break; case 0x0137: // [LATIN SMALL LETTER K WITH CEDILLA] case 0x0199: // [LATIN SMALL LETTER K WITH HOOK] case 0x01E9: // [LATIN SMALL LETTER K WITH CARON] case 0x029E: // [LATIN SMALL LETTER TURNED K] case 0x1D84: // [LATIN SMALL LETTER K WITH PALATAL HOOK] case 0x1E31: // [LATIN SMALL LETTER K WITH ACUTE] case 0x1E33: // [LATIN SMALL LETTER K WITH DOT BELOW] case 0x1E35: // [LATIN SMALL LETTER K WITH LINE BELOW] case 0x24DA: // [CIRCLED LATIN SMALL LETTER K] case 0x2C6A: // [LATIN SMALL LETTER K WITH DESCENDER] case 0xA741: // [LATIN SMALL LETTER K WITH STROKE] case 0xA743: // [LATIN SMALL LETTER K WITH DIAGONAL STROKE] case 0xA745: // [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] case 0xFF4B: // [FULLWIDTH LATIN SMALL LETTER K] output[outputPos++] = L'k'; break; case 0x24A6: // [PARENTHESIZED LATIN SMALL LETTER K] output[outputPos++] = L'('; output[outputPos++] = L'k'; output[outputPos++] = L')'; break; case 0x0139: // [LATIN CAPITAL LETTER L WITH ACUTE] case 0x013B: // [LATIN CAPITAL LETTER L WITH CEDILLA] case 0x013D: // [LATIN CAPITAL LETTER L WITH CARON] case 0x013F: // [LATIN CAPITAL LETTER L WITH MIDDLE DOT] case 0x0141: // [LATIN CAPITAL LETTER L WITH STROKE] case 0x023D: // [LATIN CAPITAL LETTER L WITH BAR] case 0x029F: // [LATIN LETTER SMALL CAPITAL L] case 0x1D0C: // [LATIN LETTER SMALL CAPITAL L WITH STROKE] case 0x1E36: // [LATIN CAPITAL LETTER L WITH DOT BELOW] case 0x1E38: // [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] case 0x1E3A: // [LATIN CAPITAL LETTER L WITH LINE BELOW] case 0x1E3C: // [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] case 0x24C1: // [CIRCLED LATIN CAPITAL LETTER L] case 0x2C60: // [LATIN CAPITAL LETTER L WITH DOUBLE BAR] case 0x2C62: // [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] case 0xA746: // [LATIN CAPITAL LETTER BROKEN L] case 0xA748: // [LATIN CAPITAL LETTER L WITH HIGH STROKE] case 0xA780: // [LATIN CAPITAL LETTER TURNED L] 
case 0xFF2C: // [FULLWIDTH LATIN CAPITAL LETTER L] output[outputPos++] = L'L'; break; case 0x013A: // [LATIN SMALL LETTER L WITH ACUTE] case 0x013C: // [LATIN SMALL LETTER L WITH CEDILLA] case 0x013E: // [LATIN SMALL LETTER L WITH CARON] case 0x0140: // [LATIN SMALL LETTER L WITH MIDDLE DOT] case 0x0142: // [LATIN SMALL LETTER L WITH STROKE] case 0x019A: // [LATIN SMALL LETTER L WITH BAR] case 0x0234: // [LATIN SMALL LETTER L WITH CURL] case 0x026B: // [LATIN SMALL LETTER L WITH MIDDLE TILDE] case 0x026C: // [LATIN SMALL LETTER L WITH BELT] case 0x026D: // [LATIN SMALL LETTER L WITH RETROFLEX HOOK] case 0x1D85: // [LATIN SMALL LETTER L WITH PALATAL HOOK] case 0x1E37: // [LATIN SMALL LETTER L WITH DOT BELOW] case 0x1E39: // [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] case 0x1E3B: // [LATIN SMALL LETTER L WITH LINE BELOW] case 0x1E3D: // [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] case 0x24DB: // [CIRCLED LATIN SMALL LETTER L] case 0x2C61: // [LATIN SMALL LETTER L WITH DOUBLE BAR] case 0xA747: // [LATIN SMALL LETTER BROKEN L] case 0xA749: // [LATIN SMALL LETTER L WITH HIGH STROKE] case 0xA781: // [LATIN SMALL LETTER TURNED L] case 0xFF4C: // [FULLWIDTH LATIN SMALL LETTER L] output[outputPos++] = L'l'; break; case 0x01C7: // [LATIN CAPITAL LETTER LJ] output[outputPos++] = L'L'; output[outputPos++] = L'J'; break; case 0x1EFA: // [LATIN CAPITAL LETTER MIDDLE-WELSH LL] output[outputPos++] = L'L'; output[outputPos++] = L'L'; break; case 0x01C8: // [LATIN CAPITAL LETTER L WITH SMALL LETTER J] output[outputPos++] = L'L'; output[outputPos++] = L'j'; break; case 0x24A7: // [PARENTHESIZED LATIN SMALL LETTER L] output[outputPos++] = L'('; output[outputPos++] = L'l'; output[outputPos++] = L')'; break; case 0x01C9: // [LATIN SMALL LETTER LJ] output[outputPos++] = L'l'; output[outputPos++] = L'j'; break; case 0x1EFB: // [LATIN SMALL LETTER MIDDLE-WELSH LL] output[outputPos++] = L'l'; output[outputPos++] = L'l'; break; case 0x02AA: // [LATIN SMALL LETTER LS DIGRAPH] 
output[outputPos++] = L'l'; output[outputPos++] = L's'; break; case 0x02AB: // [LATIN SMALL LETTER LZ DIGRAPH] output[outputPos++] = L'l'; output[outputPos++] = L'z'; break; case 0x019C: // [LATIN CAPITAL LETTER TURNED M] case 0x1D0D: // [LATIN LETTER SMALL CAPITAL M] case 0x1E3E: // [LATIN CAPITAL LETTER M WITH ACUTE] case 0x1E40: // [LATIN CAPITAL LETTER M WITH DOT ABOVE] case 0x1E42: // [LATIN CAPITAL LETTER M WITH DOT BELOW] case 0x24C2: // [CIRCLED LATIN CAPITAL LETTER M] case 0x2C6E: // [LATIN CAPITAL LETTER M WITH HOOK] case 0xA7FD: // [LATIN EPIGRAPHIC LETTER INVERTED M] case 0xA7FF: // [LATIN EPIGRAPHIC LETTER ARCHAIC M] case 0xFF2D: // [FULLWIDTH LATIN CAPITAL LETTER M] output[outputPos++] = L'M'; break; case 0x026F: // [LATIN SMALL LETTER TURNED M] case 0x0270: // [LATIN SMALL LETTER TURNED M WITH LONG LEG] case 0x0271: // [LATIN SMALL LETTER M WITH HOOK] case 0x1D6F: // [LATIN SMALL LETTER M WITH MIDDLE TILDE] case 0x1D86: // [LATIN SMALL LETTER M WITH PALATAL HOOK] case 0x1E3F: // [LATIN SMALL LETTER M WITH ACUTE] case 0x1E41: // [LATIN SMALL LETTER M WITH DOT ABOVE] case 0x1E43: // [LATIN SMALL LETTER M WITH DOT BELOW] case 0x24DC: // [CIRCLED LATIN SMALL LETTER M] case 0xFF4D: // [FULLWIDTH LATIN SMALL LETTER M] output[outputPos++] = L'm'; break; case 0x24A8: // [PARENTHESIZED LATIN SMALL LETTER M] output[outputPos++] = L'('; output[outputPos++] = L'm'; output[outputPos++] = L')'; break; case 0x00D1: // [LATIN CAPITAL LETTER N WITH TILDE] case 0x0143: // [LATIN CAPITAL LETTER N WITH ACUTE] case 0x0145: // [LATIN CAPITAL LETTER N WITH CEDILLA] case 0x0147: // [LATIN CAPITAL LETTER N WITH CARON] case 0x014A: // [LATIN CAPITAL LETTER ENG] case 0x019D: // [LATIN CAPITAL LETTER N WITH LEFT HOOK] case 0x01F8: // [LATIN CAPITAL LETTER N WITH GRAVE] case 0x0220: // [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] case 0x0274: // [LATIN LETTER SMALL CAPITAL N] case 0x1D0E: // [LATIN LETTER SMALL CAPITAL REVERSED N] case 0x1E44: // [LATIN CAPITAL LETTER N WITH DOT 
ABOVE] case 0x1E46: // [LATIN CAPITAL LETTER N WITH DOT BELOW] case 0x1E48: // [LATIN CAPITAL LETTER N WITH LINE BELOW] case 0x1E4A: // [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] case 0x24C3: // [CIRCLED LATIN CAPITAL LETTER N] case 0xFF2E: // [FULLWIDTH LATIN CAPITAL LETTER N] output[outputPos++] = L'N'; break; case 0x00F1: // [LATIN SMALL LETTER N WITH TILDE] case 0x0144: // [LATIN SMALL LETTER N WITH ACUTE] case 0x0146: // [LATIN SMALL LETTER N WITH CEDILLA] case 0x0148: // [LATIN SMALL LETTER N WITH CARON] case 0x0149: // [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] case 0x014B: // [LATIN SMALL LETTER ENG] case 0x019E: // [LATIN SMALL LETTER N WITH LONG RIGHT LEG] case 0x01F9: // [LATIN SMALL LETTER N WITH GRAVE] case 0x0235: // [LATIN SMALL LETTER N WITH CURL] case 0x0272: // [LATIN SMALL LETTER N WITH LEFT HOOK] case 0x0273: // [LATIN SMALL LETTER N WITH RETROFLEX HOOK] case 0x1D70: // [LATIN SMALL LETTER N WITH MIDDLE TILDE] case 0x1D87: // [LATIN SMALL LETTER N WITH PALATAL HOOK] case 0x1E45: // [LATIN SMALL LETTER N WITH DOT ABOVE] case 0x1E47: // [LATIN SMALL LETTER N WITH DOT BELOW] case 0x1E49: // [LATIN SMALL LETTER N WITH LINE BELOW] case 0x1E4B: // [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] case 0x207F: // [SUPERSCRIPT LATIN SMALL LETTER N] case 0x24DD: // [CIRCLED LATIN SMALL LETTER N] case 0xFF4E: // [FULLWIDTH LATIN SMALL LETTER N] output[outputPos++] = L'n'; break; case 0x01CA: // [LATIN CAPITAL LETTER NJ] output[outputPos++] = L'N'; output[outputPos++] = L'J'; break; case 0x01CB: // [LATIN CAPITAL LETTER N WITH SMALL LETTER J] output[outputPos++] = L'N'; output[outputPos++] = L'j'; break; case 0x24A9: // [PARENTHESIZED LATIN SMALL LETTER N] output[outputPos++] = L'('; output[outputPos++] = L'n'; output[outputPos++] = L')'; break; case 0x01CC: // [LATIN SMALL LETTER NJ] output[outputPos++] = L'n'; output[outputPos++] = L'j'; break; case 0x00D2: // [LATIN CAPITAL LETTER O WITH GRAVE] case 0x00D3: // [LATIN CAPITAL LETTER O WITH ACUTE] case 
0x00D4: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] case 0x00D5: // [LATIN CAPITAL LETTER O WITH TILDE] case 0x00D6: // [LATIN CAPITAL LETTER O WITH DIAERESIS] case 0x00D8: // [LATIN CAPITAL LETTER O WITH STROKE] case 0x014C: // [LATIN CAPITAL LETTER O WITH MACRON] case 0x014E: // [LATIN CAPITAL LETTER O WITH BREVE] case 0x0150: // [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] case 0x0186: // [LATIN CAPITAL LETTER OPEN O] case 0x019F: // [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] case 0x01A0: // [LATIN CAPITAL LETTER O WITH HORN] case 0x01D1: // [LATIN CAPITAL LETTER O WITH CARON] case 0x01EA: // [LATIN CAPITAL LETTER O WITH OGONEK] case 0x01EC: // [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] case 0x01FE: // [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] case 0x020C: // [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] case 0x020E: // [LATIN CAPITAL LETTER O WITH INVERTED BREVE] case 0x022A: // [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] case 0x022C: // [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] case 0x022E: // [LATIN CAPITAL LETTER O WITH DOT ABOVE] case 0x0230: // [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] case 0x1D0F: // [LATIN LETTER SMALL CAPITAL O] case 0x1D10: // [LATIN LETTER SMALL CAPITAL OPEN O] case 0x1E4C: // [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] case 0x1E4E: // [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] case 0x1E50: // [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] case 0x1E52: // [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] case 0x1ECC: // [LATIN CAPITAL LETTER O WITH DOT BELOW] case 0x1ECE: // [LATIN CAPITAL LETTER O WITH HOOK ABOVE] case 0x1ED0: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] case 0x1ED2: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] case 0x1ED4: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1ED6: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] case 0x1ED8: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] case 0x1EDA: // [LATIN CAPITAL LETTER O WITH HORN 
AND ACUTE] case 0x1EDC: // [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] case 0x1EDE: // [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] case 0x1EE0: // [LATIN CAPITAL LETTER O WITH HORN AND TILDE] case 0x1EE2: // [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] case 0x24C4: // [CIRCLED LATIN CAPITAL LETTER O] case 0xA74A: // [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] case 0xA74C: // [LATIN CAPITAL LETTER O WITH LOOP] case 0xFF2F: // [FULLWIDTH LATIN CAPITAL LETTER O] output[outputPos++] = L'O'; break; case 0x00F2: // [LATIN SMALL LETTER O WITH GRAVE] case 0x00F3: // [LATIN SMALL LETTER O WITH ACUTE] case 0x00F4: // [LATIN SMALL LETTER O WITH CIRCUMFLEX] case 0x00F5: // [LATIN SMALL LETTER O WITH TILDE] case 0x00F6: // [LATIN SMALL LETTER O WITH DIAERESIS] case 0x00F8: // [LATIN SMALL LETTER O WITH STROKE] case 0x014D: // [LATIN SMALL LETTER O WITH MACRON] case 0x014F: // [LATIN SMALL LETTER O WITH BREVE] case 0x0151: // [LATIN SMALL LETTER O WITH DOUBLE ACUTE] case 0x01A1: // [LATIN SMALL LETTER O WITH HORN] case 0x01D2: // [LATIN SMALL LETTER O WITH CARON] case 0x01EB: // [LATIN SMALL LETTER O WITH OGONEK] case 0x01ED: // [LATIN SMALL LETTER O WITH OGONEK AND MACRON] case 0x01FF: // [LATIN SMALL LETTER O WITH STROKE AND ACUTE] case 0x020D: // [LATIN SMALL LETTER O WITH DOUBLE GRAVE] case 0x020F: // [LATIN SMALL LETTER O WITH INVERTED BREVE] case 0x022B: // [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] case 0x022D: // [LATIN SMALL LETTER O WITH TILDE AND MACRON] case 0x022F: // [LATIN SMALL LETTER O WITH DOT ABOVE] case 0x0231: // [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] case 0x0254: // [LATIN SMALL LETTER OPEN O] case 0x0275: // [LATIN SMALL LETTER BARRED O] case 0x1D16: // [LATIN SMALL LETTER TOP HALF O] case 0x1D17: // [LATIN SMALL LETTER BOTTOM HALF O] case 0x1D97: // [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] case 0x1E4D: // [LATIN SMALL LETTER O WITH TILDE AND ACUTE] case 0x1E4F: // [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] case 
0x1E51: // [LATIN SMALL LETTER O WITH MACRON AND GRAVE] case 0x1E53: // [LATIN SMALL LETTER O WITH MACRON AND ACUTE] case 0x1ECD: // [LATIN SMALL LETTER O WITH DOT BELOW] case 0x1ECF: // [LATIN SMALL LETTER O WITH HOOK ABOVE] case 0x1ED1: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] case 0x1ED3: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] case 0x1ED5: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1ED7: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] case 0x1ED9: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] case 0x1EDB: // [LATIN SMALL LETTER O WITH HORN AND ACUTE] case 0x1EDD: // [LATIN SMALL LETTER O WITH HORN AND GRAVE] case 0x1EDF: // [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] case 0x1EE1: // [LATIN SMALL LETTER O WITH HORN AND TILDE] case 0x1EE3: // [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] case 0x2092: // [LATIN SUBSCRIPT SMALL LETTER O] case 0x24DE: // [CIRCLED LATIN SMALL LETTER O] case 0x2C7A: // [LATIN SMALL LETTER O WITH LOW RING INSIDE] case 0xA74B: // [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] case 0xA74D: // [LATIN SMALL LETTER O WITH LOOP] case 0xFF4F: // [FULLWIDTH LATIN SMALL LETTER O] output[outputPos++] = L'o'; break; case 0x0152: // [LATIN CAPITAL LIGATURE OE] case 0x0276: // [LATIN LETTER SMALL CAPITAL OE] output[outputPos++] = L'O'; output[outputPos++] = L'E'; break; case 0xA74E: // [LATIN CAPITAL LETTER OO] output[outputPos++] = L'O'; output[outputPos++] = L'O'; break; case 0x0222: // [LATIN CAPITAL LETTER OU] case 0x1D15: // [LATIN LETTER SMALL CAPITAL OU] output[outputPos++] = L'O'; output[outputPos++] = L'U'; break; case 0x24AA: // [PARENTHESIZED LATIN SMALL LETTER O] output[outputPos++] = L'('; output[outputPos++] = L'o'; output[outputPos++] = L')'; break; case 0x0153: // [LATIN SMALL LIGATURE OE] case 0x1D14: // [LATIN SMALL LETTER TURNED OE] output[outputPos++] = L'o'; output[outputPos++] = L'e'; break; case 0xA74F: // [LATIN SMALL LETTER OO] output[outputPos++] = L'o'; 
output[outputPos++] = L'o'; break; case 0x0223: // [LATIN SMALL LETTER OU] output[outputPos++] = L'o'; output[outputPos++] = L'u'; break; case 0x01A4: // [LATIN CAPITAL LETTER P WITH HOOK] case 0x1D18: // [LATIN LETTER SMALL CAPITAL P] case 0x1E54: // [LATIN CAPITAL LETTER P WITH ACUTE] case 0x1E56: // [LATIN CAPITAL LETTER P WITH DOT ABOVE] case 0x24C5: // [CIRCLED LATIN CAPITAL LETTER P] case 0x2C63: // [LATIN CAPITAL LETTER P WITH STROKE] case 0xA750: // [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] case 0xA752: // [LATIN CAPITAL LETTER P WITH FLOURISH] case 0xA754: // [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] case 0xFF30: // [FULLWIDTH LATIN CAPITAL LETTER P] output[outputPos++] = L'P'; break; case 0x01A5: // [LATIN SMALL LETTER P WITH HOOK] case 0x1D71: // [LATIN SMALL LETTER P WITH MIDDLE TILDE] case 0x1D7D: // [LATIN SMALL LETTER P WITH STROKE] case 0x1D88: // [LATIN SMALL LETTER P WITH PALATAL HOOK] case 0x1E55: // [LATIN SMALL LETTER P WITH ACUTE] case 0x1E57: // [LATIN SMALL LETTER P WITH DOT ABOVE] case 0x24DF: // [CIRCLED LATIN SMALL LETTER P] case 0xA751: // [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] case 0xA753: // [LATIN SMALL LETTER P WITH FLOURISH] case 0xA755: // [LATIN SMALL LETTER P WITH SQUIRREL TAIL] case 0xA7FC: // [LATIN EPIGRAPHIC LETTER REVERSED P] case 0xFF50: // [FULLWIDTH LATIN SMALL LETTER P] output[outputPos++] = L'p'; break; case 0x24AB: // [PARENTHESIZED LATIN SMALL LETTER P] output[outputPos++] = L'('; output[outputPos++] = L'p'; output[outputPos++] = L')'; break; case 0x024A: // [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] case 0x24C6: // [CIRCLED LATIN CAPITAL LETTER Q] case 0xA756: // [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] case 0xA758: // [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] case 0xFF31: // [FULLWIDTH LATIN CAPITAL LETTER Q] output[outputPos++] = L'Q'; break; case 0x0138: // [LATIN SMALL LETTER KRA] case 0x024B: // [LATIN SMALL LETTER Q WITH HOOK TAIL] case 0x02A0: // [LATIN SMALL 
LETTER Q WITH HOOK] case 0x24E0: // [CIRCLED LATIN SMALL LETTER Q] case 0xA757: // [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] case 0xA759: // [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] case 0xFF51: // [FULLWIDTH LATIN SMALL LETTER Q] output[outputPos++] = L'q'; break; case 0x24AC: // [PARENTHESIZED LATIN SMALL LETTER Q] output[outputPos++] = L'('; output[outputPos++] = L'q'; output[outputPos++] = L')'; break; case 0x0239: // [LATIN SMALL LETTER QP DIGRAPH] output[outputPos++] = L'q'; output[outputPos++] = L'p'; break; case 0x0154: // [LATIN CAPITAL LETTER R WITH ACUTE] case 0x0156: // [LATIN CAPITAL LETTER R WITH CEDILLA] case 0x0158: // [LATIN CAPITAL LETTER R WITH CARON] case 0x0210: // [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] case 0x0212: // [LATIN CAPITAL LETTER R WITH INVERTED BREVE] case 0x024C: // [LATIN CAPITAL LETTER R WITH STROKE] case 0x0280: // [LATIN LETTER SMALL CAPITAL R] case 0x0281: // [LATIN LETTER SMALL CAPITAL INVERTED R] case 0x1D19: // [LATIN LETTER SMALL CAPITAL REVERSED R] case 0x1D1A: // [LATIN LETTER SMALL CAPITAL TURNED R] case 0x1E58: // [LATIN CAPITAL LETTER R WITH DOT ABOVE] case 0x1E5A: // [LATIN CAPITAL LETTER R WITH DOT BELOW] case 0x1E5C: // [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] case 0x1E5E: // [LATIN CAPITAL LETTER R WITH LINE BELOW] case 0x24C7: // [CIRCLED LATIN CAPITAL LETTER R] case 0x2C64: // [LATIN CAPITAL LETTER R WITH TAIL] case 0xA75A: // [LATIN CAPITAL LETTER R ROTUNDA] case 0xA782: // [LATIN CAPITAL LETTER INSULAR R] case 0xFF32: // [FULLWIDTH LATIN CAPITAL LETTER R] output[outputPos++] = L'R'; break; case 0x0155: // [LATIN SMALL LETTER R WITH ACUTE] case 0x0157: // [LATIN SMALL LETTER R WITH CEDILLA] case 0x0159: // [LATIN SMALL LETTER R WITH CARON] case 0x0211: // [LATIN SMALL LETTER R WITH DOUBLE GRAVE] case 0x0213: // [LATIN SMALL LETTER R WITH INVERTED BREVE] case 0x024D: // [LATIN SMALL LETTER R WITH STROKE] case 0x027C: // [LATIN SMALL LETTER R WITH LONG LEG] case 0x027D: // [LATIN SMALL 
LETTER R WITH TAIL] case 0x027E: // [LATIN SMALL LETTER R WITH FISHHOOK] case 0x027F: // [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] case 0x1D63: // [LATIN SUBSCRIPT SMALL LETTER R] case 0x1D72: // [LATIN SMALL LETTER R WITH MIDDLE TILDE] case 0x1D73: // [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] case 0x1D89: // [LATIN SMALL LETTER R WITH PALATAL HOOK] case 0x1E59: // [LATIN SMALL LETTER R WITH DOT ABOVE] case 0x1E5B: // [LATIN SMALL LETTER R WITH DOT BELOW] case 0x1E5D: // [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] case 0x1E5F: // [LATIN SMALL LETTER R WITH LINE BELOW] case 0x24E1: // [CIRCLED LATIN SMALL LETTER R] case 0xA75B: // [LATIN SMALL LETTER R ROTUNDA] case 0xA783: // [LATIN SMALL LETTER INSULAR R] case 0xFF52: // [FULLWIDTH LATIN SMALL LETTER R] output[outputPos++] = L'r'; break; case 0x24AD: // [PARENTHESIZED LATIN SMALL LETTER R] output[outputPos++] = L'('; output[outputPos++] = L'r'; output[outputPos++] = L')'; break; case 0x015A: // [LATIN CAPITAL LETTER S WITH ACUTE] case 0x015C: // [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] case 0x015E: // [LATIN CAPITAL LETTER S WITH CEDILLA] case 0x0160: // [LATIN CAPITAL LETTER S WITH CARON] case 0x0218: // [LATIN CAPITAL LETTER S WITH COMMA BELOW] case 0x1E60: // [LATIN CAPITAL LETTER S WITH DOT ABOVE] case 0x1E62: // [LATIN CAPITAL LETTER S WITH DOT BELOW] case 0x1E64: // [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] case 0x1E66: // [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] case 0x1E68: // [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] case 0x24C8: // [CIRCLED LATIN CAPITAL LETTER S] case 0xA731: // [LATIN LETTER SMALL CAPITAL S] case 0xA785: // [LATIN SMALL LETTER INSULAR S] case 0xFF33: // [FULLWIDTH LATIN CAPITAL LETTER S] output[outputPos++] = L'S'; break; case 0x015B: // [LATIN SMALL LETTER S WITH ACUTE] case 0x015D: // [LATIN SMALL LETTER S WITH CIRCUMFLEX] case 0x015F: // [LATIN SMALL LETTER S WITH CEDILLA] case 0x0161: // [LATIN SMALL LETTER S WITH CARON] case 
0x017F: // [LATIN SMALL LETTER LONG S] case 0x0219: // [LATIN SMALL LETTER S WITH COMMA BELOW] case 0x023F: // [LATIN SMALL LETTER S WITH SWASH TAIL] case 0x0282: // [LATIN SMALL LETTER S WITH HOOK] case 0x1D74: // [LATIN SMALL LETTER S WITH MIDDLE TILDE] case 0x1D8A: // [LATIN SMALL LETTER S WITH PALATAL HOOK] case 0x1E61: // [LATIN SMALL LETTER S WITH DOT ABOVE] case 0x1E63: // [LATIN SMALL LETTER S WITH DOT BELOW] case 0x1E65: // [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] case 0x1E67: // [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] case 0x1E69: // [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] case 0x1E9C: // [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] case 0x1E9D: // [LATIN SMALL LETTER LONG S WITH HIGH STROKE] case 0x24E2: // [CIRCLED LATIN SMALL LETTER S] case 0xA784: // [LATIN CAPITAL LETTER INSULAR S] case 0xFF53: // [FULLWIDTH LATIN SMALL LETTER S] output[outputPos++] = L's'; break; case 0x1E9E: // [LATIN CAPITAL LETTER SHARP S] output[outputPos++] = L'S'; output[outputPos++] = L'S'; break; case 0x24AE: // [PARENTHESIZED LATIN SMALL LETTER S] output[outputPos++] = L'('; output[outputPos++] = L's'; output[outputPos++] = L')'; break; case 0x00DF: // [LATIN SMALL LETTER SHARP S] output[outputPos++] = L's'; output[outputPos++] = L's'; break; case 0xFB06: // [LATIN SMALL LIGATURE ST] output[outputPos++] = L's'; output[outputPos++] = L't'; break; case 0x0162: // [LATIN CAPITAL LETTER T WITH CEDILLA] case 0x0164: // [LATIN CAPITAL LETTER T WITH CARON] case 0x0166: // [LATIN CAPITAL LETTER T WITH STROKE] case 0x01AC: // [LATIN CAPITAL LETTER T WITH HOOK] case 0x01AE: // [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] case 0x021A: // [LATIN CAPITAL LETTER T WITH COMMA BELOW] case 0x023E: // [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] case 0x1D1B: // [LATIN LETTER SMALL CAPITAL T] case 0x1E6A: // [LATIN CAPITAL LETTER T WITH DOT ABOVE] case 0x1E6C: // [LATIN CAPITAL LETTER T WITH DOT BELOW] case 0x1E6E: // [LATIN CAPITAL LETTER T WITH LINE BELOW] 
case 0x1E70: // [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] case 0x24C9: // [CIRCLED LATIN CAPITAL LETTER T] case 0xA786: // [LATIN CAPITAL LETTER INSULAR T] case 0xFF34: // [FULLWIDTH LATIN CAPITAL LETTER T] output[outputPos++] = L'T'; break; case 0x0163: // [LATIN SMALL LETTER T WITH CEDILLA] case 0x0165: // [LATIN SMALL LETTER T WITH CARON] case 0x0167: // [LATIN SMALL LETTER T WITH STROKE] case 0x01AB: // [LATIN SMALL LETTER T WITH PALATAL HOOK] case 0x01AD: // [LATIN SMALL LETTER T WITH HOOK] case 0x021B: // [LATIN SMALL LETTER T WITH COMMA BELOW] case 0x0236: // [LATIN SMALL LETTER T WITH CURL] case 0x0287: // [LATIN SMALL LETTER TURNED T] case 0x0288: // [LATIN SMALL LETTER T WITH RETROFLEX HOOK] case 0x1D75: // [LATIN SMALL LETTER T WITH MIDDLE TILDE] case 0x1E6B: // [LATIN SMALL LETTER T WITH DOT ABOVE] case 0x1E6D: // [LATIN SMALL LETTER T WITH DOT BELOW] case 0x1E6F: // [LATIN SMALL LETTER T WITH LINE BELOW] case 0x1E71: // [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] case 0x1E97: // [LATIN SMALL LETTER T WITH DIAERESIS] case 0x24E3: // [CIRCLED LATIN SMALL LETTER T] case 0x2C66: // [LATIN SMALL LETTER T WITH DIAGONAL STROKE] case 0xFF54: // [FULLWIDTH LATIN SMALL LETTER T] output[outputPos++] = L't'; break; case 0x00DE: // [LATIN CAPITAL LETTER THORN] case 0xA766: // [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] output[outputPos++] = L'T'; output[outputPos++] = L'H'; break; case 0xA728: // [LATIN CAPITAL LETTER TZ] output[outputPos++] = L'T'; output[outputPos++] = L'Z'; break; case 0x24AF: // [PARENTHESIZED LATIN SMALL LETTER T] output[outputPos++] = L'('; output[outputPos++] = L't'; output[outputPos++] = L')'; break; case 0x02A8: // [LATIN SMALL LETTER TC DIGRAPH WITH CURL] output[outputPos++] = L't'; output[outputPos++] = L'c'; break; case 0x00FE: // [LATIN SMALL LETTER THORN] case 0x1D7A: // [LATIN SMALL LETTER TH WITH STRIKETHROUGH] case 0xA767: // [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] output[outputPos++] = L't'; 
output[outputPos++] = L'h'; break; case 0x02A6: // [LATIN SMALL LETTER TS DIGRAPH] output[outputPos++] = L't'; output[outputPos++] = L's'; break; case 0xA729: // [LATIN SMALL LETTER TZ] output[outputPos++] = L't'; output[outputPos++] = L'z'; break; case 0x00D9: // [LATIN CAPITAL LETTER U WITH GRAVE] case 0x00DA: // [LATIN CAPITAL LETTER U WITH ACUTE] case 0x00DB: // [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] case 0x00DC: // [LATIN CAPITAL LETTER U WITH DIAERESIS] case 0x0168: // [LATIN CAPITAL LETTER U WITH TILDE] case 0x016A: // [LATIN CAPITAL LETTER U WITH MACRON] case 0x016C: // [LATIN CAPITAL LETTER U WITH BREVE] case 0x016E: // [LATIN CAPITAL LETTER U WITH RING ABOVE] case 0x0170: // [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] case 0x0172: // [LATIN CAPITAL LETTER U WITH OGONEK] case 0x01AF: // [LATIN CAPITAL LETTER U WITH HORN] case 0x01D3: // [LATIN CAPITAL LETTER U WITH CARON] case 0x01D5: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] case 0x01D7: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] case 0x01D9: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] case 0x01DB: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] case 0x0214: // [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] case 0x0216: // [LATIN CAPITAL LETTER U WITH INVERTED BREVE] case 0x0244: // [LATIN CAPITAL LETTER U BAR] case 0x1D1C: // [LATIN LETTER SMALL CAPITAL U] case 0x1D7E: // [LATIN SMALL CAPITAL LETTER U WITH STROKE] case 0x1E72: // [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] case 0x1E74: // [LATIN CAPITAL LETTER U WITH TILDE BELOW] case 0x1E76: // [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] case 0x1E78: // [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] case 0x1E7A: // [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] case 0x1EE4: // [LATIN CAPITAL LETTER U WITH DOT BELOW] case 0x1EE6: // [LATIN CAPITAL LETTER U WITH HOOK ABOVE] case 0x1EE8: // [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] case 0x1EEA: // [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] case 0x1EEC: // 
[LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] case 0x1EEE: // [LATIN CAPITAL LETTER U WITH HORN AND TILDE] case 0x1EF0: // [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] case 0x24CA: // [CIRCLED LATIN CAPITAL LETTER U] case 0xFF35: // [FULLWIDTH LATIN CAPITAL LETTER U] output[outputPos++] = L'U'; break; case 0x00F9: // [LATIN SMALL LETTER U WITH GRAVE] case 0x00FA: // [LATIN SMALL LETTER U WITH ACUTE] case 0x00FB: // [LATIN SMALL LETTER U WITH CIRCUMFLEX] case 0x00FC: // [LATIN SMALL LETTER U WITH DIAERESIS] case 0x0169: // [LATIN SMALL LETTER U WITH TILDE] case 0x016B: // [LATIN SMALL LETTER U WITH MACRON] case 0x016D: // [LATIN SMALL LETTER U WITH BREVE] case 0x016F: // [LATIN SMALL LETTER U WITH RING ABOVE] case 0x0171: // [LATIN SMALL LETTER U WITH DOUBLE ACUTE] case 0x0173: // [LATIN SMALL LETTER U WITH OGONEK] case 0x01B0: // [LATIN SMALL LETTER U WITH HORN] case 0x01D4: // [LATIN SMALL LETTER U WITH CARON] case 0x01D6: // [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] case 0x01D8: // [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] case 0x01DA: // [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] case 0x01DC: // [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] case 0x0215: // [LATIN SMALL LETTER U WITH DOUBLE GRAVE] case 0x0217: // [LATIN SMALL LETTER U WITH INVERTED BREVE] case 0x0289: // [LATIN SMALL LETTER U BAR] case 0x1D64: // [LATIN SUBSCRIPT SMALL LETTER U] case 0x1D99: // [LATIN SMALL LETTER U WITH RETROFLEX HOOK] case 0x1E73: // [LATIN SMALL LETTER U WITH DIAERESIS BELOW] case 0x1E75: // [LATIN SMALL LETTER U WITH TILDE BELOW] case 0x1E77: // [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] case 0x1E79: // [LATIN SMALL LETTER U WITH TILDE AND ACUTE] case 0x1E7B: // [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] case 0x1EE5: // [LATIN SMALL LETTER U WITH DOT BELOW] case 0x1EE7: // [LATIN SMALL LETTER U WITH HOOK ABOVE] case 0x1EE9: // [LATIN SMALL LETTER U WITH HORN AND ACUTE] case 0x1EEB: // [LATIN SMALL LETTER U WITH HORN AND GRAVE] case 0x1EED: 
// [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] case 0x1EEF: // [LATIN SMALL LETTER U WITH HORN AND TILDE] case 0x1EF1: // [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] case 0x24E4: // [CIRCLED LATIN SMALL LETTER U] case 0xFF55: // [FULLWIDTH LATIN SMALL LETTER U] output[outputPos++] = L'u'; break; case 0x24B0: // [PARENTHESIZED LATIN SMALL LETTER U] output[outputPos++] = L'('; output[outputPos++] = L'u'; output[outputPos++] = L')'; break; case 0x1D6B: // [LATIN SMALL LETTER UE] output[outputPos++] = L'u'; output[outputPos++] = L'e'; break; case 0x01B2: // [LATIN CAPITAL LETTER V WITH HOOK] case 0x0245: // [LATIN CAPITAL LETTER TURNED V] case 0x1D20: // [LATIN LETTER SMALL CAPITAL V] case 0x1E7C: // [LATIN CAPITAL LETTER V WITH TILDE] case 0x1E7E: // [LATIN CAPITAL LETTER V WITH DOT BELOW] case 0x1EFC: // [LATIN CAPITAL LETTER MIDDLE-WELSH V] case 0x24CB: // [CIRCLED LATIN CAPITAL LETTER V] case 0xA75E: // [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] case 0xA768: // [LATIN CAPITAL LETTER VEND] case 0xFF36: // [FULLWIDTH LATIN CAPITAL LETTER V] output[outputPos++] = L'V'; break; case 0x028B: // [LATIN SMALL LETTER V WITH HOOK] case 0x028C: // [LATIN SMALL LETTER TURNED V] case 0x1D65: // [LATIN SUBSCRIPT SMALL LETTER V] case 0x1D8C: // [LATIN SMALL LETTER V WITH PALATAL HOOK] case 0x1E7D: // [LATIN SMALL LETTER V WITH TILDE] case 0x1E7F: // [LATIN SMALL LETTER V WITH DOT BELOW] case 0x24E5: // [CIRCLED LATIN SMALL LETTER V] case 0x2C71: // [LATIN SMALL LETTER V WITH RIGHT HOOK] case 0x2C74: // [LATIN SMALL LETTER V WITH CURL] case 0xA75F: // [LATIN SMALL LETTER V WITH DIAGONAL STROKE] case 0xFF56: // [FULLWIDTH LATIN SMALL LETTER V] output[outputPos++] = L'v'; break; case 0xA760: // [LATIN CAPITAL LETTER VY] output[outputPos++] = L'V'; output[outputPos++] = L'Y'; break; case 0x24B1: // [PARENTHESIZED LATIN SMALL LETTER V] output[outputPos++] = L'('; output[outputPos++] = L'v'; output[outputPos++] = L')'; break; case 0xA761: // [LATIN SMALL LETTER VY] 
output[outputPos++] = L'v'; output[outputPos++] = L'y'; break; case 0x0174: // [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] case 0x01F7: // [LATIN CAPITAL LETTER WYNN] case 0x1D21: // [LATIN LETTER SMALL CAPITAL W] case 0x1E80: // [LATIN CAPITAL LETTER W WITH GRAVE] case 0x1E82: // [LATIN CAPITAL LETTER W WITH ACUTE] case 0x1E84: // [LATIN CAPITAL LETTER W WITH DIAERESIS] case 0x1E86: // [LATIN CAPITAL LETTER W WITH DOT ABOVE] case 0x1E88: // [LATIN CAPITAL LETTER W WITH DOT BELOW] case 0x24CC: // [CIRCLED LATIN CAPITAL LETTER W] case 0x2C72: // [LATIN CAPITAL LETTER W WITH HOOK] case 0xFF37: // [FULLWIDTH LATIN CAPITAL LETTER W] output[outputPos++] = L'W'; break; case 0x0175: // [LATIN SMALL LETTER W WITH CIRCUMFLEX] case 0x01BF: // [LATIN LETTER WYNN] case 0x028D: // [LATIN SMALL LETTER TURNED W] case 0x1E81: // [LATIN SMALL LETTER W WITH GRAVE] case 0x1E83: // [LATIN SMALL LETTER W WITH ACUTE] case 0x1E85: // [LATIN SMALL LETTER W WITH DIAERESIS] case 0x1E87: // [LATIN SMALL LETTER W WITH DOT ABOVE] case 0x1E89: // [LATIN SMALL LETTER W WITH DOT BELOW] case 0x1E98: // [LATIN SMALL LETTER W WITH RING ABOVE] case 0x24E6: // [CIRCLED LATIN SMALL LETTER W] case 0x2C73: // [LATIN SMALL LETTER W WITH HOOK] case 0xFF57: // [FULLWIDTH LATIN SMALL LETTER W] output[outputPos++] = L'w'; break; case 0x24B2: // [PARENTHESIZED LATIN SMALL LETTER W] output[outputPos++] = L'('; output[outputPos++] = L'w'; output[outputPos++] = L')'; break; case 0x1E8A: // [LATIN CAPITAL LETTER X WITH DOT ABOVE] case 0x1E8C: // [LATIN CAPITAL LETTER X WITH DIAERESIS] case 0x24CD: // [CIRCLED LATIN CAPITAL LETTER X] case 0xFF38: // [FULLWIDTH LATIN CAPITAL LETTER X] output[outputPos++] = L'X'; break; case 0x1D8D: // [LATIN SMALL LETTER X WITH PALATAL HOOK] case 0x1E8B: // [LATIN SMALL LETTER X WITH DOT ABOVE] case 0x1E8D: // [LATIN SMALL LETTER X WITH DIAERESIS] case 0x2093: // [LATIN SUBSCRIPT SMALL LETTER X] case 0x24E7: // [CIRCLED LATIN SMALL LETTER X] case 0xFF58: // [FULLWIDTH LATIN SMALL 
LETTER X] output[outputPos++] = L'x'; break; case 0x24B3: // [PARENTHESIZED LATIN SMALL LETTER X] output[outputPos++] = L'('; output[outputPos++] = L'x'; output[outputPos++] = L')'; break; case 0x00DD: // [LATIN CAPITAL LETTER Y WITH ACUTE] case 0x0176: // [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] case 0x0178: // [LATIN CAPITAL LETTER Y WITH DIAERESIS] case 0x01B3: // [LATIN CAPITAL LETTER Y WITH HOOK] case 0x0232: // [LATIN CAPITAL LETTER Y WITH MACRON] case 0x024E: // [LATIN CAPITAL LETTER Y WITH STROKE] case 0x028F: // [LATIN LETTER SMALL CAPITAL Y] case 0x1E8E: // [LATIN CAPITAL LETTER Y WITH DOT ABOVE] case 0x1EF2: // [LATIN CAPITAL LETTER Y WITH GRAVE] case 0x1EF4: // [LATIN CAPITAL LETTER Y WITH DOT BELOW] case 0x1EF6: // [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] case 0x1EF8: // [LATIN CAPITAL LETTER Y WITH TILDE] case 0x1EFE: // [LATIN CAPITAL LETTER Y WITH LOOP] case 0x24CE: // [CIRCLED LATIN CAPITAL LETTER Y] case 0xFF39: // [FULLWIDTH LATIN CAPITAL LETTER Y] output[outputPos++] = L'Y'; break; case 0x00FD: // [LATIN SMALL LETTER Y WITH ACUTE] case 0x00FF: // [LATIN SMALL LETTER Y WITH DIAERESIS] case 0x0177: // [LATIN SMALL LETTER Y WITH CIRCUMFLEX] case 0x01B4: // [LATIN SMALL LETTER Y WITH HOOK] case 0x0233: // [LATIN SMALL LETTER Y WITH MACRON] case 0x024F: // [LATIN SMALL LETTER Y WITH STROKE] case 0x028E: // [LATIN SMALL LETTER TURNED Y] case 0x1E8F: // [LATIN SMALL LETTER Y WITH DOT ABOVE] case 0x1E99: // [LATIN SMALL LETTER Y WITH RING ABOVE] case 0x1EF3: // [LATIN SMALL LETTER Y WITH GRAVE] case 0x1EF5: // [LATIN SMALL LETTER Y WITH DOT BELOW] case 0x1EF7: // [LATIN SMALL LETTER Y WITH HOOK ABOVE] case 0x1EF9: // [LATIN SMALL LETTER Y WITH TILDE] case 0x1EFF: // [LATIN SMALL LETTER Y WITH LOOP] case 0x24E8: // [CIRCLED LATIN SMALL LETTER Y] case 0xFF59: // [FULLWIDTH LATIN SMALL LETTER Y] output[outputPos++] = L'y'; break; case 0x24B4: // [PARENTHESIZED LATIN SMALL LETTER Y] output[outputPos++] = L'('; output[outputPos++] = L'y'; output[outputPos++] 
= L')'; break; case 0x0179: // [LATIN CAPITAL LETTER Z WITH ACUTE] case 0x017B: // [LATIN CAPITAL LETTER Z WITH DOT ABOVE] case 0x017D: // [LATIN CAPITAL LETTER Z WITH CARON] case 0x01B5: // [LATIN CAPITAL LETTER Z WITH STROKE] case 0x021C: // [LATIN CAPITAL LETTER YOGH] case 0x0224: // [LATIN CAPITAL LETTER Z WITH HOOK] case 0x1D22: // [LATIN LETTER SMALL CAPITAL Z] case 0x1E90: // [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] case 0x1E92: // [LATIN CAPITAL LETTER Z WITH DOT BELOW] case 0x1E94: // [LATIN CAPITAL LETTER Z WITH LINE BELOW] case 0x24CF: // [CIRCLED LATIN CAPITAL LETTER Z] case 0x2C6B: // [LATIN CAPITAL LETTER Z WITH DESCENDER] case 0xA762: // [LATIN CAPITAL LETTER VISIGOTHIC Z] case 0xFF3A: // [FULLWIDTH LATIN CAPITAL LETTER Z] output[outputPos++] = L'Z'; break; case 0x017A: // [LATIN SMALL LETTER Z WITH ACUTE] case 0x017C: // [LATIN SMALL LETTER Z WITH DOT ABOVE] case 0x017E: // [LATIN SMALL LETTER Z WITH CARON] case 0x01B6: // [LATIN SMALL LETTER Z WITH STROKE] case 0x021D: // [LATIN SMALL LETTER YOGH] case 0x0225: // [LATIN SMALL LETTER Z WITH HOOK] case 0x0240: // [LATIN SMALL LETTER Z WITH SWASH TAIL] case 0x0290: // [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] case 0x0291: // [LATIN SMALL LETTER Z WITH CURL] case 0x1D76: // [LATIN SMALL LETTER Z WITH MIDDLE TILDE] case 0x1D8E: // [LATIN SMALL LETTER Z WITH PALATAL HOOK] case 0x1E91: // [LATIN SMALL LETTER Z WITH CIRCUMFLEX] case 0x1E93: // [LATIN SMALL LETTER Z WITH DOT BELOW] case 0x1E95: // [LATIN SMALL LETTER Z WITH LINE BELOW] case 0x24E9: // [CIRCLED LATIN SMALL LETTER Z] case 0x2C6C: // [LATIN SMALL LETTER Z WITH DESCENDER] case 0xA763: // [LATIN SMALL LETTER VISIGOTHIC Z] case 0xFF5A: // [FULLWIDTH LATIN SMALL LETTER Z] output[outputPos++] = L'z'; break; case 0x24B5: // [PARENTHESIZED LATIN SMALL LETTER Z] output[outputPos++] = L'('; output[outputPos++] = L'z'; output[outputPos++] = L')'; break; case 0x2070: // [SUPERSCRIPT ZERO] case 0x2080: // [SUBSCRIPT ZERO] case 0x24EA: // [CIRCLED DIGIT 
ZERO] case 0x24FF: // [NEGATIVE CIRCLED DIGIT ZERO] case 0xFF10: // [FULLWIDTH DIGIT ZERO] output[outputPos++] = L'0'; break; case 0x00B9: // [SUPERSCRIPT ONE] case 0x2081: // [SUBSCRIPT ONE] case 0x2460: // [CIRCLED DIGIT ONE] case 0x24F5: // [DOUBLE CIRCLED DIGIT ONE] case 0x2776: // [DINGBAT NEGATIVE CIRCLED DIGIT ONE] case 0x2780: // [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] case 0x278A: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] case 0xFF11: // [FULLWIDTH DIGIT ONE] output[outputPos++] = L'1'; break; case 0x2488: // [DIGIT ONE FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'.'; break; case 0x2474: // [PARENTHESIZED DIGIT ONE] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L')'; break; case 0x00B2: // [SUPERSCRIPT TWO] case 0x2082: // [SUBSCRIPT TWO] case 0x2461: // [CIRCLED DIGIT TWO] case 0x24F6: // [DOUBLE CIRCLED DIGIT TWO] case 0x2777: // [DINGBAT NEGATIVE CIRCLED DIGIT TWO] case 0x2781: // [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] case 0x278B: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] case 0xFF12: // [FULLWIDTH DIGIT TWO] output[outputPos++] = L'2'; break; case 0x2489: // [DIGIT TWO FULL STOP] output[outputPos++] = L'2'; output[outputPos++] = L'.'; break; case 0x2475: // [PARENTHESIZED DIGIT TWO] output[outputPos++] = L'('; output[outputPos++] = L'2'; output[outputPos++] = L')'; break; case 0x00B3: // [SUPERSCRIPT THREE] case 0x2083: // [SUBSCRIPT THREE] case 0x2462: // [CIRCLED DIGIT THREE] case 0x24F7: // [DOUBLE CIRCLED DIGIT THREE] case 0x2778: // [DINGBAT NEGATIVE CIRCLED DIGIT THREE] case 0x2782: // [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] case 0x278C: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] case 0xFF13: // [FULLWIDTH DIGIT THREE] output[outputPos++] = L'3'; break; case 0x248A: // [DIGIT THREE FULL STOP] output[outputPos++] = L'3'; output[outputPos++] = L'.'; break; case 0x2476: // [PARENTHESIZED DIGIT THREE] output[outputPos++] = L'('; output[outputPos++] = L'3'; 
output[outputPos++] = L')'; break; case 0x2074: // [SUPERSCRIPT FOUR] case 0x2084: // [SUBSCRIPT FOUR] case 0x2463: // [CIRCLED DIGIT FOUR] case 0x24F8: // [DOUBLE CIRCLED DIGIT FOUR] case 0x2779: // [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] case 0x2783: // [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] case 0x278D: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] case 0xFF14: // [FULLWIDTH DIGIT FOUR] output[outputPos++] = L'4'; break; case 0x248B: // [DIGIT FOUR FULL STOP] output[outputPos++] = L'4'; output[outputPos++] = L'.'; break; case 0x2477: // [PARENTHESIZED DIGIT FOUR] output[outputPos++] = L'('; output[outputPos++] = L'4'; output[outputPos++] = L')'; break; case 0x2075: // [SUPERSCRIPT FIVE] case 0x2085: // [SUBSCRIPT FIVE] case 0x2464: // [CIRCLED DIGIT FIVE] case 0x24F9: // [DOUBLE CIRCLED DIGIT FIVE] case 0x277A: // [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] case 0x2784: // [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] case 0x278E: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] case 0xFF15: // [FULLWIDTH DIGIT FIVE] output[outputPos++] = L'5'; break; case 0x248C: // [DIGIT FIVE FULL STOP] output[outputPos++] = L'5'; output[outputPos++] = L'.'; break; case 0x2478: // [PARENTHESIZED DIGIT FIVE] output[outputPos++] = L'('; output[outputPos++] = L'5'; output[outputPos++] = L')'; break; case 0x2076: // [SUPERSCRIPT SIX] case 0x2086: // [SUBSCRIPT SIX] case 0x2465: // [CIRCLED DIGIT SIX] case 0x24FA: // [DOUBLE CIRCLED DIGIT SIX] case 0x277B: // [DINGBAT NEGATIVE CIRCLED DIGIT SIX] case 0x2785: // [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] case 0x278F: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] case 0xFF16: // [FULLWIDTH DIGIT SIX] output[outputPos++] = L'6'; break; case 0x248D: // [DIGIT SIX FULL STOP] output[outputPos++] = L'6'; output[outputPos++] = L'.'; break; case 0x2479: // [PARENTHESIZED DIGIT SIX] output[outputPos++] = L'('; output[outputPos++] = L'6'; output[outputPos++] = L')'; break; case 0x2077: // [SUPERSCRIPT SEVEN] case 0x2087: // [SUBSCRIPT SEVEN] 
case 0x2466: // [CIRCLED DIGIT SEVEN] case 0x24FB: // [DOUBLE CIRCLED DIGIT SEVEN] case 0x277C: // [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] case 0x2786: // [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] case 0x2790: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] case 0xFF17: // [FULLWIDTH DIGIT SEVEN] output[outputPos++] = L'7'; break; case 0x248E: // [DIGIT SEVEN FULL STOP] output[outputPos++] = L'7'; output[outputPos++] = L'.'; break; case 0x247A: // [PARENTHESIZED DIGIT SEVEN] output[outputPos++] = L'('; output[outputPos++] = L'7'; output[outputPos++] = L')'; break; case 0x2078: // [SUPERSCRIPT EIGHT] case 0x2088: // [SUBSCRIPT EIGHT] case 0x2467: // [CIRCLED DIGIT EIGHT] case 0x24FC: // [DOUBLE CIRCLED DIGIT EIGHT] case 0x277D: // [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] case 0x2787: // [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] case 0x2791: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] case 0xFF18: // [FULLWIDTH DIGIT EIGHT] output[outputPos++] = L'8'; break; case 0x248F: // [DIGIT EIGHT FULL STOP] output[outputPos++] = L'8'; output[outputPos++] = L'.'; break; case 0x247B: // [PARENTHESIZED DIGIT EIGHT] output[outputPos++] = L'('; output[outputPos++] = L'8'; output[outputPos++] = L')'; break; case 0x2079: // [SUPERSCRIPT NINE] case 0x2089: // [SUBSCRIPT NINE] case 0x2468: // [CIRCLED DIGIT NINE] case 0x24FD: // [DOUBLE CIRCLED DIGIT NINE] case 0x277E: // [DINGBAT NEGATIVE CIRCLED DIGIT NINE] case 0x2788: // [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] case 0x2792: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] case 0xFF19: // [FULLWIDTH DIGIT NINE] output[outputPos++] = L'9'; break; case 0x2490: // [DIGIT NINE FULL STOP] output[outputPos++] = L'9'; output[outputPos++] = L'.'; break; case 0x247C: // [PARENTHESIZED DIGIT NINE] output[outputPos++] = L'('; output[outputPos++] = L'9'; output[outputPos++] = L')'; break; case 0x2469: // [CIRCLED NUMBER TEN] case 0x24FE: // [DOUBLE CIRCLED NUMBER TEN] case 0x277F: // [DINGBAT NEGATIVE CIRCLED NUMBER TEN] case 0x2789: 
// [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] case 0x2793: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] output[outputPos++] = L'1'; output[outputPos++] = L'0'; break; case 0x2491: // [NUMBER TEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'0'; output[outputPos++] = L'.'; break; case 0x247D: // [PARENTHESIZED NUMBER TEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'0'; output[outputPos++] = L')'; break; case 0x246A: // [CIRCLED NUMBER ELEVEN] case 0x24EB: // [NEGATIVE CIRCLED NUMBER ELEVEN] output[outputPos++] = L'1'; output[outputPos++] = L'1'; break; case 0x2492: // [NUMBER ELEVEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'1'; output[outputPos++] = L'.'; break; case 0x247E: // [PARENTHESIZED NUMBER ELEVEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'1'; output[outputPos++] = L')'; break; case 0x246B: // [CIRCLED NUMBER TWELVE] case 0x24EC: // [NEGATIVE CIRCLED NUMBER TWELVE] output[outputPos++] = L'1'; output[outputPos++] = L'2'; break; case 0x2493: // [NUMBER TWELVE FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'2'; output[outputPos++] = L'.'; break; case 0x247F: // [PARENTHESIZED NUMBER TWELVE] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'2'; output[outputPos++] = L')'; break; case 0x246C: // [CIRCLED NUMBER THIRTEEN] case 0x24ED: // [NEGATIVE CIRCLED NUMBER THIRTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'3'; break; case 0x2494: // [NUMBER THIRTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'3'; output[outputPos++] = L'.'; break; case 0x2480: // [PARENTHESIZED NUMBER THIRTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'3'; output[outputPos++] = L')'; break; case 0x246D: // [CIRCLED NUMBER FOURTEEN] case 0x24EE: // [NEGATIVE CIRCLED NUMBER FOURTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'4'; break; case 0x2495: // 
[NUMBER FOURTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'4'; output[outputPos++] = L'.'; break; case 0x2481: // [PARENTHESIZED NUMBER FOURTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'4'; output[outputPos++] = L')'; break; case 0x246E: // [CIRCLED NUMBER FIFTEEN] case 0x24EF: // [NEGATIVE CIRCLED NUMBER FIFTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'5'; break; case 0x2496: // [NUMBER FIFTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'5'; output[outputPos++] = L'.'; break; case 0x2482: // [PARENTHESIZED NUMBER FIFTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'5'; output[outputPos++] = L')'; break; case 0x246F: // [CIRCLED NUMBER SIXTEEN] case 0x24F0: // [NEGATIVE CIRCLED NUMBER SIXTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'6'; break; case 0x2497: // [NUMBER SIXTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'6'; output[outputPos++] = L'.'; break; case 0x2483: // [PARENTHESIZED NUMBER SIXTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'6'; output[outputPos++] = L')'; break; case 0x2470: // [CIRCLED NUMBER SEVENTEEN] case 0x24F1: // [NEGATIVE CIRCLED NUMBER SEVENTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'7'; break; case 0x2498: // [NUMBER SEVENTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'7'; output[outputPos++] = L'.'; break; case 0x2484: // [PARENTHESIZED NUMBER SEVENTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'7'; output[outputPos++] = L')'; break; case 0x2471: // [CIRCLED NUMBER EIGHTEEN] case 0x24F2: // [NEGATIVE CIRCLED NUMBER EIGHTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'8'; break; case 0x2499: // [NUMBER EIGHTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'8'; output[outputPos++] = L'.'; break; case 0x2485: // [PARENTHESIZED NUMBER EIGHTEEN] 
output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'8'; output[outputPos++] = L')'; break; case 0x2472: // [CIRCLED NUMBER NINETEEN] case 0x24F3: // [NEGATIVE CIRCLED NUMBER NINETEEN] output[outputPos++] = L'1'; output[outputPos++] = L'9'; break; case 0x249A: // [NUMBER NINETEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'9'; output[outputPos++] = L'.'; break; case 0x2486: // [PARENTHESIZED NUMBER NINETEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'9'; output[outputPos++] = L')'; break; case 0x2473: // [CIRCLED NUMBER TWENTY] case 0x24F4: // [NEGATIVE CIRCLED NUMBER TWENTY] output[outputPos++] = L'2'; output[outputPos++] = L'0'; break; case 0x249B: // [NUMBER TWENTY FULL STOP] output[outputPos++] = L'2'; output[outputPos++] = L'0'; output[outputPos++] = L'.'; break; case 0x2487: // [PARENTHESIZED NUMBER TWENTY] output[outputPos++] = L'('; output[outputPos++] = L'2'; output[outputPos++] = L'0'; output[outputPos++] = L')'; break; case 0x00AB: // [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] case 0x00BB: // [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] case 0x201C: // [LEFT DOUBLE QUOTATION MARK] case 0x201D: // [RIGHT DOUBLE QUOTATION MARK] case 0x201E: // [DOUBLE LOW-9 QUOTATION MARK] case 0x2033: // [DOUBLE PRIME] case 0x2036: // [REVERSED DOUBLE PRIME] case 0x275D: // [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] case 0x275E: // [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] case 0x276E: // [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] case 0x276F: // [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] case 0xFF02: // [FULLWIDTH QUOTATION MARK] output[outputPos++] = L'"'; break; case 0x2018: // [LEFT SINGLE QUOTATION MARK] case 0x2019: // [RIGHT SINGLE QUOTATION MARK] case 0x201A: // [SINGLE LOW-9 QUOTATION MARK] case 0x201B: // [SINGLE HIGH-REVERSED-9 QUOTATION MARK] case 0x2032: // [PRIME] case 0x2035: // [REVERSED PRIME] case 0x2039: // [SINGLE LEFT-POINTING ANGLE 
QUOTATION MARK] case 0x203A: // [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] case 0x275B: // [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] case 0x275C: // [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] case 0xFF07: // [FULLWIDTH APOSTROPHE] output[outputPos++] = L'\''; break; case 0x2010: // [HYPHEN] case 0x2011: // [NON-BREAKING HYPHEN] case 0x2012: // [FIGURE DASH] case 0x2013: // [EN DASH] case 0x2014: // [EM DASH] case 0x207B: // [SUPERSCRIPT MINUS] case 0x208B: // [SUBSCRIPT MINUS] case 0xFF0D: // [FULLWIDTH HYPHEN-MINUS] output[outputPos++] = L'-'; break; case 0x2045: // [LEFT SQUARE BRACKET WITH QUILL] case 0x2772: // [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] case 0xFF3B: // [FULLWIDTH LEFT SQUARE BRACKET] output[outputPos++] = L'['; break; case 0x2046: // [RIGHT SQUARE BRACKET WITH QUILL] case 0x2773: // [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] case 0xFF3D: // [FULLWIDTH RIGHT SQUARE BRACKET] output[outputPos++] = L']'; break; case 0x207D: // [SUPERSCRIPT LEFT PARENTHESIS] case 0x208D: // [SUBSCRIPT LEFT PARENTHESIS] case 0x2768: // [MEDIUM LEFT PARENTHESIS ORNAMENT] case 0x276A: // [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] case 0xFF08: // [FULLWIDTH LEFT PARENTHESIS] output[outputPos++] = L'('; break; case 0x2E28: // [LEFT DOUBLE PARENTHESIS] output[outputPos++] = L'('; output[outputPos++] = L'('; break; case 0x207E: // [SUPERSCRIPT RIGHT PARENTHESIS] case 0x208E: // [SUBSCRIPT RIGHT PARENTHESIS] case 0x2769: // [MEDIUM RIGHT PARENTHESIS ORNAMENT] case 0x276B: // [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] case 0xFF09: // [FULLWIDTH RIGHT PARENTHESIS] output[outputPos++] = L')'; break; case 0x2E29: // [RIGHT DOUBLE PARENTHESIS] output[outputPos++] = L')'; output[outputPos++] = L')'; break; case 0x276C: // [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] case 0x2770: // [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] case 0xFF1C: // [FULLWIDTH LESS-THAN SIGN] output[outputPos++] = L'<'; break; case 0x276D: // [MEDIUM RIGHT-POINTING ANGLE BRACKET 
ORNAMENT] case 0x2771: // [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] case 0xFF1E: // [FULLWIDTH GREATER-THAN SIGN] output[outputPos++] = L'>'; break; case 0x2774: // [MEDIUM LEFT CURLY BRACKET ORNAMENT] case 0xFF5B: // [FULLWIDTH LEFT CURLY BRACKET] output[outputPos++] = L'{'; break; case 0x2775: // [MEDIUM RIGHT CURLY BRACKET ORNAMENT] case 0xFF5D: // [FULLWIDTH RIGHT CURLY BRACKET] output[outputPos++] = L'}'; break; case 0x207A: // [SUPERSCRIPT PLUS SIGN] case 0x208A: // [SUBSCRIPT PLUS SIGN] case 0xFF0B: // [FULLWIDTH PLUS SIGN] output[outputPos++] = L'+'; break; case 0x207C: // [SUPERSCRIPT EQUALS SIGN] case 0x208C: // [SUBSCRIPT EQUALS SIGN] case 0xFF1D: // [FULLWIDTH EQUALS SIGN] output[outputPos++] = L'='; break; case 0xFF01: // [FULLWIDTH EXCLAMATION MARK] output[outputPos++] = L'!'; break; case 0x203C: // [DOUBLE EXCLAMATION MARK] output[outputPos++] = L'!'; output[outputPos++] = L'!'; break; case 0x2049: // [EXCLAMATION QUESTION MARK] output[outputPos++] = L'!'; output[outputPos++] = L'?'; break; case 0xFF03: // [FULLWIDTH NUMBER SIGN] output[outputPos++] = L'#'; break; case 0xFF04: // [FULLWIDTH DOLLAR SIGN] output[outputPos++] = L'$'; break; case 0x2052: // [COMMERCIAL MINUS SIGN] case 0xFF05: // [FULLWIDTH PERCENT SIGN] output[outputPos++] = L'%'; break; case 0xFF06: // [FULLWIDTH AMPERSAND] output[outputPos++] = L'&'; break; case 0x204E: // [LOW ASTERISK] case 0xFF0A: // [FULLWIDTH ASTERISK] output[outputPos++] = L'*'; break; case 0xFF0C: // [FULLWIDTH COMMA] output[outputPos++] = L','; break; case 0xFF0E: // [FULLWIDTH FULL STOP] output[outputPos++] = L'.'; break; case 0x2044: // [FRACTION SLASH] case 0xFF0F: // [FULLWIDTH SOLIDUS] output[outputPos++] = L'/'; break; case 0xFF1A: // [FULLWIDTH COLON] output[outputPos++] = L':'; break; case 0x204F: // [REVERSED SEMICOLON] case 0xFF1B: // [FULLWIDTH SEMICOLON] output[outputPos++] = L';'; break; case 0xFF1F: // [FULLWIDTH QUESTION MARK] output[outputPos++] = L'?'; break; case 0x2047: // [DOUBLE 
QUESTION MARK] output[outputPos++] = L'?'; output[outputPos++] = L'?'; break; case 0x2048: // [QUESTION EXCLAMATION MARK] output[outputPos++] = L'?'; output[outputPos++] = L'!'; break; case 0xFF20: // [FULLWIDTH COMMERCIAL AT] output[outputPos++] = L'@'; break; case 0xFF3C: // [FULLWIDTH REVERSE SOLIDUS] output[outputPos++] = L'\\'; break; case 0x2038: // [CARET] case 0xFF3E: // [FULLWIDTH CIRCUMFLEX ACCENT] output[outputPos++] = L'^'; break; case 0xFF3F: // [FULLWIDTH LOW LINE] output[outputPos++] = L'_'; break; case 0x2053: // [SWUNG DASH] case 0xFF5E: // [FULLWIDTH TILDE] output[outputPos++] = L'~'; break; default: output[outputPos++] = c; break; } } } } } LucenePlusPlus-rel_3.0.4/src/core/analysis/Analyzer.cpp000066400000000000000000000021701217574114600231460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Analyzer.h" #include "Fieldable.h" namespace Lucene { Analyzer::~Analyzer() { } TokenStreamPtr Analyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { return tokenStream(fieldName, reader); } LuceneObjectPtr Analyzer::getPreviousTokenStream() { return tokenStreams.get(); } void Analyzer::setPreviousTokenStream(LuceneObjectPtr stream) { tokenStreams.set(stream); } int32_t Analyzer::getPositionIncrementGap(const String& fieldName) { return 0; } int32_t Analyzer::getOffsetGap(FieldablePtr field) { return field->isTokenized() ? 
1 : 0; } void Analyzer::close() { tokenStreams.close(); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/BaseCharFilter.cpp000066400000000000000000000037711217574114600242070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BaseCharFilter.h" #include "MiscUtils.h" namespace Lucene { BaseCharFilter::BaseCharFilter(CharStreamPtr in) : CharFilter(in) { size = 0; } BaseCharFilter::~BaseCharFilter() { } int32_t BaseCharFilter::correct(int32_t currentOff) { if (!offsets || currentOff < offsets[0]) return currentOff; int32_t hi = size - 1; if (currentOff >= offsets[hi]) return currentOff + diffs[hi]; int32_t lo = 0; int32_t mid = -1; while (hi >= lo) { mid = MiscUtils::unsignedShift(lo + hi, 1); if (currentOff < offsets[mid]) hi = mid - 1; else if (currentOff > offsets[mid]) lo = mid + 1; else return currentOff + diffs[mid]; } if (currentOff < offsets[mid]) return mid == 0 ? currentOff : currentOff + diffs[mid - 1]; else return currentOff + diffs[mid]; } int32_t BaseCharFilter::getLastCumulativeDiff() { return !offsets ? 
0 : diffs[size - 1]; } void BaseCharFilter::addOffCorrectMap(int32_t off, int32_t cumulativeDiff) { if (!offsets) { offsets = IntArray::newInstance(64); diffs = IntArray::newInstance(64); } else if (size == offsets.size()) { offsets.resize(MiscUtils::getNextSize(offsets.size())); diffs.resize(MiscUtils::getNextSize(diffs.size())); } offsets[size] = off; diffs[size++] = cumulativeDiff; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/CachingTokenFilter.cpp000066400000000000000000000031021217574114600250600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CachingTokenFilter.h" namespace Lucene { CachingTokenFilter::CachingTokenFilter(TokenStreamPtr input) : TokenFilter(input) { } CachingTokenFilter::~CachingTokenFilter() { } bool CachingTokenFilter::incrementToken() { if (!cache) { // fill cache lazily cache = Collection::newInstance(); fillCache(); iterator = cache.begin(); } if (iterator == cache.end()) { // the cache is exhausted, return false return false; } // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. 
restoreState(*iterator++); return true; } void CachingTokenFilter::end() { if (finalState) restoreState(finalState); } void CachingTokenFilter::reset() { if (cache) iterator = cache.begin(); } void CachingTokenFilter::fillCache() { while (input->incrementToken()) cache.add(captureState()); // capture final state input->end(); finalState = captureState(); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/CharArraySet.cpp000066400000000000000000000043511217574114600237140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharArraySet.h" #include "StringUtils.h" namespace Lucene { CharArraySet::CharArraySet(bool ignoreCase) { this->ignoreCase = ignoreCase; this->entries = HashSet::newInstance(); } CharArraySet::CharArraySet(HashSet entries, bool ignoreCase) { this->ignoreCase = ignoreCase; this->entries = HashSet::newInstance(); if (entries) { for (HashSet::iterator entry = entries.begin(); entry != entries.end(); ++entry) add(*entry); } } CharArraySet::CharArraySet(Collection entries, bool ignoreCase) { this->ignoreCase = ignoreCase; this->entries = HashSet::newInstance(); if (entries) { for (Collection::iterator entry = entries.begin(); entry != entries.end(); ++entry) add(*entry); } } CharArraySet::~CharArraySet() { } bool CharArraySet::contains(const String& text) { return entries.contains(ignoreCase ? StringUtils::toLower(text) : text); } bool CharArraySet::contains(const wchar_t* text, int32_t offset, int32_t length) { return contains(String(text + offset, length)); } bool CharArraySet::add(const String& text) { return entries.add(ignoreCase ? 
StringUtils::toLower(text) : text); } bool CharArraySet::add(CharArray text) { return add(String(text.get(), text.size())); } int32_t CharArraySet::size() { return entries.size(); } bool CharArraySet::isEmpty() { return entries.empty(); } HashSet::iterator CharArraySet::begin() { return entries.begin(); } HashSet::iterator CharArraySet::end() { return entries.end(); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/CharFilter.cpp000066400000000000000000000023351217574114600234070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharFilter.h" namespace Lucene { CharFilter::CharFilter(CharStreamPtr in) { input = in; } CharFilter::~CharFilter() { } int32_t CharFilter::correct(int32_t currentOff) { return currentOff; } int32_t CharFilter::correctOffset(int32_t currentOff) { return input->correctOffset(correct(currentOff)); } void CharFilter::close() { input->close(); } int32_t CharFilter::read(wchar_t* buffer, int32_t offset, int32_t length) { return input->read(buffer, offset, length); } bool CharFilter::markSupported() { return input->markSupported(); } void CharFilter::mark(int32_t readAheadLimit) { input->mark(readAheadLimit); } void CharFilter::reset() { input->reset(); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/CharReader.cpp000066400000000000000000000025161217574114600233650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharReader.h" namespace Lucene { CharReader::CharReader(ReaderPtr in) { input = in; } CharReader::~CharReader() { } CharStreamPtr CharReader::get(ReaderPtr input) { CharStreamPtr charStream(boost::dynamic_pointer_cast(input)); return charStream ? charStream : newLucene(input); } int32_t CharReader::correctOffset(int32_t currentOff) { return currentOff; } void CharReader::close() { if (input) input->close(); } int32_t CharReader::read(wchar_t* buffer, int32_t offset, int32_t length) { return input->read(buffer, offset, length); } bool CharReader::markSupported() { return input->markSupported(); } void CharReader::mark(int32_t readAheadLimit) { input->mark(readAheadLimit); } void CharReader::reset() { input->reset(); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/CharStream.cpp000066400000000000000000000007021217574114600234110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharStream.h" namespace Lucene { CharStream::~CharStream() { } } LucenePlusPlus-rel_3.0.4/src/core/analysis/CharTokenizer.cpp000066400000000000000000000071331217574114600241350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharTokenizer.h" #include "OffsetAttribute.h" #include "TermAttribute.h" #include "Reader.h" namespace Lucene { const int32_t CharTokenizer::MAX_WORD_LEN = 255; const int32_t CharTokenizer::IO_BUFFER_SIZE = 4096; CharTokenizer::CharTokenizer(ReaderPtr input) : Tokenizer(input) { offset = 0; bufferIndex = 0; dataLen = 0; ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); offsetAtt = addAttribute(); termAtt = addAttribute(); } CharTokenizer::CharTokenizer(AttributeSourcePtr source, ReaderPtr input) : Tokenizer(source, input) { offset = 0; bufferIndex = 0; dataLen = 0; ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); offsetAtt = addAttribute(); termAtt = addAttribute(); } CharTokenizer::CharTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : Tokenizer(factory, input) { offset = 0; bufferIndex = 0; dataLen = 0; ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); offsetAtt = addAttribute(); termAtt = addAttribute(); } CharTokenizer::~CharTokenizer() { } wchar_t CharTokenizer::normalize(wchar_t c) { return c; } bool CharTokenizer::incrementToken() { clearAttributes(); int32_t length = 0; int32_t start = bufferIndex; CharArray buffer(termAtt->termBuffer()); while (true) { if (bufferIndex >= dataLen) { offset += dataLen; dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); if (dataLen == -1) { dataLen = 0; // so next offset += dataLen won't decrement offset if (length > 0) break; else return false; } bufferIndex = 0; } wchar_t c = ioBuffer[bufferIndex++]; if (isTokenChar(c)) // if it's a token char { if (length == 0) start = offset + bufferIndex - 1; else if (length == buffer.size()) buffer = termAtt->resizeTermBuffer(1 + length); buffer[length++] = normalize(c); // buffer it, normalized if (length == MAX_WORD_LEN) // buffer overflow! 
break; } else if (length > 0) // at non-Letter with chars break; // return them } termAtt->setTermLength(length); offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); return true; } void CharTokenizer::end() { // set final offset int32_t finalOffset = correctOffset(offset); offsetAtt->setOffset(finalOffset, finalOffset); } void CharTokenizer::reset(ReaderPtr input) { Tokenizer::reset(input); bufferIndex = 0; offset = 0; dataLen = 0; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/ISOLatin1AccentFilter.cpp000066400000000000000000000200571217574114600253540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ISOLatin1AccentFilter.h" #include "TermAttribute.h" namespace Lucene { ISOLatin1AccentFilter::ISOLatin1AccentFilter(TokenStreamPtr input) : TokenFilter(input) { output = CharArray::newInstance(256); outputPos = 0; termAtt = addAttribute(); } ISOLatin1AccentFilter::~ISOLatin1AccentFilter() { } bool ISOLatin1AccentFilter::incrementToken() { if (input->incrementToken()) { wchar_t* buffer = termAtt->termBufferArray(); int32_t length = termAtt->termLength(); // If no characters actually require rewriting then we just return token as-is for (int32_t i = 0; i < length; ++i) { wchar_t c = buffer[i]; if (c >= 0x00c0 && c <= 0xfb06) { removeAccents(buffer, length); termAtt->setTermBuffer(output.get(), 0, outputPos); break; } } return true; } else return false; } void ISOLatin1AccentFilter::removeAccents(const wchar_t* input, int32_t length) { // Worst-case length required int32_t maxSizeNeeded = 2 * length; int32_t size = output.size(); while (size < maxSizeNeeded) size *= 2; if (size != output.size()) 
output.resize(size); outputPos = 0; int32_t pos = 0; wchar_t* output = this->output.get(); for (int32_t i = 0; i < length; ++i, ++pos) { wchar_t c = input[pos]; // Quick test: if it's not in range then just keep current character if (c < 0x00C0 || c > 0xFB06) output[outputPos++] = c; else { switch (c) { case 0x00C0: case 0x00C1: case 0x00C2: case 0x00C3: case 0x00C4: case 0x00C5: output[outputPos++] = L'A'; break; case 0x00C6: output[outputPos++] = L'A'; output[outputPos++] = L'E'; break; case 0x00C7: output[outputPos++] = L'C'; break; case 0x00C8: case 0x00C9: case 0x00CA: case 0x00CB: output[outputPos++] = L'E'; break; case 0x00CC: case 0x00CD: case 0x00CE: case 0x00CF: output[outputPos++] = L'I'; break; case 0x0132: output[outputPos++] = L'I'; output[outputPos++] = L'J'; break; case 0x00D0: output[outputPos++] = L'D'; break; case 0x00D1: output[outputPos++] = L'N'; break; case 0x00D2: case 0x00D3: case 0x00D4: case 0x00D5: case 0x00D6: case 0x00D8: output[outputPos++] = L'O'; break; case 0x0152: output[outputPos++] = L'O'; output[outputPos++] = L'E'; break; case 0x00DE: output[outputPos++] = L'T'; output[outputPos++] = L'H'; break; case 0x00D9: case 0x00DA: case 0x00DB: case 0x00DC: output[outputPos++] = L'U'; break; case 0x00DD: case 0x0178: output[outputPos++] = L'Y'; break; case 0x00E0: case 0x00E1: case 0x00E2: case 0x00E3: case 0x00E4: case 0x00E5: output[outputPos++] = L'a'; break; case 0x00E6: output[outputPos++] = L'a'; output[outputPos++] = L'e'; break; case 0x00E7: output[outputPos++] = L'c'; break; case 0x00E8: case 0x00E9: case 0x00EA: case 0x00EB: output[outputPos++] = L'e'; break; case 0x00EC: case 0x00ED: case 0x00EE: case 0x00EF: output[outputPos++] = L'i'; break; case 0x0133: output[outputPos++] = L'i'; output[outputPos++] = L'j'; break; case 0x00F0: output[outputPos++] = L'd'; break; case 0x00F1: output[outputPos++] = L'n'; break; case 0x00F2: case 0x00F3: case 0x00F4: case 0x00F5: case 0x00F6: case 0x00F8: output[outputPos++] = L'o'; break; 
case 0x0153: output[outputPos++] = L'o'; output[outputPos++] = L'e'; break; case 0x00DF: output[outputPos++] = L's'; output[outputPos++] = L's'; break; case 0x00FE: output[outputPos++] = L't'; output[outputPos++] = L'h'; break; case 0x00F9: case 0x00FA: case 0x00FB: case 0x00FC: output[outputPos++] = L'u'; break; case 0x00FD: case 0x00FF: output[outputPos++] = L'y'; break; case 0xFB00: output[outputPos++] = L'f'; output[outputPos++] = L'f'; break; case 0xFB01: output[outputPos++] = L'f'; output[outputPos++] = L'i'; break; case 0xFB02: output[outputPos++] = L'f'; output[outputPos++] = L'l'; break; case 0xFB05: output[outputPos++] = L'f'; output[outputPos++] = L't'; break; case 0xFB06: output[outputPos++] = L's'; output[outputPos++] = L't'; break; default : output[outputPos++] = c; break; } } } } } LucenePlusPlus-rel_3.0.4/src/core/analysis/KeywordAnalyzer.cpp000066400000000000000000000021121217574114600245070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "KeywordAnalyzer.h" #include "KeywordTokenizer.h" namespace Lucene { KeywordAnalyzer::~KeywordAnalyzer() { } TokenStreamPtr KeywordAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(reader); } TokenStreamPtr KeywordAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!tokenizer) { tokenizer = newLucene(reader); setPreviousTokenStream(tokenizer); } else tokenizer->reset(reader); return tokenizer; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/KeywordTokenizer.cpp000066400000000000000000000047721217574114600247120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "KeywordTokenizer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "Reader.h" namespace Lucene { const int32_t KeywordTokenizer::DEFAULT_BUFFER_SIZE = 256; KeywordTokenizer::KeywordTokenizer(ReaderPtr input) : Tokenizer(input) { init(DEFAULT_BUFFER_SIZE); } KeywordTokenizer::KeywordTokenizer(ReaderPtr input, int32_t bufferSize) : Tokenizer(input) { init(bufferSize); } KeywordTokenizer::KeywordTokenizer(AttributeSourcePtr source, ReaderPtr input, int32_t bufferSize) : Tokenizer(source, input) { init(bufferSize); } KeywordTokenizer::KeywordTokenizer(AttributeFactoryPtr factory, ReaderPtr input, int32_t bufferSize) : Tokenizer(factory, input) { init(bufferSize); } KeywordTokenizer::~KeywordTokenizer() { } void KeywordTokenizer::init(int32_t bufferSize) { this->done = false; this->finalOffset = 0; this->termAtt = addAttribute(); this->offsetAtt = addAttribute(); this->termAtt->resizeTermBuffer(bufferSize); } bool KeywordTokenizer::incrementToken() { if (!done) { clearAttributes(); done = true; int32_t upto = 0; CharArray buffer(termAtt->termBuffer()); while (true) { int32_t length = input->read(buffer.get(), upto, buffer.size() - upto); if (length == -1) break; upto += length; if (upto == buffer.size()) buffer = termAtt->resizeTermBuffer(buffer.size() + 1); } termAtt->setTermLength(upto); finalOffset = correctOffset(upto); offsetAtt->setOffset(correctOffset(0), finalOffset); return true; } return false; } void KeywordTokenizer::end() { // set final offset offsetAtt->setOffset(finalOffset, finalOffset); } void KeywordTokenizer::reset() { Tokenizer::reset(input); done = false; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/LengthFilter.cpp000066400000000000000000000021471217574114600237540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LengthFilter.h" #include "TermAttribute.h" namespace Lucene { LengthFilter::LengthFilter(TokenStreamPtr input, int32_t min, int32_t max) : TokenFilter(input) { this->min = min; this->max = max; this->termAtt = addAttribute(); } LengthFilter::~LengthFilter() { } bool LengthFilter::incrementToken() { // return the first non-stop word found while (input->incrementToken()) { int32_t len = termAtt->termLength(); if (len >= min && len <= max) return true; // note: else we ignore it but should we index each part of it? } // reached EOS -- return false return false; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/LetterTokenizer.cpp000066400000000000000000000017171217574114600245210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LetterTokenizer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { LetterTokenizer::LetterTokenizer(ReaderPtr input) : CharTokenizer(input) { } LetterTokenizer::LetterTokenizer(AttributeSourcePtr source, ReaderPtr input) : CharTokenizer(source, input) { } LetterTokenizer::LetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : CharTokenizer(factory, input) { } LetterTokenizer::~LetterTokenizer() { } bool LetterTokenizer::isTokenChar(wchar_t c) { return UnicodeUtil::isAlpha(c); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/LowerCaseFilter.cpp000066400000000000000000000016751217574114600244240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LowerCaseFilter.h" #include "TermAttribute.h" #include "CharFolder.h" namespace Lucene { LowerCaseFilter::LowerCaseFilter(TokenStreamPtr input) : TokenFilter(input) { termAtt = addAttribute(); } LowerCaseFilter::~LowerCaseFilter() { } bool LowerCaseFilter::incrementToken() { if (input->incrementToken()) { wchar_t* buffer = termAtt->termBufferArray(); CharFolder::toLower(buffer, buffer + termAtt->termLength()); return true; } return false; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/LowerCaseTokenizer.cpp000066400000000000000000000017321217574114600251430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LowerCaseTokenizer.h" #include "CharFolder.h" namespace Lucene { LowerCaseTokenizer::LowerCaseTokenizer(ReaderPtr input) : LetterTokenizer(input) { } LowerCaseTokenizer::LowerCaseTokenizer(AttributeSourcePtr source, ReaderPtr input) : LetterTokenizer(source, input) { } LowerCaseTokenizer::LowerCaseTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : LetterTokenizer(factory, input) { } LowerCaseTokenizer::~LowerCaseTokenizer() { } wchar_t LowerCaseTokenizer::normalize(wchar_t c) { return CharFolder::toLower(c); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/MappingCharFilter.cpp000066400000000000000000000075451217574114600247330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MappingCharFilter.h" #include "NormalizeCharMap.h" #include "CharReader.h" namespace Lucene { MappingCharFilter::MappingCharFilter(NormalizeCharMapPtr normMap, CharStreamPtr in) : BaseCharFilter(in) { this->normMap = normMap; this->charPointer = 0; this->nextCharCounter = 0; } MappingCharFilter::MappingCharFilter(NormalizeCharMapPtr normMap, ReaderPtr in) : BaseCharFilter(CharReader::get(in)) { this->normMap = normMap; this->charPointer = 0; this->nextCharCounter = 0; } MappingCharFilter::~MappingCharFilter() { } int32_t MappingCharFilter::read() { while (true) { if (charPointer < (int32_t)replacement.length()) return (int32_t)replacement[charPointer++]; int32_t firstChar = nextChar(); if (firstChar == -1) return -1; NormalizeCharMapPtr nm(normMap->submap ? 
normMap->submap.get((wchar_t)firstChar) : NormalizeCharMapPtr()); if (!nm) return firstChar; NormalizeCharMapPtr result(match(nm)); if (!result) return firstChar; replacement = result->normStr; charPointer = 0; if (result->diff != 0) { int32_t prevCumulativeDiff = getLastCumulativeDiff(); if (result->diff < 0) { for (int32_t i = 0; i < -result->diff; ++i) addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i); } else addOffCorrectMap(nextCharCounter - result->diff - prevCumulativeDiff, prevCumulativeDiff + result->diff); } } } int32_t MappingCharFilter::nextChar() { ++nextCharCounter; if (buffer && !buffer.empty()) return buffer.removeFirst(); return input->read(); } void MappingCharFilter::pushChar(int32_t c) { --nextCharCounter; if (!buffer) buffer = Collection::newInstance(); buffer.add(0, (wchar_t)c); } void MappingCharFilter::pushLastChar(int32_t c) { if (!buffer) buffer = Collection::newInstance(); buffer.add((wchar_t)c); } NormalizeCharMapPtr MappingCharFilter::match(NormalizeCharMapPtr map) { NormalizeCharMapPtr result; if (map->submap) { int32_t chr = nextChar(); if (chr != -1) { NormalizeCharMapPtr subMap(map->submap.get((wchar_t)chr)); if (subMap) result = match(subMap); if (!result) pushChar(chr); } } if (!result) result = map; return result; } int32_t MappingCharFilter::read(wchar_t* buffer, int32_t offset, int32_t length) { CharArray tmp(CharArray::newInstance(length)); int32_t l = input->read(tmp.get(), 0, length); if (l != -1) { for (int32_t i = 0; i < l; ++i) pushLastChar(tmp[i]); } l = 0; for (int32_t i = offset; i < offset + length; ++i) { int32_t c = read(); if (c == -1) break; buffer[i] = (wchar_t)c; ++l; } return l == 0 ? -1 : l; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/NormalizeCharMap.cpp000066400000000000000000000026411217574114600245600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NormalizeCharMap.h" namespace Lucene { NormalizeCharMap::NormalizeCharMap() { diff = 0; } NormalizeCharMap::~NormalizeCharMap() { } void NormalizeCharMap::add(const String& singleMatch, const String& replacement) { NormalizeCharMapPtr currMap(shared_from_this()); for (String::const_iterator c = singleMatch.begin(); c != singleMatch.end(); ++c) { if (!currMap->submap) currMap->submap = MapCharNormalizeCharMap::newInstance(); NormalizeCharMapPtr map(currMap->submap.get(*c)); if (!map) { map = newLucene(); currMap->submap.put(*c, map); } currMap = map; } if (!currMap->normStr.empty()) boost::throw_exception(RuntimeException(L"MappingCharFilter: there is already a mapping for " + singleMatch)); currMap->normStr = replacement; currMap->diff = (int32_t)(singleMatch.length() - replacement.length()); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/NumericTokenStream.cpp000066400000000000000000000117221217574114600251430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericTokenStream.h" #include "NumericUtils.h" #include "AttributeSource.h" #include "TermAttribute.h" #include "TypeAttribute.h" #include "PositionIncrementAttribute.h" namespace Lucene { NumericTokenStream::NumericTokenStream() { this->shift = 0; this->valSize = 0; this->termAtt = addAttribute(); this->typeAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->precisionStep = NumericUtils::PRECISION_STEP_DEFAULT; } NumericTokenStream::NumericTokenStream(int32_t precisionStep) { this->shift = 0; this->valSize = 0; this->termAtt = addAttribute(); this->typeAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->precisionStep = precisionStep; if (precisionStep < 1) boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } NumericTokenStream::NumericTokenStream(AttributeSourcePtr source, int32_t precisionStep) : TokenStream(source) { this->shift = 0; this->valSize = 0; this->termAtt = addAttribute(); this->typeAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->precisionStep = precisionStep; if (precisionStep < 1) boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } NumericTokenStream::NumericTokenStream(AttributeFactoryPtr factory, int32_t precisionStep) : TokenStream(factory) { this->shift = 0; this->valSize = 0; this->termAtt = addAttribute(); this->typeAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->precisionStep = precisionStep; if (precisionStep < 1) boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } NumericTokenStream::~NumericTokenStream() { } const String& NumericTokenStream::TOKEN_TYPE_FULL_PREC() { static String _TOKEN_TYPE_FULL_PREC(L"fullPrecNumeric"); return _TOKEN_TYPE_FULL_PREC; } const String& NumericTokenStream::TOKEN_TYPE_LOWER_PREC() { static String _TOKEN_TYPE_LOWER_PREC(L"lowerPrecNumeric"); return 
_TOKEN_TYPE_LOWER_PREC; } NumericTokenStreamPtr NumericTokenStream::setLongValue(int64_t value) { this->value = value; valSize = 64; shift = 0; return shared_from_this(); } NumericTokenStreamPtr NumericTokenStream::setIntValue(int32_t value) { this->value = (int64_t)value; valSize = 32; shift = 0; return shared_from_this(); } NumericTokenStreamPtr NumericTokenStream::setDoubleValue(double value) { this->value = (int64_t)value; valSize = 64; shift = 0; return shared_from_this(); } void NumericTokenStream::reset() { if (valSize == 0) boost::throw_exception(IllegalStateException(L"call setValue() before usage")); shift = 0; } bool NumericTokenStream::incrementToken() { if (valSize == 0) boost::throw_exception(IllegalStateException(L"call setValue() before usage")); if (shift >= valSize) return false; clearAttributes(); CharArray buffer; switch (valSize) { case 64: buffer = termAtt->resizeTermBuffer(NumericUtils::BUF_SIZE_LONG); termAtt->setTermLength(NumericUtils::longToPrefixCoded(value, shift, buffer)); break; case 32: buffer = termAtt->resizeTermBuffer(NumericUtils::BUF_SIZE_INT); termAtt->setTermLength(NumericUtils::intToPrefixCoded((int32_t)value, shift, buffer)); break; default: // should not happen boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } typeAtt->setType(shift == 0 ? TOKEN_TYPE_FULL_PREC() : TOKEN_TYPE_LOWER_PREC()); posIncrAtt->setPositionIncrement(shift == 0 ? 1 : 0); shift += precisionStep; return true; } String NumericTokenStream::toString() { StringStream buffer; buffer << L"(numeric,valSize=" << valSize << L",precisionStep=" << precisionStep << L")"; return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/PerFieldAnalyzerWrapper.cpp000066400000000000000000000047401217574114600261270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PerFieldAnalyzerWrapper.h" #include "Fieldable.h" namespace Lucene { PerFieldAnalyzerWrapper::PerFieldAnalyzerWrapper(AnalyzerPtr defaultAnalyzer) { this->defaultAnalyzer = defaultAnalyzer; this->analyzerMap = MapStringAnalyzer::newInstance(); } PerFieldAnalyzerWrapper::PerFieldAnalyzerWrapper(AnalyzerPtr defaultAnalyzer, MapStringAnalyzer fieldAnalyzers) { this->defaultAnalyzer = defaultAnalyzer; this->analyzerMap = MapStringAnalyzer::newInstance(); if (fieldAnalyzers) analyzerMap.putAll(fieldAnalyzers.begin(), fieldAnalyzers.end()); } PerFieldAnalyzerWrapper::~PerFieldAnalyzerWrapper() { } void PerFieldAnalyzerWrapper::addAnalyzer(const String& fieldName, AnalyzerPtr analyzer) { analyzerMap.put(fieldName, analyzer); } TokenStreamPtr PerFieldAnalyzerWrapper::tokenStream(const String& fieldName, ReaderPtr reader) { AnalyzerPtr analyzer(analyzerMap.get(fieldName)); if (!analyzer) analyzer = defaultAnalyzer; return analyzer->tokenStream(fieldName, reader); } TokenStreamPtr PerFieldAnalyzerWrapper::reusableTokenStream(const String& fieldName, ReaderPtr reader) { AnalyzerPtr analyzer(analyzerMap.get(fieldName)); if (!analyzer) analyzer = defaultAnalyzer; return analyzer->reusableTokenStream(fieldName, reader); } int32_t PerFieldAnalyzerWrapper::getPositionIncrementGap(const String& fieldName) { AnalyzerPtr analyzer(analyzerMap.get(fieldName)); if (!analyzer) analyzer = defaultAnalyzer; return analyzer->getPositionIncrementGap(fieldName); } int32_t PerFieldAnalyzerWrapper::getOffsetGap(FieldablePtr field) { AnalyzerPtr analyzer(analyzerMap.get(field->name())); if (!analyzer) analyzer = defaultAnalyzer; return analyzer->getOffsetGap(field); } String PerFieldAnalyzerWrapper::toString() { return L"PerFieldAnalyzerWrapper(default=" + 
defaultAnalyzer->toString() + L")"; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/PorterStemFilter.cpp000066400000000000000000000017701217574114600246400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PorterStemFilter.h" #include "PorterStemmer.h" #include "TermAttribute.h" namespace Lucene { PorterStemFilter::PorterStemFilter(TokenStreamPtr input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } PorterStemFilter::~PorterStemFilter() { } bool PorterStemFilter::incrementToken() { if (!input->incrementToken()) return false; if (stemmer->stem(termAtt->termBuffer())) termAtt->setTermBuffer(stemmer->getResultBuffer(), 0, stemmer->getResultLength()); return true; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/PorterStemmer.cpp000066400000000000000000000301031217574114600241660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PorterStemmer.h" namespace Lucene { PorterStemmer::PorterStemmer() { b = NULL; k = 0; j = 0; i = 0; dirty = false; } PorterStemmer::~PorterStemmer() { } bool PorterStemmer::stem(CharArray word) { return stem(word.get(), word.size() - 1); } bool PorterStemmer::stem(wchar_t* b, int32_t k) { this->b = b; this->k = k; this->j = 0; this->i = k; dirty = false; if (k <= 1) return false; // DEPARTURE // With these lines, strings of length 1 or 2 don't go through the stemming process, although no mention // is made of this in the published algorithm. Remove the line to match the published algorithm. step1ab(); step1c(); step2(); step3(); step4(); step5(); if (i != this->k) dirty = true; return dirty; } wchar_t* PorterStemmer::getResultBuffer() { return b; } int32_t PorterStemmer::getResultLength() { return k + 1; } bool PorterStemmer::cons(int32_t i) { switch (b[i]) { case L'a': case L'e': case L'i': case L'o': case L'u': return false; case L'y': return (i == 0) ? 
true : !cons(i - 1); default: return true; } } int32_t PorterStemmer::m() { int32_t n = 0; int32_t i = 0; while (true) { if (i > j) return n; if (!cons(i)) break; ++i; } ++i; while (true) { while (true) { if (i > j) return n; if (cons(i)) break; ++i; } ++i; ++n; while (true) { if (i > j) return n; if (!cons(i)) break; ++i; } ++i; } } bool PorterStemmer::vowelinstem() { for (int32_t i = 0; i <= j; ++i) { if (!cons(i)) return true; } return false; } bool PorterStemmer::doublec(int32_t j) { if (j < 1) return false; if (b[j] != b[j - 1]) return false; return cons(j); } bool PorterStemmer::cvc(int32_t i) { if (i < 2 || !cons(i) || cons(i - 1) || !cons(i - 2)) return false; int32_t ch = b[i]; if (ch == L'w' || ch == L'x' || ch == L'y') return false; return true; } bool PorterStemmer::ends(const wchar_t* s) { int32_t length = s[0]; if (s[length] != b[k]) return false; // tiny speed-up if (length > k + 1) return false; if (std::memcmp(b + k - length + 1, s + 1, length) != 0) return false; j = k - length; return true; } void PorterStemmer::setto(const wchar_t* s) { int32_t length = s[0]; std::memmove(b + j + 1, s + 1, length); k = j + length; dirty = true; } void PorterStemmer::r(const wchar_t* s) { if (m() > 0) setto(s); } void PorterStemmer::step1ab() { if (b[k] == L's') { if (ends(L"\04" L"sses")) k -= 2; else if (ends(L"\03" L"ies")) setto(L"\01" L"i"); else if (b[k - 1] != L's') --k; } if (ends(L"\03" L"eed")) { if (m() > 0) --k; } else if ((ends(L"\02" L"ed") || ends(L"\03" L"ing")) && vowelinstem()) { k = j; if (ends(L"\02" L"at")) setto(L"\03" L"ate"); else if (ends(L"\02" L"bl")) setto(L"\03" L"ble"); else if (ends(L"\02" L"iz")) setto(L"\03" L"ize"); else if (doublec(k)) { --k; int32_t ch = b[k]; if (ch == L'l' || ch == L's' || ch == L'z') ++k; } else if (m() == 1 && cvc(k)) setto(L"\01" L"e"); } } void PorterStemmer::step1c() { if (ends(L"\01" L"y") && vowelinstem()) { b[k] = L'i'; dirty = true; } } void PorterStemmer::step2() { if (k == 0) return; switch (b[k - 
1]) { case L'a': if (ends(L"\07" L"ational")) { r(L"\03" L"ate"); break; } if (ends(L"\06" L"tional")) { r(L"\04" L"tion"); break; } break; case L'c': if (ends(L"\04" L"enci")) { r(L"\04" L"ence"); break; } if (ends(L"\04" L"anci")) { r(L"\04" L"ance"); break; } break; case L'e': if (ends(L"\04" L"izer")) { r(L"\03" L"ize"); break; } break; case L'l': if (ends(L"\03" L"bli")) // DEPARTURE { r(L"\03" L"ble"); break; } if (ends(L"\04" L"alli")) { r(L"\02" L"al"); break; } if (ends(L"\05" L"entli")) { r(L"\03" L"ent"); break; } if (ends(L"\03" L"eli")) { r(L"\01" L"e"); break; } if (ends(L"\05" L"ousli")) { r(L"\03" L"ous"); break; } break; case L'o': if (ends(L"\07" L"ization")) { r(L"\03" L"ize"); break; } if (ends(L"\05" L"ation")) { r(L"\03" L"ate"); break; } if (ends(L"\04" L"ator")) { r(L"\03" L"ate"); break; } break; case L's': if (ends(L"\05" L"alism")) { r(L"\02" L"al"); break; } if (ends(L"\07" L"iveness")) { r(L"\03" L"ive"); break; } if (ends(L"\07" L"fulness")) { r(L"\03" L"ful"); break; } if (ends(L"\07" L"ousness")) { r(L"\03" L"ous"); break; } break; case L't': if (ends(L"\05" L"aliti")) { r(L"\02" L"al"); break; } if (ends(L"\05" L"iviti")) { r(L"\03" L"ive"); break; } if (ends(L"\06" L"biliti")) { r(L"\03" L"ble"); break; } break; case L'g': if (ends(L"\04" L"logi")) // DEPARTURE { r(L"\03" L"log"); break; } } } void PorterStemmer::step3() { switch (b[k]) { case L'e': if (ends(L"\05" L"icate")) { r(L"\02" L"ic"); break; } if (ends(L"\05" L"ative")) { r(L"\00" L""); break; } if (ends(L"\05" L"alize")) { r(L"\02" L"al"); break; } break; case L'i': if (ends(L"\05" L"iciti")) { r(L"\02" L"ic"); break; } break; case L'l': if (ends(L"\04" L"ical")) { r(L"\02" L"ic"); break; } if (ends(L"\03" L"ful")) { r(L"\00" L""); break; } break; case L's': if (ends(L"\04" L"ness")) { r(L"\00" L""); break; } break; } } void PorterStemmer::step4() { if (k == 0) return; switch (b[k - 1]) { case L'a': if (ends(L"\02" L"al")) break; return; case L'c': if (ends(L"\04" 
L"ance")) break; if (ends(L"\04" L"ence")) break; return; case L'e': if (ends(L"\02" L"er")) break; return; case L'i': if (ends(L"\02" L"ic")) break; return; case L'l': if (ends(L"\04" L"able")) break; if (ends(L"\04" L"ible")) break; return; case L'n': if (ends(L"\03" L"ant")) break; if (ends(L"\05" L"ement")) break; if (ends(L"\04" L"ment")) break; if (ends(L"\03" L"ent")) break; return; case L'o': if (ends(L"\03" L"ion") && (b[j] == L's' || b[j] == L't')) break; if (ends(L"\02" L"ou")) break; return; // takes care of -ous case L's': if (ends(L"\03" L"ism")) break; return; case L't': if (ends(L"\03" L"ate")) break; if (ends(L"\03" L"iti")) break; return; case L'u': if (ends(L"\03" L"ous")) break; return; case L'v': if (ends(L"\03" L"ive")) break; return; case L'z': if (ends(L"\03" L"ize")) break; return; default: return; } if (m() > 1) k = j; } void PorterStemmer::step5() { j = k; if (b[k] == L'e') { int32_t a = m(); if (a > 1 || a == 1 && !cvc(k - 1)) --k; } if (b[k] == L'l' && doublec(k) && m() > 1) --k; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/SimpleAnalyzer.cpp000066400000000000000000000021131217574114600243150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SimpleAnalyzer.h" #include "LowerCaseTokenizer.h" namespace Lucene { SimpleAnalyzer::~SimpleAnalyzer() { } TokenStreamPtr SimpleAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(reader); } TokenStreamPtr SimpleAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!tokenizer) { tokenizer = newLucene(reader); setPreviousTokenStream(tokenizer); } else tokenizer->reset(reader); return tokenizer; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/StopAnalyzer.cpp000066400000000000000000000063741217574114600240260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StopAnalyzer.h" #include "_StopAnalyzer.h" #include "StopFilter.h" #include "WordlistLoader.h" #include "Reader.h" #include "LowerCaseTokenizer.h" namespace Lucene { const wchar_t* StopAnalyzer::_ENGLISH_STOP_WORDS_SET[] = { L"a", L"an", L"and", L"are", L"as", L"at", L"be", L"but", L"by", L"for", L"if", L"in", L"into", L"is", L"it", L"no", L"not", L"of", L"on", L"or", L"such", L"that", L"the", L"their", L"then", L"there", L"these", L"they", L"this", L"to", L"was", L"will", L"with" }; StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion) { stopWords = ENGLISH_STOP_WORDS_SET(); enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); } StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords) { this->stopWords = stopWords; enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); } StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, const String& stopwordsFile) { stopWords = WordlistLoader::getWordSet(stopwordsFile); enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); } StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, ReaderPtr stopwords) { stopWords = WordlistLoader::getWordSet(stopwords); enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); } StopAnalyzer::~StopAnalyzer() { } const HashSet StopAnalyzer::ENGLISH_STOP_WORDS_SET() { static HashSet __ENGLISH_STOP_WORDS_SET; if (!__ENGLISH_STOP_WORDS_SET) __ENGLISH_STOP_WORDS_SET = HashSet::newInstance(_ENGLISH_STOP_WORDS_SET, _ENGLISH_STOP_WORDS_SET + SIZEOF_ARRAY(_ENGLISH_STOP_WORDS_SET)); return __ENGLISH_STOP_WORDS_SET; } TokenStreamPtr StopAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(enablePositionIncrements, newLucene(reader), stopWords); } 
TokenStreamPtr StopAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { StopAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(enablePositionIncrements, streams->source, stopWords); setPreviousTokenStream(streams); } else streams->source->reset(reader); return streams->result; } StopAnalyzerSavedStreams::~StopAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/core/analysis/StopFilter.cpp000066400000000000000000000050251217574114600234560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StopFilter.h" #include "CharArraySet.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" namespace Lucene { StopFilter::StopFilter(bool enablePositionIncrements, TokenStreamPtr input, HashSet stopWords, bool ignoreCase) : TokenFilter(input) { this->stopWords = newLucene(stopWords, ignoreCase); this->enablePositionIncrements = enablePositionIncrements; termAtt = addAttribute(); posIncrAtt = addAttribute(); } StopFilter::StopFilter(bool enablePositionIncrements, TokenStreamPtr input, CharArraySetPtr stopWords, bool ignoreCase) : TokenFilter(input) { this->stopWords = stopWords; this->enablePositionIncrements = enablePositionIncrements; termAtt = addAttribute(); posIncrAtt = addAttribute(); } StopFilter::~StopFilter() { } HashSet StopFilter::makeStopSet(Collection stopWords) { return HashSet::newInstance(stopWords.begin(), stopWords.end()); } bool StopFilter::incrementToken() { // return the first non-stop word found int32_t skippedPositions = 0; while 
(input->incrementToken()) { if (!stopWords->contains(termAtt->termBufferArray(), 0, termAtt->termLength())) { if (enablePositionIncrements) posIncrAtt->setPositionIncrement(posIncrAtt->getPositionIncrement() + skippedPositions); return true; } skippedPositions += posIncrAtt->getPositionIncrement(); } // reached EOS -- return false return false; } bool StopFilter::getEnablePositionIncrementsVersionDefault(LuceneVersion::Version matchVersion) { return LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_29); } bool StopFilter::getEnablePositionIncrements() { return enablePositionIncrements; } void StopFilter::setEnablePositionIncrements(bool enable) { this->enablePositionIncrements = enable; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/TeeSinkTokenFilter.cpp000066400000000000000000000115071217574114600250760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TeeSinkTokenFilter.h" #include "Attribute.h" namespace Lucene { TeeSinkTokenFilter::TeeSinkTokenFilter(TokenStreamPtr input) : TokenFilter(input) { this->sinks = Collection::newInstance(); } TeeSinkTokenFilter::~TeeSinkTokenFilter() { } SinkTokenStreamPtr TeeSinkTokenFilter::newSinkTokenStream() { static SinkFilterPtr ACCEPT_ALL_FILTER; if (!ACCEPT_ALL_FILTER) { ACCEPT_ALL_FILTER = newLucene(); CycleCheck::addStatic(ACCEPT_ALL_FILTER); } return newSinkTokenStream(ACCEPT_ALL_FILTER); } SinkTokenStreamPtr TeeSinkTokenFilter::newSinkTokenStream(SinkFilterPtr filter) { SinkTokenStreamPtr sink(newLucene(this->cloneAttributes(), filter)); this->sinks.add(sink); return sink; } void TeeSinkTokenFilter::addSinkTokenStream(SinkTokenStreamPtr sink) { // check that sink has correct factory if (this->getAttributeFactory() != sink->getAttributeFactory()) boost::throw_exception(IllegalArgumentException(L"The supplied sink is not compatible to this tee.")); // add eventually missing attribute impls to the existing sink Collection attrImpls(this->cloneAttributes()->getAttributes()); for (Collection::iterator it = attrImpls.begin(); it != attrImpls.end(); ++it) sink->addAttribute((*it)->getClassName(), *it); this->sinks.add(sink); } void TeeSinkTokenFilter::consumeAllTokens() { while (incrementToken()) { } } bool TeeSinkTokenFilter::incrementToken() { if (input->incrementToken()) { // capture state lazily - maybe no SinkFilter accepts this state AttributeSourceStatePtr state; for (Collection::iterator ref = sinks.begin(); ref != sinks.end(); ++ref) { if (*ref) { if ((*ref)->accept(shared_from_this())) { if (!state) state = this->captureState(); (*ref)->addState(state); } } } return true; } return false; } void TeeSinkTokenFilter::end() { TokenFilter::end(); AttributeSourceStatePtr finalState(captureState()); for (Collection::iterator ref = sinks.begin(); ref != sinks.end(); 
++ref) { if (*ref) (*ref)->setFinalState(finalState); } } SinkFilter::~SinkFilter() { } void SinkFilter::reset() { // nothing to do; can be overridden } AcceptAllSinkFilter::~AcceptAllSinkFilter() { } bool AcceptAllSinkFilter::accept(AttributeSourcePtr source) { return true; } SinkTokenStream::SinkTokenStream(AttributeSourcePtr source, SinkFilterPtr filter) : TokenStream(source) { this->filter = filter; this->cachedStates = Collection::newInstance(); this->it = cachedStates.begin(); this->initIterator = false; } SinkTokenStream::~SinkTokenStream() { } bool SinkTokenStream::accept(AttributeSourcePtr source) { return filter->accept(source); } void SinkTokenStream::addState(AttributeSourceStatePtr state) { if (initIterator) boost::throw_exception(IllegalStateException(L"The tee must be consumed before sinks are consumed.")); cachedStates.add(state); } void SinkTokenStream::setFinalState(AttributeSourceStatePtr finalState) { this->finalState = finalState; } bool SinkTokenStream::incrementToken() { // lazy init the iterator if (!initIterator) { it = cachedStates.begin(); initIterator = true; } if (it == cachedStates.end()) return false; AttributeSourceStatePtr state = *it++; restoreState(state); return true; } void SinkTokenStream::end() { if (finalState) restoreState(finalState); } void SinkTokenStream::reset() { it = cachedStates.begin(); initIterator = false; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/Token.cpp000066400000000000000000000417511217574114600224510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Token.h" #include "Payload.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "PositionIncrementAttribute.h" #include "PayloadAttribute.h" #include "FlagsAttribute.h" #include "TypeAttribute.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t Token::MIN_BUFFER_SIZE = 10; Token::Token() { ConstructToken(0, 0, DEFAULT_TYPE(), 0); } Token::Token(int32_t start, int32_t end) { ConstructToken(start, end, DEFAULT_TYPE(), 0); } Token::Token(int32_t start, int32_t end, const String& type) { ConstructToken(start, end, type, 0); } Token::Token(int32_t start, int32_t end, int32_t flags) { ConstructToken(start, end, DEFAULT_TYPE(), flags); } Token::Token(const String& text, int32_t start, int32_t end) { ConstructToken(start, end, DEFAULT_TYPE(), 0); setTermBuffer(text); } Token::Token(const String& text, int32_t start, int32_t end, const String& type) { ConstructToken(start, end, type, 0); setTermBuffer(text); } Token::Token(const String& text, int32_t start, int32_t end, int32_t flags) { ConstructToken(start, end, DEFAULT_TYPE(), flags); setTermBuffer(text); } Token::Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end) { ConstructToken(start, end, DEFAULT_TYPE(), 0); setTermBuffer(startTermBuffer.get(), termBufferOffset, termBufferLength); } Token::~Token() { } void Token::ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags) { this->_termLength = 0; this->_startOffset = start; this->_endOffset = end; this->_type = type; this->flags = flags; this->positionIncrement = 1; } const String& Token::DEFAULT_TYPE() { static String _DEFAULT_TYPE(L"word"); return _DEFAULT_TYPE; } void Token::setPositionIncrement(int32_t positionIncrement) { if (positionIncrement < 0) boost::throw_exception(IllegalArgumentException(L"Increment must be zero or 
greater: " + StringUtils::toString(positionIncrement))); this->positionIncrement = positionIncrement; } int32_t Token::getPositionIncrement() { return positionIncrement; } String Token::term() { initTermBuffer(); return String(_termBuffer.get(), _termLength); } void Token::setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length) { growTermBuffer(length); MiscUtils::arrayCopy(buffer, offset, _termBuffer.get(), 0, length); _termLength = length; } void Token::setTermBuffer(const String& buffer) { int32_t length = (int32_t)buffer.size(); growTermBuffer(length); MiscUtils::arrayCopy(buffer.begin(), 0, _termBuffer.get(), 0, length); _termLength = length; } void Token::setTermBuffer(const String& buffer, int32_t offset, int32_t length) { BOOST_ASSERT(offset <= (int32_t)buffer.length()); BOOST_ASSERT(offset + length <= (int32_t)buffer.length()); growTermBuffer(length); MiscUtils::arrayCopy(buffer.begin(), offset, _termBuffer.get(), 0, length); _termLength = length; } CharArray Token::termBuffer() { if (!_termBuffer) initTermBuffer(); return _termBuffer; } wchar_t* Token::termBufferArray() { if (!_termBuffer) initTermBuffer(); return _termBuffer.get(); } CharArray Token::resizeTermBuffer(int32_t newSize) { if (!_termBuffer) { // The buffer is always at least MIN_BUFFER_SIZE _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); } else { if (_termBuffer.size() < newSize) { // Not big enough; create a new array with slight over allocation and preserve content _termBuffer.resize(MiscUtils::getNextSize(newSize)); } } return _termBuffer; } void Token::growTermBuffer(int32_t newSize) { _termBuffer = resizeTermBuffer(newSize); } void Token::initTermBuffer() { if (!_termBuffer) { _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(MIN_BUFFER_SIZE)); _termLength = 0; } } int32_t Token::termLength() { if (!_termBuffer) initTermBuffer(); return _termLength; } void Token::setTermLength(int32_t length) { initTermBuffer(); if 
(length > _termBuffer.size()) { boost::throw_exception(IllegalArgumentException(L"length " + StringUtils::toString(length) + L" exceeds the size of the termBuffer (" + StringUtils::toString(_termBuffer.size()) + L")")); } _termLength = length; } int32_t Token::startOffset() { return _startOffset; } void Token::setStartOffset(int32_t offset) { this->_startOffset = offset; } int32_t Token::endOffset() { return _endOffset; } void Token::setEndOffset(int32_t offset) { this->_endOffset = offset; } void Token::setOffset(int32_t startOffset, int32_t endOffset) { this->_startOffset = startOffset; this->_endOffset = endOffset; } String Token::type() { return _type; } void Token::setType(const String& type) { this->_type = type; } int32_t Token::getFlags() { return flags; } void Token::setFlags(int32_t flags) { this->flags = flags; } PayloadPtr Token::getPayload() { return this->payload; } void Token::setPayload(PayloadPtr payload) { this->payload = payload; } String Token::toString() { StringStream buffer; initTermBuffer(); buffer << L"("; if (!_termBuffer) buffer << L"null"; else buffer << term() << L"," << _startOffset << L"," << _endOffset; if (_type != L"word") buffer << L",type=" << _type; if (positionIncrement != 1) buffer << L",posIncr=" << positionIncrement; buffer << L")"; return buffer.str(); } void Token::clear() { payload.reset(); // Leave termBuffer to allow re-use _termLength = 0; positionIncrement = 1; flags = 0; _startOffset = 0; _endOffset = 0; _type = DEFAULT_TYPE(); } LuceneObjectPtr Token::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = Attribute::clone(other ? 
other : newLucene()); TokenPtr cloneToken(boost::dynamic_pointer_cast(clone)); cloneToken->_termLength = _termLength; cloneToken->_startOffset = _startOffset; cloneToken->_endOffset = _endOffset; cloneToken->_type = _type; cloneToken->flags = flags; cloneToken->positionIncrement = positionIncrement; // Do a deep clone if (_termBuffer) { cloneToken->_termBuffer = CharArray::newInstance(_termBuffer.size()); MiscUtils::arrayCopy(_termBuffer.get(), 0, cloneToken->_termBuffer.get(), 0, _termBuffer.size()); } if (payload) cloneToken->payload = boost::dynamic_pointer_cast(payload->clone()); return cloneToken; } TokenPtr Token::clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { TokenPtr clone(newLucene(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset)); clone->positionIncrement = positionIncrement; clone->flags = flags; clone->_type = _type; if (payload) clone->payload = boost::dynamic_pointer_cast(payload->clone()); return clone; } bool Token::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; TokenPtr otherToken(boost::dynamic_pointer_cast(other)); if (otherToken) { initTermBuffer(); otherToken->initTermBuffer(); if (_termLength == otherToken->_termLength && _startOffset == otherToken->_startOffset && _endOffset == otherToken->_endOffset && flags == otherToken->flags && positionIncrement == otherToken->positionIncrement && _type == otherToken->_type && (payload ? 
payload->equals(otherToken->payload) : !otherToken->payload)) { for (int32_t i = 0; i < _termLength; ++i) { if (_termBuffer[i] != otherToken->_termBuffer[i]) return false; } return true; } else return false; } else return false; } int32_t Token::hashCode() { initTermBuffer(); int32_t code = _termLength; code = code * 31 + _startOffset; code = code * 31 + _endOffset; code = code * 31 + flags; code = code * 31 + positionIncrement; code = code * 31 + StringUtils::hashCode(_type); code = payload ? code * 31 + payload->hashCode() : code; code = code * 31 + MiscUtils::hashCode(_termBuffer.get(), 0, _termLength); return code; } void Token::clearNoTermBuffer() { payload.reset(); positionIncrement = 1; flags = 0; _startOffset = 0; _endOffset = 0; _type = DEFAULT_TYPE(); } TokenPtr Token::reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { clearNoTermBuffer(); payload.reset(); positionIncrement = 1; setTermBuffer(newTermBuffer.get(), newTermOffset, newTermLength); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = newType; return shared_from_this(); } TokenPtr Token::reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { clearNoTermBuffer(); setTermBuffer(newTermBuffer.get(), newTermOffset, newTermLength); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = DEFAULT_TYPE(); return shared_from_this(); } TokenPtr Token::reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { clearNoTermBuffer(); setTermBuffer(newTerm); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = newType; return shared_from_this(); } TokenPtr Token::reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { clearNoTermBuffer(); setTermBuffer(newTerm, newTermOffset, 
newTermLength); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = newType; return shared_from_this(); } TokenPtr Token::reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset) { clearNoTermBuffer(); setTermBuffer(newTerm); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = DEFAULT_TYPE(); return shared_from_this(); } TokenPtr Token::reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { clearNoTermBuffer(); setTermBuffer(newTerm, newTermOffset, newTermLength); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = DEFAULT_TYPE(); return shared_from_this(); } void Token::reinit(TokenPtr prototype) { prototype->initTermBuffer(); setTermBuffer(prototype->_termBuffer.get(), 0, prototype->_termLength); positionIncrement = prototype->positionIncrement; flags = prototype->flags; _startOffset = prototype->_startOffset; _endOffset = prototype->_endOffset; _type = prototype->_type; payload = prototype->payload; } void Token::reinit(TokenPtr prototype, const String& newTerm) { setTermBuffer(newTerm); positionIncrement = prototype->positionIncrement; flags = prototype->flags; _startOffset = prototype->_startOffset; _endOffset = prototype->_endOffset; _type = prototype->_type; payload = prototype->payload; } void Token::reinit(TokenPtr prototype, CharArray newTermBuffer, int32_t offset, int32_t length) { setTermBuffer(newTermBuffer.get(), offset, length); positionIncrement = prototype->positionIncrement; flags = prototype->flags; _startOffset = prototype->_startOffset; _endOffset = prototype->_endOffset; _type = prototype->_type; payload = prototype->payload; } void Token::copyTo(AttributePtr target) { TokenPtr targetToken(boost::dynamic_pointer_cast(target)); if (targetToken) { targetToken->reinit(shared_from_this()); // reinit shares the payload, so clone it if (payload) targetToken->payload = boost::dynamic_pointer_cast(payload->clone()); } else 
{ initTermBuffer(); TermAttributePtr targetTermAttribute(boost::dynamic_pointer_cast(target)); if (targetTermAttribute) targetTermAttribute->setTermBuffer(_termBuffer.get(), 0, _termLength); OffsetAttributePtr targetOffsetAttribute(boost::dynamic_pointer_cast(target)); if (targetOffsetAttribute) targetOffsetAttribute->setOffset(_startOffset, _endOffset); PositionIncrementAttributePtr targetPositionIncrementAttribute(boost::dynamic_pointer_cast(target)); if (targetPositionIncrementAttribute) targetPositionIncrementAttribute->setPositionIncrement(positionIncrement); PayloadAttributePtr targetPayloadAttribute(boost::dynamic_pointer_cast(target)); if (targetPayloadAttribute) targetPayloadAttribute->setPayload(payload ? boost::dynamic_pointer_cast(payload->clone()) : PayloadPtr()); FlagsAttributePtr targetFlagsAttribute(boost::dynamic_pointer_cast(target)); if (targetFlagsAttribute) targetFlagsAttribute->setFlags(flags); TypeAttributePtr targetTypeAttribute(boost::dynamic_pointer_cast(target)); if (targetTypeAttribute) targetTypeAttribute->setType(_type); } } AttributeFactoryPtr Token::TOKEN_ATTRIBUTE_FACTORY() { static AttributeFactoryPtr _TOKEN_ATTRIBUTE_FACTORY; if (!_TOKEN_ATTRIBUTE_FACTORY) { _TOKEN_ATTRIBUTE_FACTORY = newLucene(AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY()); CycleCheck::addStatic(_TOKEN_ATTRIBUTE_FACTORY); } return _TOKEN_ATTRIBUTE_FACTORY; } TokenAttributeFactory::TokenAttributeFactory(AttributeFactoryPtr delegate) { this->delegate = delegate; } TokenAttributeFactory::~TokenAttributeFactory() { } AttributePtr TokenAttributeFactory::createAttributeInstance(const String& className) { return newLucene(); } bool TokenAttributeFactory::equals(LuceneObjectPtr other) { if (AttributeFactory::equals(other)) return true; TokenAttributeFactoryPtr otherTokenAttributeFactory(boost::dynamic_pointer_cast(other)); if (otherTokenAttributeFactory) return this->delegate->equals(otherTokenAttributeFactory->delegate); return false; } int32_t 
TokenAttributeFactory::hashCode() { return (delegate->hashCode() ^ 0x0a45aa31); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/TokenFilter.cpp000066400000000000000000000014141217574114600236070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TokenFilter.h" namespace Lucene { TokenFilter::TokenFilter(TokenStreamPtr input) : TokenStream(input) { this->input = input; } TokenFilter::~TokenFilter() { } void TokenFilter::end() { input->end(); } void TokenFilter::close() { input->close(); } void TokenFilter::reset() { input->reset(); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/TokenStream.cpp000066400000000000000000000015501217574114600236160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TokenStream.h" namespace Lucene { TokenStream::TokenStream() { } TokenStream::TokenStream(AttributeSourcePtr input) : AttributeSource(input) { } TokenStream::TokenStream(AttributeFactoryPtr factory) : AttributeSource(factory) { } TokenStream::~TokenStream() { } void TokenStream::end() { // do nothing by default } void TokenStream::reset() { } void TokenStream::close() { } } LucenePlusPlus-rel_3.0.4/src/core/analysis/Tokenizer.cpp000066400000000000000000000033631217574114600233400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Tokenizer.h" #include "CharReader.h" namespace Lucene { Tokenizer::Tokenizer() { } Tokenizer::Tokenizer(ReaderPtr input) { this->input = CharReader::get(input); this->charStream = boost::dynamic_pointer_cast(this->input); } Tokenizer::Tokenizer(AttributeFactoryPtr factory) : TokenStream(factory) { } Tokenizer::Tokenizer(AttributeFactoryPtr factory, ReaderPtr input) : TokenStream(factory) { this->input = CharReader::get(input); this->charStream = boost::dynamic_pointer_cast(this->input); } Tokenizer::Tokenizer(AttributeSourcePtr source) : TokenStream(source) { } Tokenizer::Tokenizer(AttributeSourcePtr source, ReaderPtr input) : TokenStream(source) { this->input = CharReader::get(input); this->charStream = boost::dynamic_pointer_cast(this->input); } Tokenizer::~Tokenizer() { } void Tokenizer::close() { if (input) { input->close(); input.reset(); // don't hold onto Reader after close } } int32_t Tokenizer::correctOffset(int32_t currentOff) { return charStream ? 
charStream->correctOffset(currentOff) : currentOff; } void Tokenizer::reset(ReaderPtr input) { this->input = input; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/WhitespaceAnalyzer.cpp000066400000000000000000000021421217574114600251620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "WhitespaceAnalyzer.h" #include "WhitespaceTokenizer.h" namespace Lucene { WhitespaceAnalyzer::~WhitespaceAnalyzer() { } TokenStreamPtr WhitespaceAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(reader); } TokenStreamPtr WhitespaceAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!tokenizer) { tokenizer = newLucene(reader); setPreviousTokenStream(tokenizer); } else tokenizer->reset(reader); return tokenizer; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/WhitespaceTokenizer.cpp000066400000000000000000000017701217574114600253550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "WhitespaceTokenizer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { WhitespaceTokenizer::WhitespaceTokenizer(ReaderPtr input) : CharTokenizer(input) { } WhitespaceTokenizer::WhitespaceTokenizer(AttributeSourcePtr source, ReaderPtr input) : CharTokenizer(source, input) { } WhitespaceTokenizer::WhitespaceTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : CharTokenizer(factory, input) { } WhitespaceTokenizer::~WhitespaceTokenizer() { } bool WhitespaceTokenizer::isTokenChar(wchar_t c) { return !UnicodeUtil::isSpace(c); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/WordlistLoader.cpp000066400000000000000000000056451217574114600243310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "WordlistLoader.h" #include "FileReader.h" #include "BufferedReader.h" namespace Lucene { WordlistLoader::~WordlistLoader() { } HashSet WordlistLoader::getWordSet(const String& wordfile, const String& comment) { HashSet result(HashSet::newInstance()); FileReaderPtr reader; LuceneException finally; try { reader = newLucene(wordfile); result = getWordSet(reader, comment); } catch (LuceneException& e) { finally = e; } if (reader) reader->close(); finally.throwException(); return result; } HashSet WordlistLoader::getWordSet(ReaderPtr reader, const String& comment) { HashSet result(HashSet::newInstance()); LuceneException finally; BufferedReaderPtr bufferedReader(boost::dynamic_pointer_cast(reader)); try { if (!bufferedReader) bufferedReader = newLucene(reader); String word; while (bufferedReader->readLine(word)) { if (comment.empty() || !boost::starts_with(word, comment)) { boost::trim(word); result.add(word); } } } catch (LuceneException& e) { finally = e; } if (bufferedReader) bufferedReader->close(); finally.throwException(); return result; } MapStringString WordlistLoader::getStemDict(const String& wordstemfile) { MapStringString result(MapStringString::newInstance()); BufferedReaderPtr bufferedReader; FileReaderPtr reader; LuceneException finally; try { reader = newLucene(wordstemfile); bufferedReader = newLucene(reader); String line; while (bufferedReader->readLine(line)) { String::size_type sep = line.find(L'\t'); if (sep != String::npos) result.put(line.substr(0, sep), line.substr(sep + 1)); } } catch (LuceneException& e) { finally = e; } if (reader) reader->close(); if (bufferedReader) bufferedReader->close(); finally.throwException(); return result; } } 
LucenePlusPlus-rel_3.0.4/src/core/analysis/standard/000077500000000000000000000000001217574114600224555ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/analysis/standard/StandardAnalyzer.cpp000066400000000000000000000074331217574114600264360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StandardAnalyzer.h" #include "_StandardAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopAnalyzer.h" #include "StopFilter.h" #include "WordlistLoader.h" namespace Lucene { /// Construct an analyzer with the given stop words. const int32_t StandardAnalyzer::DEFAULT_MAX_TOKEN_LENGTH = 255; StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion) { ConstructAnalyser(matchVersion, StopAnalyzer::ENGLISH_STOP_WORDS_SET()); } StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords) { ConstructAnalyser(matchVersion, stopWords); } StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords) { ConstructAnalyser(matchVersion, WordlistLoader::getWordSet(stopwords)); } StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, ReaderPtr stopwords) { ConstructAnalyser(matchVersion, WordlistLoader::getWordSet(stopwords)); } StandardAnalyzer::~StandardAnalyzer() { } void StandardAnalyzer::ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet stopWords) { stopSet = stopWords; enableStopPositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); replaceInvalidAcronym = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_24); 
this->matchVersion = matchVersion; this->maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; } TokenStreamPtr StandardAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) { StandardTokenizerPtr tokenStream(newLucene(matchVersion, reader)); tokenStream->setMaxTokenLength(maxTokenLength); TokenStreamPtr result(newLucene(tokenStream)); result = newLucene(result); result = newLucene(enableStopPositionIncrements, result, stopSet); return result; } void StandardAnalyzer::setMaxTokenLength(int32_t length) { maxTokenLength = length; } int32_t StandardAnalyzer::getMaxTokenLength() { return maxTokenLength; } TokenStreamPtr StandardAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) { StandardAnalyzerSavedStreamsPtr streams = boost::dynamic_pointer_cast(getPreviousTokenStream()); if (!streams) { streams = newLucene(); setPreviousTokenStream(streams); streams->tokenStream = newLucene(matchVersion, reader); streams->filteredTokenStream = newLucene(streams->tokenStream); streams->filteredTokenStream = newLucene(streams->filteredTokenStream); streams->filteredTokenStream = newLucene(enableStopPositionIncrements, streams->filteredTokenStream, stopSet); } else streams->tokenStream->reset(reader); streams->tokenStream->setMaxTokenLength(maxTokenLength); streams->tokenStream->setReplaceInvalidAcronym(replaceInvalidAcronym); return streams->filteredTokenStream; } StandardAnalyzerSavedStreams::~StandardAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.4/src/core/analysis/standard/StandardFilter.cpp000066400000000000000000000043351217574114600260740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StandardFilter.h" #include "StandardTokenizer.h" #include "TermAttribute.h" #include "TypeAttribute.h" namespace Lucene { StandardFilter::StandardFilter(TokenStreamPtr input) : TokenFilter(input) { termAtt = addAttribute(); typeAtt = addAttribute(); } StandardFilter::~StandardFilter() { } const String& StandardFilter::APOSTROPHE_TYPE() { static String _APOSTROPHE_TYPE; if (_APOSTROPHE_TYPE.empty()) _APOSTROPHE_TYPE = StandardTokenizer::TOKEN_TYPES()[StandardTokenizer::APOSTROPHE]; return _APOSTROPHE_TYPE; } const String& StandardFilter::ACRONYM_TYPE() { static String _ACRONYM_TYPE; if (_ACRONYM_TYPE.empty()) _ACRONYM_TYPE = StandardTokenizer::TOKEN_TYPES()[StandardTokenizer::ACRONYM]; return _ACRONYM_TYPE; } bool StandardFilter::incrementToken() { if (!input->incrementToken()) return false; wchar_t* termBuffer = termAtt->termBufferArray(); int32_t bufferLength = termAtt->termLength(); String type(typeAtt->type()); if (type == APOSTROPHE_TYPE() && bufferLength >= 2 && termBuffer[bufferLength - 2] == L'\'' && (termBuffer[bufferLength - 1] == L's' || termBuffer[bufferLength - 1] == L'S')) // remove 's { // Strip last 2 characters off termAtt->setTermLength(bufferLength - 2); } else if (type == ACRONYM_TYPE()) // remove dots { int32_t upto = 0; for (int32_t i = 0; i < bufferLength; ++i) { wchar_t c = termBuffer[i]; if (c != L'.') termBuffer[upto++] = c; } termAtt->setTermLength(upto); } return true; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/standard/StandardTokenizer.cpp000066400000000000000000000124371217574114600266230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StandardTokenizer.h" #include "StandardTokenizerImpl.h" #include "StandardAnalyzer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "PositionIncrementAttribute.h" #include "TypeAttribute.h" namespace Lucene { const int32_t StandardTokenizer::ALPHANUM = 0; const int32_t StandardTokenizer::APOSTROPHE = 1; const int32_t StandardTokenizer::ACRONYM = 2; const int32_t StandardTokenizer::COMPANY = 3; const int32_t StandardTokenizer::EMAIL = 4; const int32_t StandardTokenizer::HOST = 5; const int32_t StandardTokenizer::NUM = 6; const int32_t StandardTokenizer::CJ = 7; /// @deprecated this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. const int32_t StandardTokenizer::ACRONYM_DEP = 8; StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, ReaderPtr input) { this->scanner = newLucene(input); init(input, matchVersion); } StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, AttributeSourcePtr source, ReaderPtr input) : Tokenizer(source) { this->scanner = newLucene(input); init(input, matchVersion); } StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, AttributeFactoryPtr factory, ReaderPtr input) : Tokenizer(factory) { this->scanner = newLucene(input); init(input, matchVersion); } StandardTokenizer::~StandardTokenizer() { } const Collection StandardTokenizer::TOKEN_TYPES() { static Collection _TOKEN_TYPES; if (!_TOKEN_TYPES) { _TOKEN_TYPES = newCollection( L"", L"", L"", L"", L"", L"", L"", L"", L"" ); } return _TOKEN_TYPES; } void StandardTokenizer::init(ReaderPtr input, LuceneVersion::Version matchVersion) { replaceInvalidAcronym = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_24); maxTokenLength = StandardAnalyzer::DEFAULT_MAX_TOKEN_LENGTH; this->input = input; termAtt = addAttribute(); offsetAtt = addAttribute(); posIncrAtt = addAttribute(); 
typeAtt = addAttribute(); } void StandardTokenizer::setMaxTokenLength(int32_t length) { this->maxTokenLength = length; } int32_t StandardTokenizer::getMaxTokenLength() { return maxTokenLength; } bool StandardTokenizer::incrementToken() { clearAttributes(); int32_t posIncr = 1; while (true) { int32_t tokenType = scanner->getNextToken(); if (tokenType == StandardTokenizerImpl::YYEOF) return false; if (scanner->yylength() <= maxTokenLength) { posIncrAtt->setPositionIncrement(posIncr); scanner->getText(termAtt); int32_t start = scanner->yychar(); offsetAtt->setOffset(correctOffset(start), correctOffset(start + termAtt->termLength())); // This 'if' should be removed in the next release. For now, it converts invalid acronyms to HOST. /// When removed, only the 'else' part should remain. if (tokenType == ACRONYM_DEP) { if (replaceInvalidAcronym) { typeAtt->setType(TOKEN_TYPES()[HOST]); termAtt->setTermLength(termAtt->termLength() - 1); // remove extra '.' } else typeAtt->setType(TOKEN_TYPES()[ACRONYM]); } else typeAtt->setType(TOKEN_TYPES()[tokenType]); return true; } else { // When we skip a too-long term, we still increment the position increment ++posIncr; } } } void StandardTokenizer::end() { // set final offset int32_t finalOffset = correctOffset(scanner->yychar() + scanner->yylength()); offsetAtt->setOffset(finalOffset, finalOffset); } void StandardTokenizer::reset(ReaderPtr input) { Tokenizer::reset(input); scanner->reset(input); } bool StandardTokenizer::isReplaceInvalidAcronym() { return replaceInvalidAcronym; } void StandardTokenizer::setReplaceInvalidAcronym(bool replaceInvalidAcronym) { this->replaceInvalidAcronym = replaceInvalidAcronym; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/standard/StandardTokenizerImpl.cpp000066400000000000000000000467541217574114600274560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StandardTokenizerImpl.h" #include "StandardTokenizer.h" #include "Reader.h" #include "Token.h" #include "TermAttribute.h" #include "MiscUtils.h" namespace Lucene { /// Initial size of the lookahead buffer const int32_t StandardTokenizerImpl::ZZ_BUFFERSIZE = 16384; /// Translates characters to character classes const wchar_t StandardTokenizerImpl::ZZ_CMAP_PACKED[] = { L"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5" L"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12" L"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12" L"\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12\34\0\136\12" L"\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12\11\0\1\12" L"\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12\1\0\24\12" L"\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12\12\0\71\12" L"\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12\67\0\46\12" L"\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12\56\0\32\12" L"\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12\17\0\2\12" L"\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0\46\12\u015f\0" L"\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2\25\0" L"\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12\3\0" L"\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12\23\0\6\12" L"\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12\1\0\2\12" L"\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2\2\0\3\12" L"\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12\1\0\7\12" L"\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0\1\12" L"\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12" L"\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12" L"\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12\3\0\2\12" L"\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12\3\0\10\12" L"\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12\1\0\27\12" L"\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2\25\0\10\12" L"\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\44\0\1\12" L"\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12" 
L"\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12\3\0\30\12" L"\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1\60\12\1\1" L"\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0\1\12\2\0" L"\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0\7\12\1\0" L"\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0\4\12\1\0" L"\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0\12\2\2\0" L"\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0\42\12\35\0" L"\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0\12\2\6\0" L"\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0\104\12\5\0" L"\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0\4\12\2\0" L"\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0\1\12\1\0" L"\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0" L"\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0\27\12\1\0" L"\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\47\12\1\0" L"\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0" L"\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0\12\2\6\0" L"\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0\26\12\2\0" L"\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0\1\12\1\0" L"\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0\7\12\1\0" L"\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0\6\12\4\0" L"\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0\1\12\4\0" L"\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0\1\12\1\0" L"\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0\7\12\u0ecb\0" L"\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13\2\13\132\13" L"\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0" L"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12" L"\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12\5\0\1\12" L"\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12\1\0\2\12" L"\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12\2\0\66\12" L"\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12\23\0\12\2" L"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12" L"\2\0\6\12\2\0\6\12\2\0\3\12\43\0" }; const int32_t StandardTokenizerImpl::ZZ_CMAP_LENGTH = 65536; const int32_t StandardTokenizerImpl::ZZ_CMAP_PACKED_LENGTH = 1154; const wchar_t StandardTokenizerImpl::ZZ_ACTION_PACKED_0[] = { L"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4" L"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4" 
L"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12" L"\1\4" }; const int32_t StandardTokenizerImpl::ZZ_ACTION_LENGTH = 51; const int32_t StandardTokenizerImpl::ZZ_ACTION_PACKED_LENGTH = 50; const wchar_t StandardTokenizerImpl::ZZ_ROWMAP_PACKED_0[] = { L"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124" L"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304" L"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134" L"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4" L"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206" L"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214" L"\0\u0268\0\u0276\0\u0284" }; const int32_t StandardTokenizerImpl::ZZ_ROWMAP_LENGTH = 51; const int32_t StandardTokenizerImpl::ZZ_ROWMAP_PACKED_LENGTH = 102; const wchar_t StandardTokenizerImpl::ZZ_TRANS_PACKED_0[] = { L"\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2" L"\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13" L"\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11" L"\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20" L"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0" L"\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27" L"\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0" L"\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37" L"\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44" L"\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0" L"\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4" L"\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0" L"\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24" L"\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54" L"\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0" L"\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56" L"\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52" L"\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31" L"\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0" L"\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0" L"\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33" L"\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13" L"\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11" L"\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57" L"\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0" L"\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37" L"\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40" L"\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12" L"\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13" 
L"\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16" L"\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13" L"\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25" L"\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0" L"\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0" L"\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0" L"\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0" L"\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0" L"\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0" L"\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0" L"\1\11\2\52\1\0\1\24\3\0" }; const int32_t StandardTokenizerImpl::ZZ_TRANS_LENGTH = 658; const int32_t StandardTokenizerImpl::ZZ_TRANS_PACKED_LENGTH = 634; const int32_t StandardTokenizerImpl::ZZ_UNKNOWN_ERROR = 0; const int32_t StandardTokenizerImpl::ZZ_NO_MATCH = 1; const int32_t StandardTokenizerImpl::ZZ_PUSHBACK_2BIG = 2; const wchar_t* StandardTokenizerImpl::ZZ_ERROR_MSG[] = { L"Unknown internal scanner error", L"Error: could not match input", L"Error: pushback value was too large" }; const wchar_t StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_0[] = { L"\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0" L"\1\1\1\0\17\1\1\0\1\1\3\0\5\1" }; const int32_t StandardTokenizerImpl::ZZ_ATTRIBUTE_LENGTH = 51; const int32_t StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_LENGTH = 30; /// This character denotes the end of file const int32_t StandardTokenizerImpl::YYEOF = -1; /// Lexical states const int32_t StandardTokenizerImpl::YYINITIAL = 0; StandardTokenizerImpl::StandardTokenizerImpl(ReaderPtr in) { this->zzState = 0; this->zzLexicalState = YYINITIAL; this->zzBuffer = CharArray::newInstance(ZZ_BUFFERSIZE); this->zzMarkedPos = 0; this->zzPushbackPos = 0; this->zzCurrentPos = 0; this->zzStartRead = 0; this->zzEndRead = 0; this->yyline = 0; this->_yychar = 0; this->yycolumn = 0; this->zzAtBOL = true; this->zzAtEOF = false; this->zzReader = in; } StandardTokenizerImpl::~StandardTokenizerImpl() { } const wchar_t* StandardTokenizerImpl::ZZ_CMAP() { static CharArray _ZZ_CMAP; if (!_ZZ_CMAP) { _ZZ_CMAP = CharArray::newInstance(ZZ_CMAP_LENGTH); wchar_t* result = _ZZ_CMAP.get(); int32_t i = 0; // 
index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_CMAP_PACKED_LENGTH) { int32_t count = ZZ_CMAP_PACKED[i++]; wchar_t value = ZZ_CMAP_PACKED[i++]; do result[j++] = value; while (--count > 0); } } return _ZZ_CMAP.get(); } const int32_t* StandardTokenizerImpl::ZZ_ACTION() { static IntArray _ZZ_ACTION; if (!_ZZ_ACTION) { _ZZ_ACTION = IntArray::newInstance(ZZ_ACTION_LENGTH); int32_t* result = _ZZ_ACTION.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_ACTION_PACKED_LENGTH) { int32_t count = ZZ_ACTION_PACKED_0[i++]; int32_t value = ZZ_ACTION_PACKED_0[i++]; do result[j++] = value; while (--count > 0); } } return _ZZ_ACTION.get(); } const int32_t* StandardTokenizerImpl::ZZ_ROWMAP() { static IntArray _ZZ_ROWMAP; if (!_ZZ_ROWMAP) { _ZZ_ROWMAP = IntArray::newInstance(ZZ_ROWMAP_LENGTH); int32_t* result = _ZZ_ROWMAP.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_ROWMAP_PACKED_LENGTH) { int32_t high = ZZ_ROWMAP_PACKED_0[i++] << 16; result[j++] = high | ZZ_ROWMAP_PACKED_0[i++]; } } return _ZZ_ROWMAP.get(); } const int32_t* StandardTokenizerImpl::ZZ_TRANS() { static IntArray _ZZ_TRANS; if (!_ZZ_TRANS) { _ZZ_TRANS = IntArray::newInstance(ZZ_TRANS_LENGTH); int32_t* result = _ZZ_TRANS.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_TRANS_PACKED_LENGTH) { int32_t count = ZZ_TRANS_PACKED_0[i++]; int32_t value = ZZ_TRANS_PACKED_0[i++]; --value; do result[j++] = value; while (--count > 0); } } return _ZZ_TRANS.get(); } const int32_t* StandardTokenizerImpl::ZZ_ATTRIBUTE() { static IntArray _ZZ_ATTRIBUTE; if (!_ZZ_ATTRIBUTE) { _ZZ_ATTRIBUTE = IntArray::newInstance(ZZ_ATTRIBUTE_LENGTH); int32_t* result = _ZZ_ATTRIBUTE.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_ATTRIBUTE_PACKED_LENGTH) { int32_t count = ZZ_ATTRIBUTE_PACKED_0[i++]; int32_t 
value = ZZ_ATTRIBUTE_PACKED_0[i++]; do result[j++] = value; while (--count > 0); } } return _ZZ_ATTRIBUTE.get(); } int32_t StandardTokenizerImpl::yychar() { return _yychar; } void StandardTokenizerImpl::reset(ReaderPtr r) { // reset to default buffer size, if buffer has grown if (zzBuffer.size() > ZZ_BUFFERSIZE) zzBuffer.resize(ZZ_BUFFERSIZE); yyreset(r); } void StandardTokenizerImpl::getText(TokenPtr t) { t->setTermBuffer(zzBuffer.get(), zzStartRead, zzMarkedPos - zzStartRead); } void StandardTokenizerImpl::getText(TermAttributePtr t) { t->setTermBuffer(zzBuffer.get(), zzStartRead, zzMarkedPos - zzStartRead); } bool StandardTokenizerImpl::zzRefill() { // first: make room (if you can) if (zzStartRead > 0) { MiscUtils::arrayCopy(zzBuffer.get(), zzStartRead, zzBuffer.get(), 0, zzEndRead - zzStartRead); // translate stored positions zzEndRead -= zzStartRead; zzCurrentPos -= zzStartRead; zzMarkedPos -= zzStartRead; zzPushbackPos -= zzStartRead; zzStartRead = 0; } // is the buffer big enough? 
if (zzCurrentPos >= zzBuffer.size()) zzBuffer.resize(zzCurrentPos * 2); // finally: fill the buffer with new input int32_t numRead = zzReader->read(zzBuffer.get(), zzEndRead, zzBuffer.size() - zzEndRead); if (numRead < 0) return true; else { zzEndRead += numRead; return false; } } void StandardTokenizerImpl::yyclose() { zzAtEOF = true; // indicate end of file zzEndRead = zzStartRead; // invalidate buffer if (zzReader) zzReader->close(); } void StandardTokenizerImpl::yyreset(ReaderPtr reader) { zzReader = reader; zzAtBOL = true; zzAtEOF = false; zzEndRead = 0; zzStartRead = 0; zzCurrentPos = 0; zzMarkedPos = 0; zzPushbackPos = 0; yyline = 0; _yychar = 0; yycolumn = 0; zzLexicalState = YYINITIAL; } int32_t StandardTokenizerImpl::yystate() { return zzLexicalState; } void StandardTokenizerImpl::yybegin(int32_t newState) { zzLexicalState = newState; } String StandardTokenizerImpl::yytext() { return String(zzBuffer.get() + zzStartRead, zzMarkedPos - zzStartRead); } wchar_t StandardTokenizerImpl::yycharat(int32_t pos) { return zzBuffer[zzStartRead + pos]; } int32_t StandardTokenizerImpl::yylength() { return zzMarkedPos - zzStartRead; } void StandardTokenizerImpl::zzScanError(int32_t errorCode) { boost::throw_exception(ParseException(ZZ_ERROR_MSG[errorCode])); } void StandardTokenizerImpl::yypushback(int32_t number) { if (number > yylength()) zzScanError(ZZ_PUSHBACK_2BIG); zzMarkedPos -= number; } int32_t StandardTokenizerImpl::getNextToken() { int32_t zzInput; int32_t zzAction; // cached fields int32_t zzCurrentPosL; int32_t zzMarkedPosL; int32_t zzEndReadL = zzEndRead; wchar_t* zzBufferL = zzBuffer.get(); const wchar_t* zzCMapL = ZZ_CMAP(); const int32_t* zzTransL = ZZ_TRANS(); const int32_t* zzRowMapL = ZZ_ROWMAP(); const int32_t* zzAttrL = ZZ_ATTRIBUTE(); const int32_t* zzActionL = ZZ_ACTION(); while (true) { zzMarkedPosL = zzMarkedPos; _yychar += zzMarkedPosL - zzStartRead; zzAction = -1; zzCurrentPosL = zzMarkedPosL; zzCurrentPos = zzMarkedPosL; zzStartRead = 
zzMarkedPosL; zzState = zzLexicalState; while (true) { if (zzCurrentPosL < zzEndReadL) zzInput = zzBufferL[zzCurrentPosL++]; else if (zzAtEOF) { zzInput = YYEOF; break; } else { // store back cached positions zzCurrentPos = zzCurrentPosL; zzMarkedPos = zzMarkedPosL; bool eof = zzRefill(); // get translated positions and possibly new buffer zzCurrentPosL = zzCurrentPos; zzMarkedPosL = zzMarkedPos; zzBufferL = zzBuffer.get(); zzEndReadL = zzEndRead; if (eof) { zzInput = YYEOF; break; } else zzInput = zzBufferL[zzCurrentPosL++]; } int32_t zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]; if (zzNext == -1) break; zzState = zzNext; int32_t zzAttributes = zzAttrL[zzState]; if ((zzAttributes & 1) == 1) { zzAction = zzState; zzMarkedPosL = zzCurrentPosL; if ((zzAttributes & 8) == 8) break; } } // store back cached position zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? zzAction : zzActionL[zzAction]) { case 4: return StandardTokenizer::HOST; case 11: break; case 9: return StandardTokenizer::ACRONYM; case 12: break; case 8: return StandardTokenizer::ACRONYM_DEP; case 13: break; case 1: // ignore case 14: break; case 5: return StandardTokenizer::NUM; case 15: break; case 3: return StandardTokenizer::CJ; case 16: break; case 2: return StandardTokenizer::ALPHANUM; case 17: break; case 7: return StandardTokenizer::COMPANY; case 18: break; case 6: return StandardTokenizer::APOSTROPHE; case 19: break; case 10: return StandardTokenizer::EMAIL; case 20: break; default: if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { zzAtEOF = true; return YYEOF; } else zzScanError(ZZ_NO_MATCH); } } return YYINITIAL; } } 
LucenePlusPlus-rel_3.0.4/src/core/analysis/tokenattributes/000077500000000000000000000000001217574114600241045ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/analysis/tokenattributes/FlagsAttribute.cpp000066400000000000000000000034661217574114600275410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FlagsAttribute.h" #include "StringUtils.h" namespace Lucene { FlagsAttribute::FlagsAttribute() { flags = 0; } FlagsAttribute::~FlagsAttribute() { } String FlagsAttribute::toString() { return L"flags=" + StringUtils::toString(flags); } int32_t FlagsAttribute::getFlags() { return flags; } void FlagsAttribute::setFlags(int32_t flags) { this->flags = flags; } void FlagsAttribute::clear() { flags = 0; } bool FlagsAttribute::equals(LuceneObjectPtr other) { if (Attribute::equals(other)) return true; FlagsAttributePtr otherFlagsAttribute(boost::dynamic_pointer_cast(other)); if (otherFlagsAttribute) return (otherFlagsAttribute->flags == flags); return false; } int32_t FlagsAttribute::hashCode() { return flags; } void FlagsAttribute::copyTo(AttributePtr target) { boost::dynamic_pointer_cast(target)->setFlags(flags); } LuceneObjectPtr FlagsAttribute::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(); FlagsAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->flags = flags; return cloneAttribute; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/tokenattributes/OffsetAttribute.cpp000066400000000000000000000046371217574114600277340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OffsetAttribute.h" #include "StringUtils.h" namespace Lucene { OffsetAttribute::OffsetAttribute() { _startOffset = 0; _endOffset = 0; } OffsetAttribute::~OffsetAttribute() { } String OffsetAttribute::toString() { return L"startOffset=" + StringUtils::toString(_startOffset) + L";endOffset=" + StringUtils::toString(_endOffset); } int32_t OffsetAttribute::startOffset() { return _startOffset; } void OffsetAttribute::setOffset(int32_t startOffset, int32_t endOffset) { this->_startOffset = startOffset; this->_endOffset = endOffset; } int32_t OffsetAttribute::endOffset() { return _endOffset; } void OffsetAttribute::clear() { _startOffset = 0; _endOffset = 0; } bool OffsetAttribute::equals(LuceneObjectPtr other) { if (Attribute::equals(other)) return true; OffsetAttributePtr otherOffsetAttribute(boost::dynamic_pointer_cast(other)); if (otherOffsetAttribute) return (otherOffsetAttribute->_startOffset == _startOffset && otherOffsetAttribute->_endOffset == _endOffset); return false; } int32_t OffsetAttribute::hashCode() { int32_t code = _startOffset; code = code * 31 + _endOffset; return code; } void OffsetAttribute::copyTo(AttributePtr target) { OffsetAttributePtr targetOffsetAttribute(boost::dynamic_pointer_cast(target)); targetOffsetAttribute->setOffset(_startOffset, _endOffset); } 
LuceneObjectPtr OffsetAttribute::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); OffsetAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->_startOffset = _startOffset; cloneAttribute->_endOffset = _endOffset; return cloneAttribute; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/tokenattributes/PayloadAttribute.cpp000066400000000000000000000045371217574114600300760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadAttribute.h" #include "Payload.h" #include "StringUtils.h" namespace Lucene { PayloadAttribute::PayloadAttribute() { } PayloadAttribute::PayloadAttribute(PayloadPtr payload) { this->payload = payload; } PayloadAttribute::~PayloadAttribute() { } String PayloadAttribute::toString() { return L"payload(length)=" + StringUtils::toString(payload->length()); } PayloadPtr PayloadAttribute::getPayload() { return this->payload; } void PayloadAttribute::setPayload(PayloadPtr payload) { this->payload = payload; } void PayloadAttribute::clear() { payload.reset(); } LuceneObjectPtr PayloadAttribute::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = Attribute::clone(other ? 
other : newLucene()); PayloadAttributePtr cloneAttribute(boost::dynamic_pointer_cast(clone)); if (payload) cloneAttribute->payload = boost::dynamic_pointer_cast(payload->clone()); return cloneAttribute; } bool PayloadAttribute::equals(LuceneObjectPtr other) { if (Attribute::equals(other)) return true; PayloadAttributePtr otherAttribute(boost::dynamic_pointer_cast(other)); if (otherAttribute) { if (!otherAttribute->payload && !payload) return true; return otherAttribute->payload->equals(payload); } return false; } int32_t PayloadAttribute::hashCode() { return payload ? payload->hashCode() : 0; } void PayloadAttribute::copyTo(AttributePtr target) { PayloadAttributePtr targetPayloadAttribute(boost::dynamic_pointer_cast(target)); targetPayloadAttribute->setPayload(payload ? boost::dynamic_pointer_cast(payload->clone()) : PayloadPtr()); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/tokenattributes/PositionIncrementAttribute.cpp000066400000000000000000000050541217574114600321510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PositionIncrementAttribute.h" #include "StringUtils.h" namespace Lucene { PositionIncrementAttribute::PositionIncrementAttribute() { positionIncrement = 1; } PositionIncrementAttribute::~PositionIncrementAttribute() { } String PositionIncrementAttribute::toString() { return L"positionIncrement=" + StringUtils::toString(positionIncrement); } void PositionIncrementAttribute::setPositionIncrement(int32_t positionIncrement) { if (positionIncrement < 0) boost::throw_exception(IllegalArgumentException(L"Increment must be zero or greater: " + StringUtils::toString(positionIncrement))); this->positionIncrement = positionIncrement; } int32_t PositionIncrementAttribute::getPositionIncrement() { return positionIncrement; } void PositionIncrementAttribute::clear() { this->positionIncrement = 1; } bool PositionIncrementAttribute::equals(LuceneObjectPtr other) { if (Attribute::equals(other)) return true; PositionIncrementAttributePtr otherPositionIncrementAttribute(boost::dynamic_pointer_cast(other)); if (otherPositionIncrementAttribute) return positionIncrement == otherPositionIncrementAttribute->positionIncrement; return false; } int32_t PositionIncrementAttribute::hashCode() { return positionIncrement; } void PositionIncrementAttribute::copyTo(AttributePtr target) { PositionIncrementAttributePtr targetPositionIncrementAttribute(boost::dynamic_pointer_cast(target)); targetPositionIncrementAttribute->setPositionIncrement(positionIncrement); } LuceneObjectPtr PositionIncrementAttribute::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(); PositionIncrementAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->positionIncrement = positionIncrement; return cloneAttribute; } } LucenePlusPlus-rel_3.0.4/src/core/analysis/tokenattributes/TermAttribute.cpp000066400000000000000000000120431217574114600274030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermAttribute.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t TermAttribute::MIN_BUFFER_SIZE = 10; TermAttribute::TermAttribute() { _termLength = 0; } TermAttribute::~TermAttribute() { } String TermAttribute::toString() { return L"term=" + term(); } String TermAttribute::term() { initTermBuffer(); return String(_termBuffer.get(), _termLength); } void TermAttribute::setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length) { growTermBuffer(length); MiscUtils::arrayCopy(buffer, offset, _termBuffer.get(), 0, length); _termLength = length; } void TermAttribute::setTermBuffer(const String& buffer) { int32_t length = (int32_t)buffer.size(); growTermBuffer(length); MiscUtils::arrayCopy(buffer.begin(), 0, _termBuffer.get(), 0, length); _termLength = length; } CharArray TermAttribute::termBuffer() { if (!_termBuffer) initTermBuffer(); return _termBuffer; } wchar_t* TermAttribute::termBufferArray() { if (!_termBuffer) initTermBuffer(); return _termBuffer.get(); } CharArray TermAttribute::resizeTermBuffer(int32_t newSize) { if (!_termBuffer) { // The buffer is always at least MIN_BUFFER_SIZE _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); } else if 
(_termBuffer.size() < newSize) _termBuffer.resize(MiscUtils::getNextSize(newSize)); return _termBuffer; } void TermAttribute::growTermBuffer(int32_t newSize) { if (!_termBuffer) { // The buffer is always at least MIN_BUFFER_SIZE _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); } else if (_termBuffer.size() < newSize) _termBuffer.resize(MiscUtils::getNextSize(newSize)); } void TermAttribute::initTermBuffer() { if (!_termBuffer) { _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(MIN_BUFFER_SIZE)); _termLength = 0; } } int32_t TermAttribute::termLength() { return _termLength; } void TermAttribute::setTermLength(int32_t length) { if (!_termBuffer) initTermBuffer(); if (length > _termBuffer.size()) { boost::throw_exception(IllegalArgumentException(L"length " + StringUtils::toString(length) + L" exceeds the size of the termBuffer (" + StringUtils::toString(_termBuffer.size()) + L")")); } _termLength = length; } int32_t TermAttribute::hashCode() { initTermBuffer(); int32_t code = _termLength; code = code * 31 + MiscUtils::hashCode(_termBuffer.get(), 0, _termLength); return code; } void TermAttribute::clear() { _termLength = 0; } LuceneObjectPtr TermAttribute::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = Attribute::clone(other ? 
other : newLucene()); TermAttributePtr cloneAttribute(boost::dynamic_pointer_cast(clone)); cloneAttribute->_termLength = _termLength; if (_termBuffer) { cloneAttribute->_termBuffer = CharArray::newInstance(_termBuffer.size()); MiscUtils::arrayCopy(_termBuffer.get(), 0, cloneAttribute->_termBuffer.get(), 0, _termBuffer.size()); } return cloneAttribute; } bool TermAttribute::equals(LuceneObjectPtr other) { if (Attribute::equals(other)) return true; TermAttributePtr otherTermAttribute(boost::dynamic_pointer_cast(other)); if (otherTermAttribute) { initTermBuffer(); otherTermAttribute->initTermBuffer(); if (_termLength != otherTermAttribute->_termLength) return false; return (std::memcmp(_termBuffer.get(), otherTermAttribute->_termBuffer.get(), _termLength) == 0); } return false; } void TermAttribute::copyTo(AttributePtr target) { initTermBuffer(); TermAttributePtr targetTermAttribute(boost::dynamic_pointer_cast(target)); targetTermAttribute->setTermBuffer(_termBuffer.get(), 0, _termLength); } } LucenePlusPlus-rel_3.0.4/src/core/analysis/tokenattributes/TypeAttribute.cpp000066400000000000000000000040311217574114600274130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TypeAttribute.h" #include "StringUtils.h" namespace Lucene { TypeAttribute::TypeAttribute() { _type = DEFAULT_TYPE(); } TypeAttribute::TypeAttribute(const String& type) { _type = type; } TypeAttribute::~TypeAttribute() { } const String& TypeAttribute::DEFAULT_TYPE() { static String _DEFAULT_TYPE(L"word"); return _DEFAULT_TYPE; } String TypeAttribute::toString() { return L"type=" + _type; } String TypeAttribute::type() { return _type; } void TypeAttribute::setType(const String& type) { _type = type; } void TypeAttribute::clear() { _type = DEFAULT_TYPE(); } bool TypeAttribute::equals(LuceneObjectPtr other) { if (Attribute::equals(other)) return true; TypeAttributePtr otherTypeAttribute(boost::dynamic_pointer_cast(other)); if (otherTypeAttribute) return (otherTypeAttribute->_type == _type); return false; } int32_t TypeAttribute::hashCode() { return StringUtils::hashCode(_type); } void TypeAttribute::copyTo(AttributePtr target) { boost::dynamic_pointer_cast(target)->setType(_type); } LuceneObjectPtr TypeAttribute::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); TypeAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->_type = _type; return cloneAttribute; } } LucenePlusPlus-rel_3.0.4/src/core/document/000077500000000000000000000000001217574114600206505ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/document/AbstractField.cpp000066400000000000000000000134551217574114600240730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AbstractField.h" #include "Field.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { AbstractField::AbstractField() { this->_name = L"body"; this->storeTermVector = false; this->storeOffsetWithTermVector = false; this->storePositionWithTermVector = false; this->_omitNorms = false; this->_isStored = false; this->_isIndexed = true; this->_isTokenized = true; this->_isBinary = false; this->lazy = false; this->omitTermFreqAndPositions = false; this->boost = 1.0; this->fieldsData = VariantUtils::null(); this->binaryLength = 0; this->binaryOffset = 0; } AbstractField::AbstractField(const String& name, Field::Store store, Field::Index index, Field::TermVector termVector) { this->_name = name; this->_isStored = Field::isStored(store); this->_isIndexed = Field::isIndexed(index); this->_isTokenized = Field::isAnalyzed(index); this->_omitNorms = Field::omitNorms(index); this->_isBinary = false; this->lazy = false; this->omitTermFreqAndPositions = false; this->boost = 1.0; this->fieldsData = VariantUtils::null(); this->binaryLength = 0; this->binaryOffset = 0; setStoreTermVector(termVector); } AbstractField::~AbstractField() { } void AbstractField::setBoost(double boost) { this->boost = boost; } double AbstractField::getBoost() { return boost; } String AbstractField::name() { return _name; } void AbstractField::setStoreTermVector(Field::TermVector termVector) { this->storeTermVector = Field::isStored(termVector); this->storePositionWithTermVector = Field::withPositions(termVector); this->storeOffsetWithTermVector = Field::withOffsets(termVector); } bool AbstractField::isStored() { return _isStored; } bool AbstractField::isIndexed() { return _isIndexed; } bool AbstractField::isTokenized() { return _isTokenized; } bool AbstractField::isTermVectorStored() { return storeTermVector; } bool AbstractField::isStoreOffsetWithTermVector() { return 
storeOffsetWithTermVector; } bool AbstractField::isStorePositionWithTermVector() { return storePositionWithTermVector; } bool AbstractField::isBinary() { return _isBinary; } ByteArray AbstractField::getBinaryValue() { return getBinaryValue(ByteArray()); } ByteArray AbstractField::getBinaryValue(ByteArray result) { return VariantUtils::get(fieldsData); } int32_t AbstractField::getBinaryLength() { if (_isBinary) return binaryLength; ByteArray binary(VariantUtils::get(fieldsData)); return binary ? binary.size() : 0; } int32_t AbstractField::getBinaryOffset() { return binaryOffset; } bool AbstractField::getOmitNorms() { return _omitNorms; } bool AbstractField::getOmitTermFreqAndPositions() { return omitTermFreqAndPositions; } void AbstractField::setOmitNorms(bool omitNorms) { this->_omitNorms = omitNorms; } void AbstractField::setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) { this->omitTermFreqAndPositions = omitTermFreqAndPositions; } bool AbstractField::isLazy() { return lazy; } String AbstractField::toString() { StringStream result; if (_isStored) result << L"stored"; if (_isIndexed) { if (!result.str().empty()) result << L","; result << L"indexed"; } if (_isTokenized) { if (!result.str().empty()) result << L","; result << L"tokenized"; } if (storeTermVector) { if (!result.str().empty()) result << L","; result << L"termVector"; } if (storeOffsetWithTermVector) { if (!result.str().empty()) result << L","; result << L"termVectorOffsets"; } if (storePositionWithTermVector) { if (!result.str().empty()) result << L","; result << L"termVectorPosition"; } if (_isBinary) { if (!result.str().empty()) result << L","; result << L"binary"; } if (_omitNorms) result << L",omitNorms"; if (omitTermFreqAndPositions) result << L",omitTermFreqAndPositions"; if (lazy) result << L",lazy"; result << L"<" << _name << L":"; if (VariantUtils::typeOf(fieldsData)) result << VariantUtils::get(fieldsData); else if (VariantUtils::typeOf(fieldsData)) result << L"Reader"; else if 
(VariantUtils::typeOf(fieldsData)) result << L"Binary [size=" << StringUtils::toString(VariantUtils::get(fieldsData).size()) << L"]"; result << L">"; return result.str(); } } LucenePlusPlus-rel_3.0.4/src/core/document/CompressionTools.cpp000066400000000000000000000130111217574114600246720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CompressionTools.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" #include #include #include #include #include namespace Lucene { const int32_t CompressionTools::COMPRESS_BUFFER = 4096; String ZLibToMessage(int32_t error) { if (error == boost::iostreams::zlib::okay) return L"okay"; else if (error == boost::iostreams::zlib::stream_end) return L"stream_end"; else if (error == boost::iostreams::zlib::stream_error) return L"stream_error"; else if (error == boost::iostreams::zlib::version_error) return L"version_error"; else if (error == boost::iostreams::zlib::data_error) return L"data_error"; else if (error == boost::iostreams::zlib::mem_error) return L"mem_error"; else if (error == boost::iostreams::zlib::buf_error ) return L"buf_error"; else return L"unknown"; } class BufferArraySink : public boost::iostreams::sink { public: BufferArraySink(ByteArray& _buffer, std::streamsize& _position, size_t allocSize) : buffer(_buffer), position(_position) { this->allocSize = allocSize; this->buffer.resize((int32_t)allocSize); } public: ByteArray& buffer; std::streamsize& position; private: size_t allocSize; public: std::streamsize write(const char* s, std::streamsize n) { if (position + n >= (std::streamsize)allocSize) { // grow buffer allocSize <<= 1; 
buffer.resize((int32_t)allocSize); } MiscUtils::arrayCopy(s, 0, buffer.get(), position, n); position += n; return n; } }; CompressionTools::~CompressionTools() { } ByteArray CompressionTools::compress(uint8_t* value, int32_t offset, int32_t length, int32_t compressionLevel) { // setup the outStream boost::iostreams::filtering_ostreambuf outStream; boost::iostreams::zlib_compressor zcompressor(compressionLevel); outStream.push(zcompressor); // and the output buffer ByteArray buffer(ByteArray::newInstance(COMPRESS_BUFFER)); std::streamsize position = 0; outStream.push(BufferArraySink(buffer, position, COMPRESS_BUFFER)); // setup the source stream, and then copy it to the outStream boost::iostreams::stream< boost::iostreams::array_source > source((char*)(value + offset), length); try { boost::iostreams::copy(source, outStream); } catch (boost::iostreams::zlib_error& err) { boost::throw_exception(CompressionException(L"deflate failure: " + ZLibToMessage(err.error()))); } buffer.resize((int32_t)position); return buffer; } ByteArray CompressionTools::compress(uint8_t* value, int32_t offset, int32_t length) { return compress(value, offset, length, boost::iostreams::zlib::best_compression); } ByteArray CompressionTools::compress(ByteArray value) { return compress(value.get(), 0, value.size(), boost::iostreams::zlib::best_compression); } ByteArray CompressionTools::compressString(const String& value) { return compressString(value, boost::iostreams::zlib::best_compression); } ByteArray CompressionTools::compressString(const String& value, int32_t compressionLevel) { UTF8ResultPtr utf8Result(newLucene()); StringUtils::toUTF8(value.c_str(), (int32_t)value.length(), utf8Result); return compress(utf8Result->result.get(), 0, utf8Result->length, compressionLevel); } ByteArray CompressionTools::decompress(ByteArray value) { // setup the outStream boost::iostreams::filtering_ostreambuf outStream; outStream.push(boost::iostreams::zlib_decompressor()); // and the output buffer 
ByteArray buffer(ByteArray::newInstance(COMPRESS_BUFFER)); std::streamsize position = 0; outStream.push(BufferArraySink(buffer, position, COMPRESS_BUFFER)); //setup the source stream, and then copy it to the outStream boost::iostreams::stream< boost::iostreams::array_source > source((char*)value.get(), value.size()); try { boost::iostreams::copy(source, outStream); } catch (boost::iostreams::zlib_error& err) { boost::throw_exception(CompressionException(L"deflate failure: " + ZLibToMessage(err.error()))); } buffer.resize((int32_t)position); return buffer; } String CompressionTools::decompressString(ByteArray value) { ByteArray bytes(decompress(value)); return StringUtils::toUnicode(bytes.get(), bytes.size()); } } LucenePlusPlus-rel_3.0.4/src/core/document/DateField.cpp000066400000000000000000000041641217574114600232020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DateField.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DateField::~DateField() { } int32_t DateField::DATE_LEN() { static int32_t _DATE_LEN = 0; if (_DATE_LEN == 0) { // make date strings long enough to last a millennium _DATE_LEN = (int32_t)StringUtils::toString((int64_t)(1000 * 365 * 24) * (int64_t)(60 * 60 * 1000), StringUtils::CHARACTER_MAX_RADIX).length(); } return _DATE_LEN; } const String& DateField::MIN_DATE_STRING() { static String _MIN_DATE_STRING; if (_MIN_DATE_STRING.empty()) _MIN_DATE_STRING = timeToString(0); return _MIN_DATE_STRING; } const String& DateField::MAX_DATE_STRING() { static String _MAX_DATE_STRING; if (_MAX_DATE_STRING.empty()) { _MAX_DATE_STRING.resize(DATE_LEN()); std::fill(_MAX_DATE_STRING.begin(), _MAX_DATE_STRING.end(), L'z'); } return _MAX_DATE_STRING; } String DateField::dateToString(const boost::posix_time::ptime& date) { return timeToString(MiscUtils::getTimeMillis(date)); } String DateField::timeToString(int64_t time) { if (time < 0) boost::throw_exception(RuntimeException(L"time '" + StringUtils::toString(time) + L"' is too early, must be >= 0")); String timeString(DATE_LEN(), L'0'); timeString += StringUtils::toString(time, StringUtils::CHARACTER_MAX_RADIX); return timeString.substr(timeString.length() - DATE_LEN(), DATE_LEN()); } int64_t DateField::stringToTime(const String& s) { return StringUtils::toLong(s, StringUtils::CHARACTER_MAX_RADIX); } } LucenePlusPlus-rel_3.0.4/src/core/document/DateTools.cpp000066400000000000000000000250451217574114600232600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "DateTools.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DateTools::DateOrder DateTools::dateOrder = DateTools::DATEORDER_LOCALE; DateTools::~DateTools() { } String DateTools::dateToString(const boost::posix_time::ptime& date, Resolution resolution) { return timeToString(MiscUtils::getTimeMillis(date), resolution); } String DateTools::timeToString(int64_t time, Resolution resolution) { std::string timeString(boost::posix_time::to_iso_string(boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1), boost::posix_time::milliseconds(time)))); switch (resolution) { case RESOLUTION_YEAR: return StringUtils::toUnicode(timeString.substr(0, 4).c_str()); case RESOLUTION_MONTH: return StringUtils::toUnicode(timeString.substr(0, 6).c_str()); case RESOLUTION_DAY: return StringUtils::toUnicode(timeString.substr(0, 8).c_str()); case RESOLUTION_HOUR: return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 2)).c_str()); case RESOLUTION_MINUTE: return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 4)).c_str()); case RESOLUTION_SECOND: return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 6)).c_str()); case RESOLUTION_MILLISECOND: { std::string fraction(timeString.length() > 16 ? 
timeString.substr(16, 3) : "000" ); return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 6) + fraction).c_str()); } case RESOLUTION_NULL: // silence static analyzers break; } boost::throw_exception(IllegalArgumentException(L"unknown resolution '" + StringUtils::toString(resolution) + L"'")); return L""; } int64_t DateTools::stringToTime(const String& dateString) { return MiscUtils::getTimeMillis(stringToDate(dateString)); } boost::posix_time::ptime DateTools::stringToDate(const String& dateString) { uint16_t year = dateString.length() >= 4 ? (uint16_t)wcstol(dateString.substr(0, 4).c_str(), 0, 10) : 1970; uint16_t month = dateString.length() >= 6 ? (uint16_t)wcstol(dateString.substr(4, 2).c_str(), 0, 10) : 1; uint16_t day = dateString.length() >= 8 ? (uint16_t)wcstol(dateString.substr(6, 2).c_str(), 0, 10) : 1; uint16_t hour = dateString.length() >= 10 ? (uint16_t)wcstol(dateString.substr(8, 2).c_str(), 0, 10) : 0; uint16_t minute = dateString.length() >= 12 ? (uint16_t)wcstol(dateString.substr(10, 2).c_str(), 0, 10) : 0; uint16_t second = dateString.length() >= 14 ? (uint16_t)wcstol(dateString.substr(12, 2).c_str(), 0, 10) : 0; uint16_t millisecond = dateString.length() >= 16 ? (uint16_t)wcstol(dateString.substr(14, 3).c_str(), 0, 10) : 0; boost::posix_time::ptime date; try { date = boost::posix_time::ptime(boost::gregorian::date(year, month, day), boost::posix_time::hours(hour) + boost::posix_time::minutes(minute) + boost::posix_time::seconds(second) + boost::posix_time::milliseconds(millisecond)); } catch (...) 
{ boost::throw_exception(ParseException(L"Input is not valid date string: " + dateString)); } return date; } boost::posix_time::ptime DateTools::round(const boost::posix_time::ptime& date, Resolution resolution) { boost::posix_time::ptime roundDate; switch (resolution) { case RESOLUTION_YEAR: return boost::posix_time::ptime(boost::gregorian::date(date.date().year(), 1, 1)); case RESOLUTION_MONTH: return boost::posix_time::ptime(boost::gregorian::date(date.date().year(), date.date().month(), 1)); case RESOLUTION_DAY: return boost::posix_time::ptime(date.date()); case RESOLUTION_HOUR: return boost::posix_time::ptime(date.date(), boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours())); case RESOLUTION_MINUTE: return boost::posix_time::ptime(date.date(), boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours()) + boost::posix_time::minutes(boost::posix_time::time_duration(date.time_of_day()).minutes())); case RESOLUTION_SECOND: return boost::posix_time::ptime(date.date(), boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours()) + boost::posix_time::minutes(boost::posix_time::time_duration(date.time_of_day()).minutes()) + boost::posix_time::seconds(boost::posix_time::time_duration(date.time_of_day()).seconds())); case RESOLUTION_MILLISECOND: return date; case RESOLUTION_NULL: // silence static analyzers break; } return boost::posix_time::ptime(); } int64_t DateTools::round(int64_t time, Resolution resolution) { return MiscUtils::getTimeMillis(round(boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1), boost::posix_time::milliseconds(time)), resolution)); } void DateTools::setDateOrder(DateTools::DateOrder order) { dateOrder = order; } DateTools::DateOrder DateTools::getDateOrder(std::locale locale) { if (dateOrder != DATEORDER_LOCALE) return dateOrder; std::locale localeDate(std::locale(locale, new boost::gregorian::date_facet("%x"))); SingleStringStream controlStream; 
controlStream.imbue(localeDate); controlStream << boost::gregorian::date(1974, 10, 20); // Oct 20th 1974 SingleString controlDate(controlStream.str()); SingleString::size_type year = controlDate.find("74"); SingleString::size_type month = controlDate.find("10"); if (month == SingleString::npos) month = controlDate.find("O"); // safety SingleString::size_type day = controlDate.find("20"); if (year < month) return DATEORDER_YMD; else if (month < day) return DATEORDER_MDY; else return DATEORDER_DMY; } boost::posix_time::ptime DateTools::parseDate(const String& dateString, std::locale locale) { Collection dateTokens(StringUtils::split(dateString, L",-. /")); String delimiter(dateTokens.size() == 1 ? L"" : L"/"); String paddedDate; for (Collection::iterator token = dateTokens.begin(); token != dateTokens.end(); ++token) { if (token != dateTokens.begin()) paddedDate += delimiter; if (token->length() == 1) paddedDate += L"0" + *token; else paddedDate += *token; } Collection dateFormats(Collection::newInstance()); switch (getDateOrder(locale)) { case DATEORDER_DMY: dateFormats.add(L"%d" + delimiter + L"%m" + delimiter + L"%Y"); dateFormats.add(L"%d" + delimiter + L"%m" + delimiter + L"%y"); dateFormats.add(L"%d" + delimiter + L"%b" + delimiter + L"%Y"); dateFormats.add(L"%d" + delimiter + L"%b" + delimiter + L"%y"); dateFormats.add(L"%d" + delimiter + L"%B" + delimiter + L"%Y"); dateFormats.add(L"%d" + delimiter + L"%B" + delimiter + L"%y"); break; case DATEORDER_MDY: dateFormats.add(L"%m" + delimiter + L"%d" + delimiter + L"%Y"); dateFormats.add(L"%m" + delimiter + L"%d" + delimiter + L"%y"); dateFormats.add(L"%b" + delimiter + L"%d" + delimiter + L"%Y"); dateFormats.add(L"%b" + delimiter + L"%d" + delimiter + L"%y"); dateFormats.add(L"%B" + delimiter + L"%d" + delimiter + L"%Y"); dateFormats.add(L"%B" + delimiter + L"%d" + delimiter + L"%y"); break; case DATEORDER_YMD: dateFormats.add(L"%Y" + delimiter + L"%m" + delimiter + L"%d"); dateFormats.add(L"%y" + delimiter + 
L"%m" + delimiter + L"%d"); dateFormats.add(L"%Y" + delimiter + L"%b" + delimiter + L"%d"); dateFormats.add(L"%y" + delimiter + L"%b" + delimiter + L"%d"); dateFormats.add(L"%Y" + delimiter + L"%B" + delimiter + L"%d"); dateFormats.add(L"%y" + delimiter + L"%B" + delimiter + L"%d"); break; case DATEORDER_LOCALE: // silence static analyzers break; } boost::date_time::format_date_parser parser(L"", locale); boost::date_time::special_values_parser svp; for (Collection::iterator dateFormat = dateFormats.begin(); dateFormat != dateFormats.end(); ++dateFormat) { try { boost::gregorian::date date = parser.parse_date(paddedDate.c_str(), dateFormat->c_str(), svp); if (!date.is_not_a_date()) return boost::posix_time::ptime(date); } catch (...) { } } boost::throw_exception(ParseException(L"Invalid date '" + dateString + L"'")); return boost::posix_time::ptime(); } } LucenePlusPlus-rel_3.0.4/src/core/document/Document.cpp000066400000000000000000000112441217574114600231340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Document.h" #include "Fieldable.h" #include "Field.h" namespace Lucene { Document::Document() { fields = Collection::newInstance(); boost = 1.0; } Document::~Document() { } void Document::setBoost(double boost) { this->boost = boost; } double Document::getBoost() { return boost; } void Document::add(FieldablePtr field) { fields.add(field); } /// Utility functor for comparing fieldable names. /// see {@link Document}. 
struct equalFieldableName { equalFieldableName(const String& name) : equalName(name) {} inline bool operator()(const FieldablePtr& other) const { return (equalName == other->name()); } const String& equalName; }; void Document::removeField(const String& name) { Collection::iterator field = fields.find_if(equalFieldableName(name)); if (field != fields.end()) fields.remove(field); } void Document::removeFields(const String& name) { fields.remove_if(equalFieldableName(name)); } FieldPtr Document::getField(const String& name) { return boost::static_pointer_cast(getFieldable(name)); } FieldablePtr Document::getFieldable(const String& name) { Collection::iterator field = fields.find_if(equalFieldableName(name)); return field == fields.end() ? FieldablePtr() : *field; } String Document::get(const String& name) { for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name && !(*field)->isBinary()) return (*field)->stringValue(); } return L""; } Collection Document::getFields() { return fields; } Collection Document::getFields(const String& name) { Collection result(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name) result.add(boost::static_pointer_cast(*field)); } return result; } Collection Document::getFieldables(const String& name) { Collection result(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name) result.add(*field); } return result; } Collection Document::getValues(const String& name) { Collection result(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name && !(*field)->isBinary()) result.add((*field)->stringValue()); } return result; } Collection Document::getBinaryValues(const String& name) { Collection result(Collection::newInstance()); for 
(Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name && (*field)->isBinary()) result.add((*field)->getBinaryValue()); } return result; } ByteArray Document::getBinaryValue(const String& name) { for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name && (*field)->isBinary()) return (*field)->getBinaryValue(); } return ByteArray(); } String Document::toString() { StringStream buffer; buffer << L"Document<"; for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if (field != fields.begin()) buffer << L" "; buffer << (*field)->stringValue(); } buffer << L">"; return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/document/Field.cpp000066400000000000000000000261071217574114600224050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Field.h" #include "MiscUtils.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { Field::Field(const String& name, const String& value, Store store, Index index) { ConstructField(name, value, store, index, TERM_VECTOR_NO); } Field::Field(const String& name, const String& value, Store store, Index index, TermVector termVector) { ConstructField(name, value, store, index, termVector); } Field::Field(const String& name, ReaderPtr reader) { ConstructField(name, reader, TERM_VECTOR_NO); } Field::Field(const String& name, ReaderPtr reader, TermVector termVector) { ConstructField(name, reader, termVector); } Field::Field(const String& name, TokenStreamPtr tokenStream) { ConstructField(name, tokenStream, TERM_VECTOR_NO); } Field::Field(const String& name, TokenStreamPtr tokenStream, TermVector termVector) { ConstructField(name, tokenStream, termVector); } Field::Field(const String& name, ByteArray value, Store store) { ConstructField(name, value, 0, value.size(), store); } Field::Field(const String& name, ByteArray value, int32_t offset, int32_t length, Store store) { ConstructField(name, value, offset, length, store); } void Field::ConstructField(const String& name, const String& value, Store store, Index index, TermVector termVector) { if (name.empty() && value.empty()) boost::throw_exception(IllegalArgumentException(L"name and value cannot both be empty")); if (index == INDEX_NO && store == STORE_NO) boost::throw_exception(IllegalArgumentException(L"it doesn't make sense to have a field that is neither indexed nor stored")); if (index == INDEX_NO && termVector != TERM_VECTOR_NO) boost::throw_exception(IllegalArgumentException(L"cannot store term vector information for a field that is not indexed")); this->_name = name; this->fieldsData = value; this->_isStored = isStored(store); this->_isIndexed = isIndexed(index); this->_isTokenized = 
isAnalyzed(index); this->_omitNorms = omitNorms(index); this->_isBinary = false; if (index == INDEX_NO) this->omitTermFreqAndPositions = false; setStoreTermVector(termVector); } void Field::ConstructField(const String& name, ReaderPtr reader, TermVector termVector) { this->_name = name; this->fieldsData = reader; this->_isStored = false; this->_isIndexed = true; this->_isTokenized = true; this->_isBinary = false; setStoreTermVector(termVector); } void Field::ConstructField(const String& name, TokenStreamPtr tokenStream, TermVector termVector) { this->_name = name; this->fieldsData = VariantUtils::null(); this->tokenStream = tokenStream; this->_isStored = false; this->_isIndexed = true; this->_isTokenized = true; this->_isBinary = false; setStoreTermVector(termVector); } void Field::ConstructField(const String& name, ByteArray value, int32_t offset, int32_t length, Store store) { if (store == STORE_NO) boost::throw_exception(IllegalArgumentException(L"binary values can't be unstored")); this->_name = name; this->fieldsData = value; this->_isStored = isStored(store); this->_isIndexed = false; this->_isTokenized = false; this->omitTermFreqAndPositions = false; this->_omitNorms = true; this->_isBinary = true; this->binaryLength = length; this->binaryOffset = offset; setStoreTermVector(TERM_VECTOR_NO); } Field::~Field() { } String Field::stringValue() { return VariantUtils::get(fieldsData); } ReaderPtr Field::readerValue() { return VariantUtils::get(fieldsData); } TokenStreamPtr Field::tokenStreamValue() { return tokenStream; } void Field::setValue(const String& value) { if (_isBinary) boost::throw_exception(IllegalArgumentException(L"cannot set a String value on a binary field")); fieldsData = value; } void Field::setValue(ReaderPtr value) { if (_isBinary) boost::throw_exception(IllegalArgumentException(L"cannot set a Reader value on a binary field")); if (_isStored) boost::throw_exception(IllegalArgumentException(L"cannot set a Reader value on a stored field")); 
fieldsData = value; } void Field::setValue(ByteArray value) { if (!_isBinary) boost::throw_exception(IllegalArgumentException(L"cannot set a byte[] value on a non-binary field")); fieldsData = value; binaryLength = value.size(); binaryOffset = 0; } void Field::setValue(ByteArray value, int32_t offset, int32_t length) { if (!_isBinary) boost::throw_exception(IllegalArgumentException(L"cannot set a byte[] value on a non-binary field")); fieldsData = value; binaryLength = length; binaryOffset = offset; } void Field::setTokenStream(TokenStreamPtr tokenStream) { this->_isIndexed = true; this->_isTokenized = true; this->tokenStream = tokenStream; } bool Field::isStored(Store store) { switch (store) { case STORE_YES: return true; case STORE_NO: return false; default: boost::throw_exception(IllegalArgumentException(L"Invalid field store")); return false; } } bool Field::isIndexed(Index index) { switch (index) { case INDEX_NO: return false; case INDEX_ANALYZED: return true; case INDEX_NOT_ANALYZED: return true; case INDEX_NOT_ANALYZED_NO_NORMS: return true; case INDEX_ANALYZED_NO_NORMS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field index")); return false; } } bool Field::isAnalyzed(Index index) { switch (index) { case INDEX_NO: return false; case INDEX_ANALYZED: return true; case INDEX_NOT_ANALYZED: return false; case INDEX_NOT_ANALYZED_NO_NORMS: return false; case INDEX_ANALYZED_NO_NORMS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field index")); return false; } } bool Field::omitNorms(Index index) { switch (index) { case INDEX_NO: return true; case INDEX_ANALYZED: return false; case INDEX_NOT_ANALYZED: return false; case INDEX_NOT_ANALYZED_NO_NORMS: return true; case INDEX_ANALYZED_NO_NORMS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field index")); return false; } } Field::Index Field::toIndex(bool indexed, bool analyzed) { return toIndex(indexed, analyzed, 
false); } Field::Index Field::toIndex(bool indexed, bool analyzed, bool omitNorms) { // If it is not indexed nothing else matters if (!indexed) return INDEX_NO; // typical, non-expert if (!omitNorms) return analyzed ? INDEX_ANALYZED : INDEX_NOT_ANALYZED; // Expert: Norms omitted return analyzed ? INDEX_ANALYZED_NO_NORMS : INDEX_NOT_ANALYZED_NO_NORMS; } bool Field::isStored(TermVector termVector) { switch (termVector) { case TERM_VECTOR_NO: return false; case TERM_VECTOR_YES: return true; case TERM_VECTOR_WITH_POSITIONS: return true; case TERM_VECTOR_WITH_OFFSETS: return true; case TERM_VECTOR_WITH_POSITIONS_OFFSETS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); return false; } } bool Field::withPositions(TermVector termVector) { switch (termVector) { case TERM_VECTOR_NO: return false; case TERM_VECTOR_YES: return false; case TERM_VECTOR_WITH_POSITIONS: return true; case TERM_VECTOR_WITH_OFFSETS: return false; case TERM_VECTOR_WITH_POSITIONS_OFFSETS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); return false; } } bool Field::withOffsets(TermVector termVector) { switch (termVector) { case TERM_VECTOR_NO: return false; case TERM_VECTOR_YES: return false; case TERM_VECTOR_WITH_POSITIONS: return false; case TERM_VECTOR_WITH_OFFSETS: return true; case TERM_VECTOR_WITH_POSITIONS_OFFSETS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); return false; } } Field::TermVector Field::toTermVector(bool stored, bool withOffsets, bool withPositions) { // If it is not stored, nothing else matters. if (!stored) return TERM_VECTOR_NO; if (withOffsets) return withPositions ? TERM_VECTOR_WITH_POSITIONS_OFFSETS : TERM_VECTOR_WITH_OFFSETS; return withPositions ? 
TERM_VECTOR_WITH_POSITIONS : TERM_VECTOR_YES; } } LucenePlusPlus-rel_3.0.4/src/core/document/FieldSelector.cpp000066400000000000000000000007731217574114600241070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldSelector.h" namespace Lucene { FieldSelector::FieldSelector() { } FieldSelector::~FieldSelector() { } } LucenePlusPlus-rel_3.0.4/src/core/document/Fieldable.cpp000066400000000000000000000057621217574114600232350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Fieldable.h" namespace Lucene { void Fieldable::setBoost(double boost) { BOOST_ASSERT(false); // override } double Fieldable::getBoost() { BOOST_ASSERT(false); return 0; // override } String Fieldable::name() { BOOST_ASSERT(false); return L""; // override } String Fieldable::stringValue() { BOOST_ASSERT(false); return L""; // override } ReaderPtr Fieldable::readerValue() { BOOST_ASSERT(false); return ReaderPtr(); // override } TokenStreamPtr Fieldable::tokenStreamValue() { BOOST_ASSERT(false); return TokenStreamPtr(); // override } bool Fieldable::isStored() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isIndexed() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isTokenized() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isTermVectorStored() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isStoreOffsetWithTermVector() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isStorePositionWithTermVector() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isBinary() { BOOST_ASSERT(false); return false; // override } bool Fieldable::getOmitNorms() { BOOST_ASSERT(false); return false; // override } void Fieldable::setOmitNorms(bool omitNorms) { BOOST_ASSERT(false); // override } bool Fieldable::isLazy() { BOOST_ASSERT(false); return false; // override } int32_t Fieldable::getBinaryOffset() { BOOST_ASSERT(false); return 0; // override } int32_t Fieldable::getBinaryLength() { BOOST_ASSERT(false); return 0; // override } ByteArray Fieldable::getBinaryValue() { BOOST_ASSERT(false); return ByteArray(); // override } ByteArray Fieldable::getBinaryValue(ByteArray result) { BOOST_ASSERT(false); return ByteArray(); // override } bool Fieldable::getOmitTermFreqAndPositions() { BOOST_ASSERT(false); return false; // override } void 
Fieldable::setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) { BOOST_ASSERT(false); // override } } LucenePlusPlus-rel_3.0.4/src/core/document/LoadFirstFieldSelector.cpp000066400000000000000000000012151217574114600257070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LoadFirstFieldSelector.h" namespace Lucene { LoadFirstFieldSelector::~LoadFirstFieldSelector() { } FieldSelector::FieldSelectorResult LoadFirstFieldSelector::accept(const String& fieldName) { return FieldSelector::SELECTOR_LOAD_AND_BREAK; } } LucenePlusPlus-rel_3.0.4/src/core/document/MapFieldSelector.cpp000066400000000000000000000023351217574114600245410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MapFieldSelector.h" namespace Lucene { MapFieldSelector::MapFieldSelector(MapStringFieldSelectorResult fieldSelections) { this->fieldSelections = fieldSelections; } MapFieldSelector::MapFieldSelector(Collection fields) { fieldSelections = MapStringFieldSelectorResult::newInstance(); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) fieldSelections.put(*field, FieldSelector::SELECTOR_LOAD); } MapFieldSelector::~MapFieldSelector() { } FieldSelector::FieldSelectorResult MapFieldSelector::accept(const String& fieldName) { MapStringFieldSelectorResult::iterator selection = fieldSelections.find(fieldName); return selection != fieldSelections.end() ? selection->second : FieldSelector::SELECTOR_NO_LOAD; } } LucenePlusPlus-rel_3.0.4/src/core/document/NumberTools.cpp000066400000000000000000000055561217574114600236400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumberTools.h" #include "StringUtils.h" namespace Lucene { const int32_t NumberTools::RADIX = 36; const wchar_t NumberTools::NEGATIVE_PREFIX = L'-'; const wchar_t NumberTools::POSITIVE_PREFIX = L'0'; NumberTools::~NumberTools() { } const String& NumberTools::MIN_STRING_VALUE() { static String _MIN_STRING_VALUE; if (_MIN_STRING_VALUE.empty()) { _MIN_STRING_VALUE += NEGATIVE_PREFIX; _MIN_STRING_VALUE += L"0000000000000"; } return _MIN_STRING_VALUE; } const String& NumberTools::MAX_STRING_VALUE() { static String _MAX_STRING_VALUE; if (_MAX_STRING_VALUE.empty()) { _MAX_STRING_VALUE += POSITIVE_PREFIX; _MAX_STRING_VALUE += L"1y2p0ij32e8e7"; } return _MAX_STRING_VALUE; } int32_t NumberTools::STR_SIZE() { static int32_t _STR_SIZE = 0; if (_STR_SIZE == 0) _STR_SIZE = (int32_t)MIN_STRING_VALUE().length(); return _STR_SIZE; } String NumberTools::longToString(int64_t l) { if (l == std::numeric_limits::min()) { // special case, because long is not symmetric around zero return MIN_STRING_VALUE(); } String buf; buf.reserve(STR_SIZE()); if (l < 0) { buf += NEGATIVE_PREFIX; l = std::numeric_limits::max() + l + 1; } buf += POSITIVE_PREFIX; String num(StringUtils::toString(l, RADIX)); int32_t padLen = (int32_t)(STR_SIZE() - num.length() - buf.length()); while (padLen-- > 0) buf += L'0'; return buf + num; } int64_t NumberTools::stringToLong(const String& str) { if ((int32_t)str.length() != STR_SIZE()) boost::throw_exception(NumberFormatException(L"string is the wrong size")); if (str == MIN_STRING_VALUE()) return std::numeric_limits::min(); wchar_t prefix = str[0]; int64_t l = StringUtils::toLong(str.substr(1), RADIX); if (prefix == POSITIVE_PREFIX) { // nop } else if (prefix == NEGATIVE_PREFIX) l = l - std::numeric_limits::max() - 1; else boost::throw_exception(NumberFormatException(L"string does not begin with the correct prefix")); return l; } } 
LucenePlusPlus-rel_3.0.4/src/core/document/NumericField.cpp000066400000000000000000000057411217574114600237310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericField.h" #include "Field.h" #include "NumericUtils.h" #include "NumericTokenStream.h" #include "StringUtils.h" namespace Lucene { NumericField::NumericField(const String& name) : AbstractField(name, Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS, Field::TERM_VECTOR_NO) { setOmitTermFreqAndPositions(true); tokenStream = newLucene(NumericUtils::PRECISION_STEP_DEFAULT); } NumericField::NumericField(const String& name, Field::Store store, bool index) : AbstractField(name, store, index ? Field::INDEX_ANALYZED_NO_NORMS : Field::INDEX_NO, Field::TERM_VECTOR_NO) { setOmitTermFreqAndPositions(true); tokenStream = newLucene(NumericUtils::PRECISION_STEP_DEFAULT); } NumericField::NumericField(const String& name, int32_t precisionStep) : AbstractField(name, Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS, Field::TERM_VECTOR_NO) { setOmitTermFreqAndPositions(true); tokenStream = newLucene(precisionStep); } NumericField::NumericField(const String& name, int32_t precisionStep, Field::Store store, bool index) : AbstractField(name, store, index ? Field::INDEX_ANALYZED_NO_NORMS : Field::INDEX_NO, Field::TERM_VECTOR_NO) { setOmitTermFreqAndPositions(true); tokenStream = newLucene(precisionStep); } NumericField::~NumericField() { } TokenStreamPtr NumericField::tokenStreamValue() { return isIndexed() ? 
boost::static_pointer_cast(tokenStream) : TokenStreamPtr(); } ByteArray NumericField::getBinaryValue(ByteArray result) { return ByteArray(); } ReaderPtr NumericField::readerValue() { return ReaderPtr(); } String NumericField::stringValue() { StringStream value; value << fieldsData; return value.str(); } int64_t NumericField::getNumericValue() { return StringUtils::toLong(stringValue()); } NumericFieldPtr NumericField::setLongValue(int64_t value) { tokenStream->setLongValue(value); fieldsData = value; return shared_from_this(); } NumericFieldPtr NumericField::setIntValue(int32_t value) { tokenStream->setIntValue(value); fieldsData = value; return shared_from_this(); } NumericFieldPtr NumericField::setDoubleValue(double value) { tokenStream->setDoubleValue(value); fieldsData = value; return shared_from_this(); } } LucenePlusPlus-rel_3.0.4/src/core/document/SetBasedFieldSelector.cpp000066400000000000000000000021511217574114600255120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SetBasedFieldSelector.h" namespace Lucene { SetBasedFieldSelector::SetBasedFieldSelector(HashSet fieldsToLoad, HashSet lazyFieldsToLoad) { this->fieldsToLoad = fieldsToLoad; this->lazyFieldsToLoad = lazyFieldsToLoad; } SetBasedFieldSelector::~SetBasedFieldSelector() { } FieldSelector::FieldSelectorResult SetBasedFieldSelector::accept(const String& fieldName) { FieldSelector::FieldSelectorResult result = FieldSelector::SELECTOR_NO_LOAD; if (fieldsToLoad.contains(fieldName)) result = FieldSelector::SELECTOR_LOAD; if (lazyFieldsToLoad.contains(fieldName)) result = FieldSelector::SELECTOR_LAZY_LOAD; return result; } } LucenePlusPlus-rel_3.0.4/src/core/include/000077500000000000000000000000001217574114600204555ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/include/LuceneInc.h000066400000000000000000000007321217574114600224750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifdef _WIN32 #include "targetver.h" #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include #endif #include "Lucene.h" LucenePlusPlus-rel_3.0.4/src/core/include/_BooleanQuery.h000066400000000000000000000032311217574114600233710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _BOOLEANQUERY_H #define _BOOLEANQUERY_H #include "SimilarityDelegator.h" namespace Lucene { /// The Weight for BooleanQuery, used to normalize, score and explain these queries. class BooleanWeight : public Weight { public: BooleanWeight(BooleanQueryPtr query, SearcherPtr searcher); virtual ~BooleanWeight(); LUCENE_CLASS(BooleanWeight); protected: BooleanQueryPtr query; /// The Similarity implementation. SimilarityPtr similarity; Collection weights; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual bool scoresDocsOutOfOrder(); }; /// Disabled coord Similarity class SimilarityDisableCoord : public SimilarityDelegator { public: SimilarityDisableCoord(SimilarityPtr delegee); virtual ~SimilarityDisableCoord(); LUCENE_CLASS(SimilarityDisableCoord); public: virtual double coord(int32_t overlap, int32_t maxOverlap); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_ByteFieldSource.h000066400000000000000000000017031217574114600240160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _BYTEFIELDSOURCE_H #define _BYTEFIELDSOURCE_H #include "DocValues.h" namespace Lucene { class ByteDocValues : public DocValues { public: ByteDocValues(ByteFieldSourcePtr source, Collection arr); virtual ~ByteDocValues(); LUCENE_CLASS(ByteDocValues); protected: ByteFieldSourceWeakPtr _source; Collection arr; public: virtual double doubleVal(int32_t doc); virtual int32_t intVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_CachingSpanFilter.h000066400000000000000000000015171217574114600243150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _CACHINGSPANFILTER_H #define _CACHINGSPANFILTER_H #include "_CachingWrapperFilter.h" namespace Lucene { class FilterCacheSpanFilterResult : public FilterCache { public: FilterCacheSpanFilterResult(CachingWrapperFilter::DeletesMode deletesMode); virtual ~FilterCacheSpanFilterResult(); LUCENE_CLASS(FilterCacheSpanFilterResult); protected: virtual LuceneObjectPtr mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_CachingWrapperFilter.h000066400000000000000000000034771217574114600250430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _CACHINGWRAPPERFILTER_H #define _CACHINGWRAPPERFILTER_H #include "FilteredDocIdSet.h" namespace Lucene { class FilterCache : public LuceneObject { public: FilterCache(CachingWrapperFilter::DeletesMode deletesMode); virtual ~FilterCache(); LUCENE_CLASS(FilterCache); public: WeakMapObjectObject cache; CachingWrapperFilter::DeletesMode deletesMode; public: virtual LuceneObjectPtr get(IndexReaderPtr reader, LuceneObjectPtr coreKey, LuceneObjectPtr delCoreKey); virtual void put(LuceneObjectPtr coreKey, LuceneObjectPtr delCoreKey, LuceneObjectPtr value); protected: virtual LuceneObjectPtr mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value) = 0; }; class FilterCacheDocIdSet : public FilterCache { public: FilterCacheDocIdSet(CachingWrapperFilter::DeletesMode deletesMode); virtual ~FilterCacheDocIdSet(); LUCENE_CLASS(FilterCacheDocIdSet); protected: virtual LuceneObjectPtr mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value); }; class FilteredCacheDocIdSet : public FilteredDocIdSet { public: FilteredCacheDocIdSet(IndexReaderPtr reader, DocIdSetPtr innerSet); virtual ~FilteredCacheDocIdSet(); LUCENE_CLASS(FilteredCacheDocIdSet); protected: IndexReaderPtr reader; protected: virtual bool match(int32_t docid); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_CheckIndex.h000066400000000000000000000014311217574114600227710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _CHECKINDEX_H #define _CHECKINDEX_H #include "SegmentTermDocs.h" namespace Lucene { class MySegmentTermDocs : public SegmentTermDocs { public: MySegmentTermDocs(SegmentReaderPtr p); virtual ~MySegmentTermDocs(); LUCENE_CLASS(MySegmentTermDocs); public: int32_t delCount; public: virtual void seek(TermPtr term); virtual void skippingDoc(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_ConcurrentMergeScheduler.h000066400000000000000000000020561217574114600257310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _CONCURRENTMERGESCHEDULER_H #define _CONCURRENTMERGESCHEDULER_H #include "LuceneThread.h" namespace Lucene { class MergeThread : public LuceneThread { public: MergeThread(ConcurrentMergeSchedulerPtr merger, IndexWriterPtr writer, OneMergePtr startMerge); virtual ~MergeThread(); LUCENE_CLASS(MergeThread); protected: ConcurrentMergeSchedulerWeakPtr _merger; IndexWriterWeakPtr _writer; OneMergePtr startMerge; OneMergePtr runningMerge; public: void setRunningMerge(OneMergePtr merge); OneMergePtr getRunningMerge(); void setThreadPriority(int32_t pri); virtual void run(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_ConstantScoreQuery.h000066400000000000000000000033041217574114600246000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _CONSTANTSCOREQUERY_H #define _CONSTANTSCOREQUERY_H #include "Weight.h" namespace Lucene { class ConstantWeight : public Weight { public: ConstantWeight(ConstantScoreQueryPtr constantScorer, SearcherPtr searcher); virtual ~ConstantWeight(); LUCENE_CLASS(ConstantWeight); protected: ConstantScoreQueryPtr constantScorer; SimilarityPtr similarity; double queryNorm; double queryWeight; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); }; class ConstantScorer : public Scorer { public: ConstantScorer(ConstantScoreQueryPtr constantScorer, SimilarityPtr similarity, IndexReaderPtr reader, WeightPtr w); virtual ~ConstantScorer(); LUCENE_CLASS(ConstantScorer); public: DocIdSetIteratorPtr docIdSetIterator; double theScore; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); virtual double score(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_CustomScoreQuery.h000066400000000000000000000061261217574114600242660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _CUSTOMSCOREQUERY_H #define _CUSTOMSCOREQUERY_H #include "Weight.h" #include "Scorer.h" #include "CustomScoreProvider.h" namespace Lucene { // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider class DefaultCustomScoreProvider : public CustomScoreProvider { public: DefaultCustomScoreProvider(CustomScoreQueryPtr customQuery, IndexReaderPtr reader); virtual ~DefaultCustomScoreProvider(); LUCENE_CLASS(DefaultCustomScoreProvider); protected: CustomScoreQueryWeakPtr _customQuery; public: virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls); virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl); }; class CustomWeight : public Weight { public: CustomWeight(CustomScoreQueryPtr query, SearcherPtr searcher); virtual ~CustomWeight(); LUCENE_CLASS(CustomWeight); public: CustomScoreQueryPtr query; SimilarityPtr similarity; WeightPtr subQueryWeight; Collection valSrcWeights; bool qStrict; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); virtual bool scoresDocsOutOfOrder(); protected: ExplanationPtr doExplain(IndexReaderPtr reader, int32_t doc); }; /// A scorer that applies a (callback) function on scores of the subQuery. 
class CustomScorer : public Scorer { public: CustomScorer(SimilarityPtr similarity, IndexReaderPtr reader, CustomWeightPtr weight, ScorerPtr subQueryScorer, Collection valSrcScorers); virtual ~CustomScorer(); LUCENE_CLASS(CustomScorer); protected: double qWeight; ScorerPtr subQueryScorer; Collection valSrcScorers; IndexReaderPtr reader; CustomScoreProviderPtr provider; Collection vScores; // reused in score() to avoid allocating this array for each doc public: virtual int32_t nextDoc(); virtual int32_t docID(); virtual double score(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_DirectoryReader.h000066400000000000000000000027511217574114600240610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _DIRECTORYREADER_H #define _DIRECTORYREADER_H #include "_SegmentInfos.h" namespace Lucene { class FindSegmentsOpen : public FindSegmentsFileT { public: FindSegmentsOpen(bool readOnly, IndexDeletionPolicyPtr deletionPolicy, int32_t termInfosIndexDivisor, SegmentInfosPtr infos, DirectoryPtr directory); virtual ~FindSegmentsOpen(); LUCENE_CLASS(FindSegmentsOpen); protected: bool readOnly; IndexDeletionPolicyPtr deletionPolicy; int32_t termInfosIndexDivisor; public: virtual IndexReaderPtr doBody(const String& segmentFileName); }; class FindSegmentsReopen : public FindSegmentsFileT { public: FindSegmentsReopen(DirectoryReaderPtr reader, bool openReadOnly, SegmentInfosPtr infos, DirectoryPtr directory); virtual ~FindSegmentsReopen(); LUCENE_CLASS(FindSegmentsReopen); protected: DirectoryReaderWeakPtr _reader; bool openReadOnly; public: virtual DirectoryReaderPtr doBody(const String& segmentFileName); }; } #endif 
LucenePlusPlus-rel_3.0.4/src/core/include/_DisjunctionMaxQuery.h000066400000000000000000000036601217574114600247570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _DISJUNCTIONMAXQUERY_H #define _DISJUNCTIONMAXQUERY_H #include "Weight.h" namespace Lucene { /// The Weight for DisjunctionMaxQuery, used to normalize, score and explain these queries. class DisjunctionMaxWeight : public Weight { public: /// Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. DisjunctionMaxWeight(DisjunctionMaxQueryPtr query, SearcherPtr searcher); virtual ~DisjunctionMaxWeight(); LUCENE_CLASS(DisjunctionMaxWeight); protected: DisjunctionMaxQueryPtr query; /// The Similarity implementation. SimilarityPtr similarity; /// The Weights for our subqueries, in 1-1 correspondence with disjuncts Collection weights; public: /// Return our associated DisjunctionMaxQuery virtual QueryPtr getQuery(); /// Return our boost virtual double getValue(); /// Compute the sub of squared weights of us applied to our subqueries. Used for normalization. 
virtual double sumOfSquaredWeights(); /// Apply the computed normalization factor to our subqueries virtual void normalize(double norm); /// Create the scorer used to score our associated DisjunctionMaxQuery virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); /// Explain the score we computed for doc virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_DocIdBitSet.h000066400000000000000000000015351217574114600230660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _DOCIDBITSET_H #define _DOCIDBITSET_H #include "DocIdSet.h" namespace Lucene { class DocIdBitSetIterator : public DocIdSetIterator { public: DocIdBitSetIterator(BitSetPtr bitSet); virtual ~DocIdBitSetIterator(); LUCENE_CLASS(DocIdBitSetIterator); protected: int32_t docId; BitSetPtr bitSet; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_DocIdSet.h000066400000000000000000000020731217574114600224250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _DOCIDSET_H #define _DOCIDSET_H #include "DocIdSetIterator.h" namespace Lucene { class EmptyDocIdSetIterator : public DocIdSetIterator { public: virtual ~EmptyDocIdSetIterator(); LUCENE_CLASS(EmptyDocIdSetIterator); public: virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual int32_t nextDoc(); }; /// An empty {@code DocIdSet} instance for easy use, eg. in Filters that hit no documents. class EmptyDocIdSet : public DocIdSet { public: virtual ~EmptyDocIdSet(); LUCENE_CLASS(EmptyDocIdSet); public: virtual DocIdSetIteratorPtr iterator(); virtual bool isCacheable(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_FieldCache.h000066400000000000000000000052041217574114600227350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDCACHE_H #define _FIELDCACHE_H #include "LuceneObject.h" namespace Lucene { /// @see FieldCache#DEFAULT_BYTE_PARSER() class DefaultByteParser : public ByteParser { public: virtual ~DefaultByteParser(); LUCENE_CLASS(DefaultByteParser); public: virtual uint8_t parseByte(const String& string); virtual String toString(); }; /// @see FieldCache#DEFAULT_INT_PARSER() class DefaultIntParser : public IntParser { public: virtual ~DefaultIntParser(); LUCENE_CLASS(DefaultIntParser); public: virtual int32_t parseInt(const String& string); virtual String toString(); }; /// @see FieldCache#NUMERIC_UTILS_INT_PARSER() class NumericUtilsIntParser : public IntParser { public: virtual ~NumericUtilsIntParser(); LUCENE_CLASS(NumericUtilsIntParser); public: virtual int32_t parseInt(const String& string); virtual String toString(); }; /// @see FieldCache#DEFAULT_LONG_PARSER() class DefaultLongParser : public LongParser { public: virtual ~DefaultLongParser(); LUCENE_CLASS(DefaultLongParser); public: virtual int64_t parseLong(const String& string); virtual String toString(); }; /// @see FieldCache#NUMERIC_UTILS_LONG_PARSER() class NumericUtilsLongParser : public LongParser { public: virtual ~NumericUtilsLongParser(); LUCENE_CLASS(NumericUtilsLongParser); public: virtual int64_t parseLong(const String& string); virtual String toString(); }; /// @see FieldCache#DEFAULT_DOUBLE_PARSER() class DefaultDoubleParser : public DoubleParser { public: virtual ~DefaultDoubleParser(); LUCENE_CLASS(DefaultDoubleParser); public: virtual double parseDouble(const String& string); virtual String toString(); }; /// @see FieldCache#NUMERIC_UTILS_DOUBLE_PARSER() class NumericUtilsDoubleParser : public DoubleParser { public: virtual ~NumericUtilsDoubleParser(); LUCENE_CLASS(NumericUtilsDoubleParser); public: virtual double parseDouble(const String& string); virtual String toString(); }; } #endif 
LucenePlusPlus-rel_3.0.4/src/core/include/_FieldCacheRangeFilter.h000066400000000000000000000232101217574114600250550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDCACHERANGEFILTER_H #define _FIELDCACHERANGEFILTER_H #include "Filter.h" #include "DocIdSet.h" #include "DocIdSetIterator.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { class FieldCacheRangeFilterString : public FieldCacheRangeFilter { public: FieldCacheRangeFilterString(const String& field, ParserPtr parser, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterString(); LUCENE_CLASS(FieldCacheRangeFilterString); public: String lowerVal; String upperVal; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); virtual String toString(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); }; class FieldCacheDocIdSet : public DocIdSet { public: FieldCacheDocIdSet(IndexReaderPtr reader, bool mayUseTermDocs); virtual ~FieldCacheDocIdSet(); LUCENE_CLASS(FieldCacheDocIdSet); protected: IndexReaderPtr reader; bool mayUseTermDocs; public: /// This method checks, if a doc is a hit, should throw ArrayIndexOutOfBounds, when position invalid virtual bool matchDoc(int32_t doc) = 0; /// This DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs. 
virtual bool isCacheable(); virtual DocIdSetIteratorPtr iterator(); }; template class FieldCacheDocIdSetNumeric : public FieldCacheDocIdSet { public: FieldCacheDocIdSetNumeric(IndexReaderPtr reader, bool mayUseTermDocs, Collection values, TYPE inclusiveLowerPoint, TYPE inclusiveUpperPoint) : FieldCacheDocIdSet(reader, mayUseTermDocs) { this->values = values; this->inclusiveLowerPoint = inclusiveLowerPoint; this->inclusiveUpperPoint = inclusiveUpperPoint; } virtual ~FieldCacheDocIdSetNumeric() { } protected: Collection values; TYPE inclusiveLowerPoint; TYPE inclusiveUpperPoint; public: virtual bool matchDoc(int32_t doc) { if (doc < 0 || doc >= values.size()) boost::throw_exception(IndexOutOfBoundsException()); return (values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint); } }; template class FieldCacheRangeFilterNumeric : public FieldCacheRangeFilter { public: FieldCacheRangeFilterNumeric(const String& field, ParserPtr parser, TYPE lowerVal, TYPE upperVal, TYPE maxVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilter(field, parser, includeLower, includeUpper) { this->lowerVal = lowerVal; this->upperVal = upperVal; this->maxVal = maxVal; } virtual ~FieldCacheRangeFilterNumeric() { } public: TYPE lowerVal; TYPE upperVal; TYPE maxVal; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { if (!includeLower && lowerVal == maxVal) return DocIdSet::EMPTY_DOCIDSET(); int64_t inclusiveLowerPoint = (int64_t)(includeLower ? lowerVal : (lowerVal + 1)); if (!includeUpper && upperVal == 0) return DocIdSet::EMPTY_DOCIDSET(); int64_t inclusiveUpperPoint = (int64_t)(includeUpper ? 
upperVal : (upperVal - 1)); if (inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet::EMPTY_DOCIDSET(); // we only request the usage of termDocs, if the range contains 0 return newLucene< FieldCacheDocIdSetNumeric >(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0), getValues(reader), inclusiveLowerPoint, inclusiveUpperPoint); } virtual Collection getValues(IndexReaderPtr reader) = 0; virtual String toString() { StringStream buffer; buffer << field << L":" << (includeLower ? L"[" : L"{"); buffer << lowerVal << L" TO " << lowerVal; buffer << (includeLower ? L"]" : L"}"); return buffer.str(); } virtual bool equals(LuceneObjectPtr other) { if (Filter::equals(other)) return true; boost::shared_ptr< FieldCacheRangeFilterNumeric > otherFilter(boost::dynamic_pointer_cast< FieldCacheRangeFilterNumeric >(other)); if (!otherFilter) return false; if (field != otherFilter->field || includeLower != otherFilter->includeLower || includeUpper != otherFilter->includeUpper) return false; if (lowerVal != otherFilter->lowerVal || upperVal != otherFilter->upperVal) return false; if (parser ? !parser->equals(otherFilter->parser) : otherFilter->parser) return false; return true; } int32_t hashCode() { int32_t code = StringUtils::hashCode(field); code ^= lowerVal == 0 ? 550356204 : (int32_t)lowerVal; code = (code << 1) | MiscUtils::unsignedShift(code, 31); // rotate to distinguish lower from upper code ^= upperVal == 0 ? -1674416163 : (int32_t)upperVal; code ^= parser ? parser->hashCode() : -1572457324; code ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 
1721088258 : 1948649653); return code; } }; class FieldCacheRangeFilterByte : public FieldCacheRangeFilterNumeric { public: FieldCacheRangeFilterByte(const String& field, ParserPtr parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterByte(); LUCENE_CLASS(FieldCacheRangeFilterByte); public: virtual Collection getValues(IndexReaderPtr reader); }; class FieldCacheRangeFilterInt : public FieldCacheRangeFilterNumeric { public: FieldCacheRangeFilterInt(const String& field, ParserPtr parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterInt(); LUCENE_CLASS(FieldCacheRangeFilterInt); public: virtual Collection getValues(IndexReaderPtr reader); }; class FieldCacheRangeFilterLong : public FieldCacheRangeFilterNumeric { public: FieldCacheRangeFilterLong(const String& field, ParserPtr parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterLong(); LUCENE_CLASS(FieldCacheRangeFilterLong); public: virtual Collection getValues(IndexReaderPtr reader); }; class FieldCacheRangeFilterDouble : public FieldCacheRangeFilterNumeric { public: FieldCacheRangeFilterDouble(const String& field, ParserPtr parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterDouble(); LUCENE_CLASS(FieldCacheRangeFilterDouble); public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); virtual Collection getValues(IndexReaderPtr reader); }; class FieldCacheDocIdSetString : public FieldCacheDocIdSet { public: FieldCacheDocIdSetString(IndexReaderPtr reader, bool mayUseTermDocs, StringIndexPtr fcsi, int32_t inclusiveLowerPoint, int32_t inclusiveUpperPoint); virtual ~FieldCacheDocIdSetString(); LUCENE_CLASS(FieldCacheDocIdSetString); protected: StringIndexPtr fcsi; int32_t inclusiveLowerPoint; int32_t inclusiveUpperPoint; public: virtual bool matchDoc(int32_t doc); }; /// A 
DocIdSetIterator using TermDocs to iterate valid docIds class FieldDocIdSetIteratorTermDocs : public DocIdSetIterator { public: FieldDocIdSetIteratorTermDocs(FieldCacheDocIdSetPtr cacheDocIdSet, TermDocsPtr termDocs); virtual ~FieldDocIdSetIteratorTermDocs(); LUCENE_CLASS(FieldDocIdSetIteratorTermDocs); protected: FieldCacheDocIdSetWeakPtr _cacheDocIdSet; TermDocsPtr termDocs; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; /// A DocIdSetIterator generating docIds by incrementing a variable - this one can be used if there /// are no deletions are on the index. class FieldDocIdSetIteratorIncrement : public DocIdSetIterator { public: FieldDocIdSetIteratorIncrement(FieldCacheDocIdSetPtr cacheDocIdSet); virtual ~FieldDocIdSetIteratorIncrement(); LUCENE_CLASS(FieldDocIdSetIteratorIncrement); protected: FieldCacheDocIdSetWeakPtr _cacheDocIdSet; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_FieldCacheSanityChecker.h000066400000000000000000000017111217574114600254110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDCACHESANITYCHECKER_H #define _FIELDCACHESANITYCHECKER_H #include "LuceneObject.h" namespace Lucene { /// Simple pair object for using "readerKey + fieldName" a Map key class ReaderField : public LuceneObject { public: ReaderField(LuceneObjectPtr readerKey, const String& fieldName); virtual ~ReaderField(); LUCENE_CLASS(ReaderField); public: LuceneObjectPtr readerKey; String fieldName; public: virtual int32_t hashCode(); virtual bool equals(LuceneObjectPtr other); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_FieldCacheTermsFilter.h000066400000000000000000000030611217574114600251150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDCACHETERMSFILTER_H #define _FIELDCACHETERMSFILTER_H #include "DocIdSet.h" #include "DocIdSetIterator.h" namespace Lucene { class FieldCacheTermsFilterDocIdSet : public DocIdSet { public: FieldCacheTermsFilterDocIdSet(Collection terms, StringIndexPtr fcsi); virtual ~FieldCacheTermsFilterDocIdSet(); LUCENE_CLASS(FieldCacheTermsFilterDocIdSet); protected: StringIndexPtr fcsi; OpenBitSetPtr openBitSet; public: virtual DocIdSetIteratorPtr iterator(); /// This DocIdSet implementation is cacheable. 
virtual bool isCacheable(); }; class FieldCacheTermsFilterDocIdSetIterator : public DocIdSetIterator { public: FieldCacheTermsFilterDocIdSetIterator(StringIndexPtr fcsi, OpenBitSetPtr openBitSet); virtual ~FieldCacheTermsFilterDocIdSetIterator(); LUCENE_CLASS(FieldCacheTermsFilterDocIdSetIterator); protected: StringIndexPtr fcsi; OpenBitSetPtr openBitSet; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_FieldValueHitQueue.h000066400000000000000000000030751217574114600244640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDVALUEHITQUEUE_H #define _FIELDVALUEHITQUEUE_H #include "LuceneObject.h" namespace Lucene { /// An implementation of {@link FieldValueHitQueue} which is optimized in case there is just one comparator. class OneComparatorFieldValueHitQueue : public FieldValueHitQueue { public: OneComparatorFieldValueHitQueue(Collection fields, int32_t size); virtual ~OneComparatorFieldValueHitQueue(); LUCENE_CLASS(OneComparatorFieldValueHitQueue); public: FieldComparatorPtr comparator; int32_t oneReverseMul; protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); }; /// An implementation of {@link FieldValueHitQueue} which is optimized in case there is more than one comparator. 
class MultiComparatorsFieldValueHitQueue : public FieldValueHitQueue { public: MultiComparatorsFieldValueHitQueue(Collection fields, int32_t size); virtual ~MultiComparatorsFieldValueHitQueue(); LUCENE_CLASS(MultiComparatorsFieldValueHitQueue); protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_FilterManager.h000066400000000000000000000033051217574114600235060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FILTERMANAGER_H #define _FILTERMANAGER_H #include "LuceneThread.h" namespace Lucene { /// Holds the filter and the last time the filter was used, to make LRU-based cache cleaning possible. class FilterItem : public LuceneObject { public: FilterItem(FilterPtr filter); virtual ~FilterItem(); LUCENE_CLASS(FilterItem); public: FilterPtr filter; int64_t timestamp; }; /// Keeps the cache from getting too big. /// /// The SortedSet sortedFilterItems is used only to sort the items from the cache, so when it's time to clean /// up we have the TreeSet sort the FilterItems by timestamp. /// /// Removes 1.5 * the numbers of items to make the cache smaller. /// For example: If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 /// round up to 8. This way we clean the cache a bit more, and avoid having the cache cleaner having to do /// it frequently. 
class FilterCleaner : public LuceneThread { public: FilterCleaner(FilterManagerPtr manager); virtual ~FilterCleaner(); LUCENE_CLASS(FilterCleaner); protected: FilterManagerWeakPtr _manager; bool running; MapLongInt sortedFilterItems; public: virtual void run(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_FilteredDocIdSet.h000066400000000000000000000017101217574114600241010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FILTEREDDOCIDSET_H #define _FILTEREDDOCIDSET_H #include "FilteredDocIdSetIterator.h" namespace Lucene { /// Implementation of the contract to build a DocIdSetIterator. class DefaultFilteredDocIdSetIterator : public FilteredDocIdSetIterator { public: DefaultFilteredDocIdSetIterator(FilteredDocIdSetPtr filtered, DocIdSetIteratorPtr innerIter); virtual ~DefaultFilteredDocIdSetIterator(); LUCENE_CLASS(DefaultFilteredDocIdSetIterator); protected: FilteredDocIdSetPtr filtered; protected: virtual bool match(int32_t docid); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_FilteredQuery.h000066400000000000000000000037241217574114600235570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _FILTEREDQUERY_H #define _FILTEREDQUERY_H #include "Weight.h" #include "Scorer.h" namespace Lucene { class FilteredQueryWeight : public Weight { public: FilteredQueryWeight(FilteredQueryPtr query, WeightPtr weight, SimilarityPtr similarity); virtual ~FilteredQueryWeight(); LUCENE_CLASS(FilteredQueryWeight); protected: FilteredQueryPtr query; WeightPtr weight; SimilarityPtr similarity; double value; public: virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); virtual QueryPtr getQuery(); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); friend class FilteredQueryWeightScorer; }; class FilteredQueryWeightScorer : public Scorer { public: FilteredQueryWeightScorer(FilteredQueryWeightPtr weight, ScorerPtr scorer, DocIdSetIteratorPtr docIdSetIterator, SimilarityPtr similarity); virtual ~FilteredQueryWeightScorer(); LUCENE_CLASS(FilteredQueryWeightScorer); protected: FilteredQueryWeightPtr weight; ScorerPtr scorer; DocIdSetIteratorPtr docIdSetIterator; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); virtual int32_t advance(int32_t target); virtual double score(); protected: int32_t advanceToCommon(int32_t scorerDoc, int32_t disiDoc); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_FuzzyQuery.h000066400000000000000000000017661217574114600231540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _FUZZYQUERY_H #define _FUZZYQUERY_H #include "PriorityQueue.h" namespace Lucene { class ScoreTerm : public LuceneObject { public: virtual ~ScoreTerm(); LUCENE_CLASS(ScoreTerm); public: TermPtr term; double score; public: int32_t compareTo(ScoreTermPtr other); }; class ScoreTermQueue : public PriorityQueue { public: ScoreTermQueue(int32_t size); virtual ~ScoreTermQueue(); LUCENE_CLASS(ScoreTermQueue); protected: virtual bool lessThan(const ScoreTermPtr& first, const ScoreTermPtr& second); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_IndexReader.h000066400000000000000000000014401217574114600231560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _INDEXREADER_H #define _INDEXREADER_H #include "_SegmentInfos.h" namespace Lucene { class FindSegmentsModified : public FindSegmentsFileT { public: FindSegmentsModified(SegmentInfosPtr infos, DirectoryPtr directory); virtual ~FindSegmentsModified(); LUCENE_CLASS(FindSegmentsModified); public: virtual uint64_t doBody(const String& segmentFileName); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_IndexWriter.h000066400000000000000000000053751217574114600232430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _INDEXWRITER_H #define _INDEXWRITER_H #include "LuceneObject.h" namespace Lucene { /// Holds shared SegmentReader instances. IndexWriter uses SegmentReaders for 1) applying deletes, /// 2) doing merges, 3) handing out a real-time reader. This pool reuses instances of the SegmentReaders /// in all these places if it is in "near real-time mode" (getReader() has been called on this instance). class ReaderPool : public LuceneObject { public: ReaderPool(IndexWriterPtr writer); virtual ~ReaderPool(); LUCENE_CLASS(ReaderPool); protected: IndexWriterWeakPtr _indexWriter; MapSegmentInfoSegmentReader readerMap; public: /// Forcefully clear changes for the specified segments, and remove from the pool. /// This is called on successful merge. void clear(SegmentInfosPtr infos); /// used only by asserts bool infoIsLive(SegmentInfoPtr info); SegmentInfoPtr mapToLive(SegmentInfoPtr info); /// Release the segment reader (i.e. decRef it and close if there are no more references. void release(SegmentReaderPtr sr); /// Release the segment reader (i.e. decRef it and close if there are no more references. void release(SegmentReaderPtr sr, bool drop); /// Remove all our references to readers, and commits any pending changes. void close(); /// Commit all segment reader in the pool. void commit(); /// Returns a ref to a clone. NOTE: this clone is not enrolled in the pool, so you should /// simply close() it when you're done (ie, do not call release()). IndexReaderPtr getReadOnlyClone(const SegmentInfoPtr info, bool doOpenStores, int32_t termInfosIndexDivisor); /// Obtain a SegmentReader from the readerPool. The reader must be returned by calling /// {@link #release(SegmentReader)} SegmentReaderPtr get(SegmentInfoPtr info, bool doOpenStores); /// Obtain a SegmentReader from the readerPool. 
The reader must be returned by calling /// {@link #release(SegmentReader)} SegmentReaderPtr get(SegmentInfoPtr info, bool doOpenStores, int32_t readBufferSize, int32_t termsIndexDivisor); /// Returns a ref SegmentReaderPtr getIfExists(SegmentInfoPtr info); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_IntFieldSource.h000066400000000000000000000016731217574114600236530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _INTFIELDSOURCE_H #define _INTFIELDSOURCE_H #include "DocValues.h" namespace Lucene { class IntDocValues : public DocValues { public: IntDocValues(IntFieldSourcePtr source, Collection arr); virtual ~IntDocValues(); LUCENE_CLASS(IntDocValues); protected: IntFieldSourceWeakPtr _source; Collection arr; public: virtual double doubleVal(int32_t doc); virtual int32_t intVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_MMapDirectory.h000066400000000000000000000040441217574114600235060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _MMAPDIRECTORY_H #define _MMAPDIRECTORY_H #include #include "IndexInput.h" namespace Lucene { class MMapIndexInput : public IndexInput { public: MMapIndexInput(const String& path = L""); virtual ~MMapIndexInput(); LUCENE_CLASS(MMapIndexInput); protected: int32_t _length; bool isClone; boost::iostreams::mapped_file_source file; int32_t bufferPosition; // next byte to read public: /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*,int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// The number of bytes in the file. virtual int64_t length(); /// Closes the stream to further operations. virtual void close(); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_MatchAllDocsQuery.h000066400000000000000000000035001217574114600243070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _MATCHALLDOCSQUERY_H #define _MATCHALLDOCSQUERY_H #include "Weight.h" #include "Scorer.h" namespace Lucene { class MatchAllDocsWeight : public Weight { public: MatchAllDocsWeight(MatchAllDocsQueryPtr query, SearcherPtr searcher); virtual ~MatchAllDocsWeight(); LUCENE_CLASS(MatchAllDocsWeight); protected: MatchAllDocsQueryPtr query; SimilarityPtr similarity; double queryWeight; double queryNorm; public: virtual String toString(); virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); }; class MatchAllScorer : public Scorer { public: MatchAllScorer(MatchAllDocsQueryPtr query, IndexReaderPtr reader, SimilarityPtr similarity, WeightPtr weight, ByteArray norms); virtual ~MatchAllScorer(); LUCENE_CLASS(MatchAllScorer); public: TermDocsPtr termDocs; double _score; ByteArray norms; protected: MatchAllDocsQueryPtr query; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_MultiPhraseQuery.h000066400000000000000000000022741217574114600242550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _MULTIPHRASEQUERY_H #define _MULTIPHRASEQUERY_H #include "Weight.h" namespace Lucene { class MultiPhraseWeight : public Weight { public: MultiPhraseWeight(MultiPhraseQueryPtr query, SearcherPtr searcher); virtual ~MultiPhraseWeight(); LUCENE_CLASS(MultiPhraseWeight); protected: MultiPhraseQueryPtr query; SimilarityPtr similarity; double value; double idf; double queryNorm; double queryWeight; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_MultiSearcher.h000066400000000000000000000074251217574114600235440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _MULTISEARCHER_H #define _MULTISEARCHER_H #include "Searcher.h" #include "Collector.h" namespace Lucene { /// Document Frequency cache acting as a Dummy-Searcher. This class is not a full-fledged Searcher, but /// only supports the methods necessary to initialize Weights. 
class CachedDfSource : public Searcher { public: CachedDfSource(MapTermInt dfMap, int32_t maxDoc, SimilarityPtr similarity); virtual ~CachedDfSource(); LUCENE_CLASS(CachedDfSource); protected: MapTermInt dfMap; // Map from Terms to corresponding doc freqs int32_t _maxDoc; // document count public: virtual int32_t docFreq(TermPtr term); virtual Collection docFreqs(Collection terms); virtual int32_t maxDoc(); virtual QueryPtr rewrite(QueryPtr query); virtual void close(); virtual DocumentPtr doc(int32_t n); virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector); virtual ExplanationPtr explain(WeightPtr weight, int32_t doc); virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr results); virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n); virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort); }; /// A subclass for searching a single searchable class MultiSearcherCallableNoSort : public LuceneObject { public: MultiSearcherCallableNoSort(SynchronizePtr lock, SearchablePtr searchable, WeightPtr weight, FilterPtr filter, int32_t nDocs, HitQueuePtr hq, int32_t i, Collection starts); virtual ~MultiSearcherCallableNoSort(); LUCENE_CLASS(MultiSearcherCallableNoSort); protected: SynchronizePtr lock; SearchablePtr searchable; WeightPtr weight; FilterPtr filter; int32_t nDocs; int32_t i; HitQueuePtr hq; Collection starts; public: TopDocsPtr call(); }; /// A subclass for searching a single searchable class MultiSearcherCallableWithSort : public LuceneObject { public: MultiSearcherCallableWithSort(SynchronizePtr lock, SearchablePtr searchable, WeightPtr weight, FilterPtr filter, int32_t nDocs, FieldDocSortedHitQueuePtr hq, SortPtr sort, int32_t i, Collection starts); virtual ~MultiSearcherCallableWithSort(); LUCENE_CLASS(MultiSearcherCallableWithSort); protected: SynchronizePtr lock; SearchablePtr searchable; WeightPtr weight; FilterPtr filter; int32_t nDocs; int32_t i; FieldDocSortedHitQueuePtr 
hq; Collection starts; SortPtr sort; public: TopFieldDocsPtr call(); }; class MultiSearcherCollector : public Collector { public: MultiSearcherCollector(CollectorPtr collector, int32_t start); virtual ~MultiSearcherCollector(); LUCENE_CLASS(MultiSearcherCollector); protected: CollectorPtr collector; int32_t start; public: virtual void setScorer(ScorerPtr scorer); virtual void collect(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual bool acceptsDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_MultiTermQuery.h000066400000000000000000000032131217574114600237340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _MULTITERMQUERY_H #define _MULTITERMQUERY_H #include "LuceneObject.h" namespace Lucene { class ConstantScoreFilterRewrite : public RewriteMethod { public: virtual ~ConstantScoreFilterRewrite(); LUCENE_CLASS(ConstantScoreFilterRewrite); public: virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query); }; class ScoringBooleanQueryRewrite : public RewriteMethod { public: virtual ~ScoringBooleanQueryRewrite(); LUCENE_CLASS(ScoringBooleanQueryRewrite); public: virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query); }; class ConstantScoreBooleanQueryRewrite : public ScoringBooleanQueryRewrite { public: virtual ~ConstantScoreBooleanQueryRewrite(); LUCENE_CLASS(ConstantScoreBooleanQueryRewrite); public: virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query); }; class ConstantScoreAutoRewriteDefault : public ConstantScoreAutoRewrite { public: virtual ~ConstantScoreAutoRewriteDefault(); LUCENE_CLASS(ConstantScoreAutoRewriteDefault); 
public: virtual void setTermCountCutoff(int32_t count); virtual void setDocCountPercent(double percent); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_MultipleTermPositions.h000066400000000000000000000026421217574114600253240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _MULTIPLETERMPOSITIONS_H #define _MULTIPLETERMPOSITIONS_H #include "PriorityQueue.h" namespace Lucene { class TermPositionsQueue : public PriorityQueue { public: TermPositionsQueue(Collection termPositions); virtual ~TermPositionsQueue(); LUCENE_CLASS(TermPositionsQueue); protected: Collection termPositions; public: virtual void initialize(); protected: virtual bool lessThan(const TermPositionsPtr& first, const TermPositionsPtr& second); }; class IntQueue : public LuceneObject { public: IntQueue(); virtual ~IntQueue(); LUCENE_CLASS(IntQueue); protected: int32_t arraySize; int32_t index; int32_t lastIndex; Collection array; public: void add(int32_t i); int32_t next(); void sort(); void clear(); int32_t size(); protected: void growArray(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_NativeFSLockFactory.h000066400000000000000000000020461217574114600246070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _NATIVEFSLOCKFACTORY_H #define _NATIVEFSLOCKFACTORY_H #include "Lock.h" namespace Lucene { class NativeFSLock : public Lock { public: NativeFSLock(const String& lockDir, const String& lockFileName); virtual ~NativeFSLock(); LUCENE_CLASS(NativeFSLock); protected: String lockDir; String path; filelockPtr lock; static SynchronizePtr LOCK_HELD_LOCK(); static HashSet LOCK_HELD(); public: virtual bool obtain(); virtual void release(); virtual bool isLocked(); virtual String toString(); protected: bool lockExists(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_NearSpansUnordered.h000066400000000000000000000031471217574114600245340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _NEARSPANSUNORDERED_H #define _NEARSPANSUNORDERED_H #include "Spans.h" #include "PriorityQueue.h" namespace Lucene { /// Wraps a Spans, and can be used to form a linked list. 
class SpansCell : public Spans { public: SpansCell(NearSpansUnorderedPtr unordered, SpansPtr spans, int32_t index); virtual ~SpansCell(); LUCENE_CLASS(SpansCell); protected: NearSpansUnorderedWeakPtr _unordered; SpansPtr spans; SpansCellPtr _next; int32_t length; int32_t index; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); protected: bool adjust(bool condition); friend class NearSpansUnordered; }; class CellQueue : public PriorityQueue { public: CellQueue(int32_t size); virtual ~CellQueue(); LUCENE_CLASS(CellQueue); protected: virtual bool lessThan(const SpansCellPtr& first, const SpansCellPtr& second); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_NoLockFactory.h000066400000000000000000000013141217574114600235010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _NOLOCKFACTORY_H #define _NOLOCKFACTORY_H #include "Lock.h" namespace Lucene { class NoLock : public Lock { public: virtual ~NoLock(); LUCENE_CLASS(NoLock); public: virtual bool obtain(); virtual void release(); virtual bool isLocked(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_NumericRangeQuery.h000066400000000000000000000057311217574114600244000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _NUMERICRANGEQUERY_H #define _NUMERICRANGEQUERY_H #include "FilteredTermEnum.h" #include "NumericUtils.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that match the sub-ranges for trie range queries. /// /// Warning: This term enumeration is not guaranteed to be always ordered by {@link Term#compareTo}. The /// ordering depends on how {@link NumericUtils#splitLongRange} and {@link NumericUtils#splitIntRange} /// generates the sub-ranges. For {@link MultiTermQuery} ordering is not relevant. class NumericRangeTermEnum : public FilteredTermEnum { public: NumericRangeTermEnum(NumericRangeQueryPtr query, IndexReaderPtr reader); virtual ~NumericRangeTermEnum(); LUCENE_CLASS(NumericRangeTermEnum); protected: NumericRangeQueryWeakPtr _query; IndexReaderPtr reader; Collection rangeBounds; TermPtr termTemplate; String currentUpperBound; public: virtual double difference(); /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Closes the enumeration to further activity, freeing resources. virtual void close(); protected: /// This is a dummy, it is not used by this class. virtual bool endEnum(); /// This is a dummy, it is not used by this class. virtual void setEnum(TermEnumPtr actualEnum); /// Compares if current upper bound is reached, this also updates the term count for statistics. /// In contrast to {@link FilteredTermEnum}, a return value of false ends iterating the current enum /// and forwards to the next sub-range. 
virtual bool termCompare(TermPtr term); }; class NumericLongRangeBuilder : public LongRangeBuilder { public: NumericLongRangeBuilder(Collection rangeBounds); virtual ~NumericLongRangeBuilder(); LUCENE_CLASS(NumericLongRangeBuilder); protected: Collection rangeBounds; public: virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); }; class NumericIntRangeBuilder : public IntRangeBuilder { public: NumericIntRangeBuilder(Collection rangeBounds); virtual ~NumericIntRangeBuilder(); LUCENE_CLASS(NumericIntRangeBuilder); protected: Collection rangeBounds; public: virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_OrdFieldSource.h000066400000000000000000000017011217574114600236350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _ORDFIELDSOURCE_H #define _ORDFIELDSOURCE_H #include "DocValues.h" namespace Lucene { class LPPAPI OrdDocValues : public DocValues { public: OrdDocValues(OrdFieldSourcePtr source, Collection arr); virtual ~OrdDocValues(); LUCENE_CLASS(OrdDocValues); protected: OrdFieldSourceWeakPtr _source; Collection arr; public: virtual double doubleVal(int32_t doc); virtual String strVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_ParallelReader.h000066400000000000000000000053721217574114600236530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _PARALLELREADER_H #define _PARALLELREADER_H #include "TermEnum.h" #include "TermDocs.h" #include "TermPositions.h" namespace Lucene { class ParallelTermEnum : public TermEnum { public: ParallelTermEnum(ParallelReaderPtr reader); ParallelTermEnum(ParallelReaderPtr reader, TermPtr term); virtual ~ParallelTermEnum(); LUCENE_CLASS(ParallelTermEnum); protected: ParallelReaderWeakPtr _reader; String field; MapStringIndexReader::iterator fieldIterator; bool setIterator; TermEnumPtr termEnum; public: /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Returns the current Term in the enumeration. virtual TermPtr term(); /// Returns the docFreq of the current Term in the enumeration. virtual int32_t docFreq(); /// Closes the enumeration to further activity, freeing resources. 
virtual void close(); }; /// Wrap a TermDocs in order to support seek(Term) class ParallelTermDocs : public TermPositions, public LuceneObject { public: ParallelTermDocs(ParallelReaderPtr reader); ParallelTermDocs(ParallelReaderPtr reader, TermPtr term); virtual ~ParallelTermDocs(); LUCENE_CLASS(ParallelTermDocs); protected: ParallelReaderWeakPtr _reader; TermDocsPtr termDocs; public: virtual int32_t doc(); virtual int32_t freq(); virtual void seek(TermPtr term); virtual void seek(TermEnumPtr termEnum); virtual bool next(); virtual int32_t read(Collection docs, Collection freqs); virtual bool skipTo(int32_t target); virtual void close(); }; class ParallelTermPositions : public ParallelTermDocs { public: ParallelTermPositions(ParallelReaderPtr reader); ParallelTermPositions(ParallelReaderPtr reader, TermPtr term); virtual ~ParallelTermPositions(); LUCENE_CLASS(ParallelTermPositions); public: virtual void seek(TermPtr term); virtual int32_t nextPosition(); virtual int32_t getPayloadLength(); virtual ByteArray getPayload(ByteArray data, int32_t offset); virtual bool isPayloadAvailable(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_PayloadTermQuery.h000066400000000000000000000037271217574114600242450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _PAYLOADTERMQUERY_H #define _PAYLOADTERMQUERY_H #include "SpanWeight.h" #include "SpanScorer.h" namespace Lucene { class PayloadTermWeight : public SpanWeight { public: PayloadTermWeight(PayloadTermQueryPtr query, SearcherPtr searcher); virtual ~PayloadTermWeight(); LUCENE_CLASS(PayloadTermWeight); public: virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); }; class PayloadTermSpanScorer : public SpanScorer { public: PayloadTermSpanScorer(TermSpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms); virtual ~PayloadTermSpanScorer(); LUCENE_CLASS(PayloadTermSpanScorer); protected: ByteArray payload; TermPositionsPtr positions; double payloadScore; int32_t payloadsSeen; public: virtual double score(); protected: virtual bool setFreqCurrentDoc(); void processPayload(SimilarityPtr similarity); /// Returns the SpanScorer score only. /// /// Should not be overridden without good cause /// /// @return the score for just the Span part without the payload /// @see #score() virtual double getSpanScore(); /// The score for the payload /// /// @return The score, as calculated by {@link PayloadFunction#docScore(int32_t, const String&, /// int32_t, double)} virtual double getPayloadScore(); virtual ExplanationPtr explain(int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_PhraseQuery.h000066400000000000000000000023311217574114600232340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _PHRASEQUERY_H #define _PHRASEQUERY_H #include "Weight.h" namespace Lucene { class PhraseWeight : public Weight { public: PhraseWeight(PhraseQueryPtr query, SearcherPtr searcher); virtual ~PhraseWeight(); LUCENE_CLASS(PhraseWeight); protected: PhraseQueryPtr query; SimilarityPtr similarity; double value; double idf; double queryNorm; double queryWeight; IDFExplanationPtr idfExp; public: virtual String toString(); virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_QueryWrapperFilter.h000066400000000000000000000015711217574114600246050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _QUERYWRAPPERFILTER_H #define _QUERYWRAPPERFILTER_H #include "DocIdSet.h" namespace Lucene { class QueryWrapperFilterDocIdSet : public DocIdSet { public: QueryWrapperFilterDocIdSet(IndexReaderPtr reader, WeightPtr weight); virtual ~QueryWrapperFilterDocIdSet(); LUCENE_CLASS(QueryWrapperFilterDocIdSet); protected: IndexReaderPtr reader; WeightPtr weight; public: virtual DocIdSetIteratorPtr iterator(); virtual bool isCacheable(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_ReverseOrdFieldSource.h000066400000000000000000000021011217574114600251640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _REVERSEORDFIELDSOURCE_H #define _REVERSEORDFIELDSOURCE_H #include "DocValues.h" namespace Lucene { class ReverseOrdDocValues : public DocValues { public: ReverseOrdDocValues(ReverseOrdFieldSourcePtr source, Collection arr, int32_t end); virtual ~ReverseOrdDocValues(); LUCENE_CLASS(ReverseOrdDocValues); protected: ReverseOrdFieldSourceWeakPtr _source; Collection arr; int32_t end; public: virtual double doubleVal(int32_t doc); virtual int32_t intVal(int32_t doc); virtual String strVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_ScorerDocQueue.h000066400000000000000000000014441217574114600236600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SCORERDOCQUEUE_H #define _SCORERDOCQUEUE_H #include "LuceneObject.h" namespace Lucene { class HeapedScorerDoc : public LuceneObject { public: HeapedScorerDoc(ScorerPtr scorer); HeapedScorerDoc(ScorerPtr scorer, int32_t doc); virtual ~HeapedScorerDoc(); LUCENE_CLASS(HeapedScorerDoc); public: ScorerPtr scorer; int32_t doc; public: void adjust(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SegmentInfos.h000066400000000000000000000045651217574114600234000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SEGMENTINFOS_H #define _SEGMENTINFOS_H #include "LuceneObject.h" namespace Lucene { /// Utility class for executing code that needs to do something with the current segments file. class FindSegmentsFile : public LuceneObject { public: FindSegmentsFile(SegmentInfosPtr infos, DirectoryPtr directory); virtual ~FindSegmentsFile(); LUCENE_CLASS(FindSegmentsFile); protected: SegmentInfosWeakPtr _segmentInfos; DirectoryPtr directory; public: void doRun(IndexCommitPtr commit = IndexCommitPtr()); virtual void runBody(const String& segmentFileName) = 0; }; template class FindSegmentsFileT : public FindSegmentsFile { public: FindSegmentsFileT(SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFile(infos, directory) {} virtual ~FindSegmentsFileT() {} protected: TYPE result; public: virtual TYPE run(IndexCommitPtr commit = IndexCommitPtr()) { doRun(commit); return result; } virtual void runBody(const String& segmentFileName) { result = doBody(segmentFileName); } virtual TYPE doBody(const String& segmentFileName) = 0; }; /// Utility class for executing code that needs to do something with the current segments file. This is necessary with /// lock-less commits because from the time you locate the current segments file name, until you actually open it, read /// its contents, or check modified time, etc., it could have been deleted due to a writer commit finishing. 
class FindSegmentsRead : public FindSegmentsFileT { public: FindSegmentsRead(SegmentInfosPtr infos, DirectoryPtr directory); virtual ~FindSegmentsRead(); LUCENE_CLASS(FindSegmentsRead); public: virtual int64_t doBody(const String& segmentFileName); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SegmentReader.h000066400000000000000000000123371217574114600235200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SEGMENTREADER_H #define _SEGMENTREADER_H #include "CloseableThreadLocal.h" namespace Lucene { /// Holds core readers that are shared (unchanged) when SegmentReader is cloned or reopened class CoreReaders : public LuceneObject { public: CoreReaders(SegmentReaderPtr origInstance, DirectoryPtr dir, SegmentInfoPtr si, int32_t readBufferSize, int32_t termsIndexDivisor); virtual ~CoreReaders(); LUCENE_CLASS(CoreReaders); protected: /// Counts how many other reader share the core objects (freqStream, proxStream, tis, etc.) of this reader; /// when coreRef drops to 0, these core objects may be closed. 
A given instance of SegmentReader may be /// closed, even those it shares core objects with other SegmentReaders SegmentReaderRefPtr ref; SegmentReaderWeakPtr _origInstance; public: String segment; FieldInfosPtr fieldInfos; IndexInputPtr freqStream; IndexInputPtr proxStream; TermInfosReaderPtr tisNoIndex; DirectoryPtr dir; DirectoryPtr cfsDir; int32_t readBufferSize; int32_t termsIndexDivisor; TermInfosReaderPtr tis; FieldsReaderPtr fieldsReaderOrig; TermVectorsReaderPtr termVectorsReaderOrig; CompoundFileReaderPtr cfsReader; CompoundFileReaderPtr storeCFSReader; public: TermVectorsReaderPtr getTermVectorsReaderOrig(); FieldsReaderPtr getFieldsReaderOrig(); void incRef(); DirectoryPtr getCFSReader(); TermInfosReaderPtr getTermsReader(); bool termsIndexIsLoaded(); /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run, /// sharing a segment that's still being merged. This method is not fully thread safe, and relies on the /// synchronization in IndexWriter void loadTermsIndex(SegmentInfoPtr si, int32_t termsIndexDivisor); void openDocStores(SegmentInfoPtr si); void decRef(); friend class SegmentReader; }; /// Sets the initial value class FieldsReaderLocal : public CloseableThreadLocal { public: FieldsReaderLocal(SegmentReaderPtr reader); protected: SegmentReaderWeakPtr _reader; protected: virtual FieldsReaderPtr initialValue(); }; class SegmentReaderRef : public LuceneObject { public: SegmentReaderRef(); virtual ~SegmentReaderRef(); LUCENE_CLASS(SegmentReaderRef); protected: int32_t _refCount; public: virtual String toString(); int32_t refCount(); int32_t incRef(); int32_t decRef(); friend class SegmentReader; }; /// Byte[] referencing is used because a new norm object needs to be created for each clone, and the byte /// array is all that is needed for sharing between cloned readers. 
The current norm referencing is for /// sharing between readers whereas the byte[] referencing is for copy on write which is independent of /// reader references (i.e. incRef, decRef). class Norm : public LuceneObject { public: Norm(); Norm(SegmentReaderPtr reader, IndexInputPtr in, int32_t number, int64_t normSeek); virtual ~Norm(); LUCENE_CLASS(Norm); protected: SegmentReaderWeakPtr _reader; int32_t refCount; /// If this instance is a clone, the originalNorm references the Norm that has a real open IndexInput NormPtr origNorm; SegmentReaderPtr origReader; IndexInputPtr in; int64_t normSeek; SegmentReaderRefPtr _bytesRef; ByteArray _bytes; bool dirty; int32_t number; bool rollbackDirty; public: void incRef(); void decRef(); /// Load bytes but do not cache them if they were not already cached void bytes(uint8_t* bytesOut, int32_t offset, int32_t length); /// Load & cache full bytes array. Returns bytes. ByteArray bytes(); /// Only for testing SegmentReaderRefPtr bytesRef(); /// Called if we intend to change a norm value. We make a private copy of bytes if it's shared // with others ByteArray copyOnWrite(); /// Returns a copy of this Norm instance that shares IndexInput & bytes with the original one virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); /// Flush all pending changes to the next generation separate norms file. void reWrite(SegmentInfoPtr si); protected: void closeInput(); friend class SegmentReader; }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_Similarity.h000066400000000000000000000016731217574114600231220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SIMILARITY_H #define _SIMILARITY_H #include "Explanation.h" namespace Lucene { class SimilarityIDFExplanation : public IDFExplanation { public: SimilarityIDFExplanation(int32_t df, int32_t max, double idf); SimilarityIDFExplanation(const String& exp, double idf); virtual ~SimilarityIDFExplanation(); LUCENE_CLASS(SimilarityIDFExplanation); protected: String exp; int32_t df; int32_t max; double idf; public: virtual String explain(); virtual double getIdf(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SimpleFSDirectory.h000066400000000000000000000060071217574114600243370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SIMPLEFSDIRECTORY_H #define _SIMPLEFSDIRECTORY_H #include "BufferedIndexInput.h" #include "BufferedIndexOutput.h" namespace Lucene { class InputFile : public LuceneObject { public: InputFile(const String& path); virtual ~InputFile(); LUCENE_CLASS(InputFile); public: static const int32_t FILE_EOF; static const int32_t FILE_ERROR; protected: ifstreamPtr file; int64_t position; int64_t length; public: void setPosition(int64_t position); int64_t getPosition(); int64_t getLength(); int32_t read(uint8_t* b, int32_t offset, int32_t length); void close(); bool isValid(); }; class SimpleFSIndexInput : public BufferedIndexInput { public: SimpleFSIndexInput(); SimpleFSIndexInput(const String& path, int32_t bufferSize, int32_t chunkSize); virtual ~SimpleFSIndexInput(); LUCENE_CLASS(SimpleFSIndexInput); protected: String path; InputFilePtr file; bool isClone; int32_t chunkSize; protected: virtual void readInternal(uint8_t* b, int32_t offset, int32_t 
length); virtual void seekInternal(int64_t pos); public: virtual int64_t length(); virtual void close(); /// Method used for testing. bool isValid(); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; class OutputFile : public LuceneObject { public: OutputFile(const String& path); virtual ~OutputFile(); LUCENE_CLASS(OutputFile); protected: ofstreamPtr file; String path; public: bool write(const uint8_t* b, int32_t offset, int32_t length); void close(); void setPosition(int64_t position); int64_t getLength(); void setLength(int64_t length); void flush(); bool isValid(); }; class SimpleFSIndexOutput : public BufferedIndexOutput { public: SimpleFSIndexOutput(const String& path); virtual ~SimpleFSIndexOutput(); LUCENE_CLASS(SimpleFSIndexOutput); protected: OutputFilePtr file; bool isOpen; public: virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length); virtual void close(); virtual void seek(int64_t pos); virtual int64_t length(); virtual void setLength(int64_t length); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SimpleFSLockFactory.h000066400000000000000000000024201217574114600246060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SIMPLEFSLOCKFACTORY_H #define _SIMPLEFSLOCKFACTORY_H #include "Lock.h" namespace Lucene { class SimpleFSLock : public Lock { public: SimpleFSLock(const String& lockDir, const String& lockFileName); virtual ~SimpleFSLock(); LUCENE_CLASS(SimpleFSLock); public: String lockDir; String lockFile; public: /// Attempts to obtain exclusive access and immediately return upon success or failure. 
/// @return true if exclusive access is obtained. virtual bool obtain(); /// Releases exclusive access. virtual void release(); /// Returns true if the resource is currently locked. Note that one must still call {@link #obtain()} /// before using the resource. virtual bool isLocked(); /// Returns derived object name. virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SingleInstanceLockFactory.h000066400000000000000000000025151217574114600260370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SINGLEINSTANCELOCKFACTORY_H #define _SINGLEINSTANCELOCKFACTORY_H #include "Lock.h" namespace Lucene { class SingleInstanceLock : public Lock { public: SingleInstanceLock(HashSet locks, const String& lockName); virtual ~SingleInstanceLock(); LUCENE_CLASS(SingleInstanceLock); protected: HashSet locks; String lockName; public: /// Attempts to obtain exclusive access and immediately return /// upon success or failure. /// @return true if exclusive access is obtained. virtual bool obtain(); /// Releases exclusive access. virtual void release(); /// Returns true if the resource is currently locked. Note that /// one must still call {@link #obtain()} before using the resource. virtual bool isLocked(); /// Returns derived object name. virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SnapshotDeletionPolicy.h000066400000000000000000000034211217574114600254300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SNAPSHOTDELETIONPOLICY_H #define _SNAPSHOTDELETIONPOLICY_H #include "IndexCommit.h" namespace Lucene { class MyCommitPoint : public IndexCommit { public: MyCommitPoint(SnapshotDeletionPolicyPtr deletionPolicy, IndexCommitPtr cp); virtual ~MyCommitPoint(); LUCENE_CLASS(MyCommitPoint); protected: SnapshotDeletionPolicyWeakPtr _deletionPolicy; public: IndexCommitPtr cp; public: virtual String toString(); /// Get the segments file (segments_N) associated with this commit point. virtual String getSegmentsFileName(); /// Returns all index files referenced by this commit point. virtual HashSet getFileNames(); /// Returns the {@link Directory} for the index. virtual DirectoryPtr getDirectory(); /// Delete this commit point. virtual void deleteCommit(); virtual bool isDeleted(); /// Returns the version for this IndexCommit. virtual int64_t getVersion(); /// Returns the generation (the _N in segments_N) for this IndexCommit. virtual int64_t getGeneration(); /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. virtual MapStringString getUserData(); virtual bool isOptimized(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SortedVIntList.h000066400000000000000000000017351217574114600236700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SORTEDVINTLIST_H #define _SORTEDVINTLIST_H #include "DocIdSetIterator.h" namespace Lucene { class SortedDocIdSetIterator : public DocIdSetIterator { public: SortedDocIdSetIterator(SortedVIntListPtr list); virtual ~SortedDocIdSetIterator(); LUCENE_CLASS(SortedDocIdSetIterator); public: SortedVIntListWeakPtr _list; int32_t bytePos; int32_t lastInt; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); protected: void advance(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SpanFirstQuery.h000066400000000000000000000017431217574114600237310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SPANFIRSTQUERY_H #define _SPANFIRSTQUERY_H #include "Spans.h" namespace Lucene { class FirstSpans : public Spans { public: FirstSpans(SpanFirstQueryPtr query, SpansPtr spans); virtual ~FirstSpans(); LUCENE_CLASS(FirstSpans); protected: SpanFirstQueryPtr query; SpansPtr spans; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SpanNotQuery.h000066400000000000000000000021561217574114600234010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SPANNOTQUERY_H #define _SPANNOTQUERY_H #include "Spans.h" namespace Lucene { class NotSpans : public Spans { public: NotSpans(SpanNotQueryPtr query, SpansPtr includeSpans, SpansPtr excludeSpans); virtual ~NotSpans(); LUCENE_CLASS(NotSpans); protected: SpanNotQueryPtr query; SpansPtr includeSpans; bool moreInclude; SpansPtr excludeSpans; bool moreExclude; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_SpanOrQuery.h000066400000000000000000000026511217574114600232210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SPANORQUERY_H #define _SPANORQUERY_H #include "PriorityQueue.h" #include "Spans.h" namespace Lucene { class SpanQueue : public PriorityQueue { public: SpanQueue(int32_t size); virtual ~SpanQueue(); LUCENE_CLASS(SpanQueue); protected: virtual bool lessThan(const SpansPtr& first, const SpansPtr& second); }; class OrSpans : public Spans { public: OrSpans(SpanOrQueryPtr query, IndexReaderPtr reader); virtual ~OrSpans(); LUCENE_CLASS(OrSpans); protected: SpanOrQueryPtr query; IndexReaderPtr reader; SpanQueuePtr queue; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); protected: bool initSpanQueue(int32_t target); SpansPtr top(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_StandardAnalyzer.h000066400000000000000000000012621217574114600242340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _STANDARDANALYZER_H #define _STANDARDANALYZER_H #include "LuceneObject.h" namespace Lucene { class StandardAnalyzerSavedStreams : public LuceneObject { public: virtual ~StandardAnalyzerSavedStreams(); public: StandardTokenizerPtr tokenStream; TokenStreamPtr filteredTokenStream; }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_StopAnalyzer.h000066400000000000000000000013651217574114600234250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _STOPANALYZER_H #define _STOPANALYZER_H #include "LuceneObject.h" namespace Lucene { /// Filters LowerCaseTokenizer with StopFilter. class StopAnalyzerSavedStreams : public LuceneObject { public: virtual ~StopAnalyzerSavedStreams(); LUCENE_CLASS(StopAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_TermQuery.h000066400000000000000000000023111217574114600227170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _TERMQUERY_H #define _TERMQUERY_H #include "Weight.h" namespace Lucene { class TermWeight : public Weight { public: TermWeight(TermQueryPtr query, SearcherPtr searcher); virtual ~TermWeight(); LUCENE_CLASS(TermWeight); protected: TermQueryPtr query; SimilarityPtr similarity; double value; double idf; double queryNorm; double queryWeight; IDFExplanationPtr idfExp; public: virtual String toString(); virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_TimeLimitingCollector.h000066400000000000000000000016511217574114600252320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _TIMELIMITINGCOLLECTOR_H #define _TIMELIMITINGCOLLECTOR_H #include "LuceneThread.h" namespace Lucene { class TimerThread : public LuceneThread { public: TimerThread(); virtual ~TimerThread(); LUCENE_CLASS(TimerThread); protected: int64_t time; bool _stopThread; public: virtual void start(); virtual void run(); /// Get the timer value in milliseconds. int64_t getMilliseconds(); /// Stop timer thread. void stopThread(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_TopFieldCollector.h000066400000000000000000000212611217574114600243440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _TOPFIELDCOLLECTOR_H #define _TOPFIELDCOLLECTOR_H #include "TopDocsCollector.h" namespace Lucene { /// Implements a TopFieldCollector over one SortField criteria, without tracking document scores and maxScore. 
class OneComparatorNonScoringCollector : public TopFieldCollector { public: OneComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OneComparatorNonScoringCollector(); LUCENE_CLASS(OneComparatorNonScoringCollector); public: FieldComparatorPtr comparator; int32_t reverseMul; public: virtual void initialize(); virtual void updateBottom(int32_t doc); virtual void collect(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setScorer(ScorerPtr scorer); }; /// Implements a TopFieldCollector over one SortField criteria, without tracking document scores and maxScore, /// and assumes out of orderness in doc Ids collection. class OutOfOrderOneComparatorNonScoringCollector : public OneComparatorNonScoringCollector { public: OutOfOrderOneComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderOneComparatorNonScoringCollector(); LUCENE_CLASS(OutOfOrderOneComparatorNonScoringCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over one SortField criteria, while tracking document scores but no maxScore. class OneComparatorScoringNoMaxScoreCollector : public OneComparatorNonScoringCollector { public: OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OneComparatorScoringNoMaxScoreCollector(); LUCENE_CLASS(OneComparatorScoringNoMaxScoreCollector); public: ScorerPtr scorer; public: virtual void updateBottom(int32_t doc, double score); virtual void collect(int32_t doc); virtual void setScorer(ScorerPtr scorer); }; /// Implements a TopFieldCollector over one SortField criteria, while tracking document scores but no maxScore, /// and assumes out of orderness in doc Ids collection. 
class OutOfOrderOneComparatorScoringNoMaxScoreCollector : public OneComparatorScoringNoMaxScoreCollector { public: OutOfOrderOneComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderOneComparatorScoringNoMaxScoreCollector(); LUCENE_CLASS(OutOfOrderOneComparatorScoringNoMaxScoreCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over one SortField criteria, with tracking document scores and maxScore. class OneComparatorScoringMaxScoreCollector : public OneComparatorNonScoringCollector { public: OneComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OneComparatorScoringMaxScoreCollector(); LUCENE_CLASS(OneComparatorScoringMaxScoreCollector); public: ScorerPtr scorer; public: virtual void updateBottom(int32_t doc, double score); virtual void collect(int32_t doc); virtual void setScorer(ScorerPtr scorer); }; /// Implements a TopFieldCollector over one SortField criteria, with tracking document scores and maxScore, /// and assumes out of orderness in doc Ids collection. class OutOfOrderOneComparatorScoringMaxScoreCollector : public OneComparatorScoringMaxScoreCollector { public: OutOfOrderOneComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderOneComparatorScoringMaxScoreCollector(); LUCENE_CLASS(OutOfOrderOneComparatorScoringMaxScoreCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. 
class MultiComparatorNonScoringCollector : public TopFieldCollector { public: MultiComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~MultiComparatorNonScoringCollector(); LUCENE_CLASS(MultiComparatorNonScoringCollector); public: Collection comparators; Collection reverseMul; public: virtual void initialize(); virtual void updateBottom(int32_t doc); virtual void collect(int32_t doc); virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); virtual void setScorer(ScorerPtr scorer); }; /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. class OutOfOrderMultiComparatorNonScoringCollector : public MultiComparatorNonScoringCollector { public: OutOfOrderMultiComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderMultiComparatorNonScoringCollector(); LUCENE_CLASS(OutOfOrderMultiComparatorNonScoringCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore. class MultiComparatorScoringMaxScoreCollector : public MultiComparatorNonScoringCollector { public: MultiComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~MultiComparatorScoringMaxScoreCollector(); LUCENE_CLASS(MultiComparatorScoringMaxScoreCollector); public: ScorerWeakPtr _scorer; public: virtual void updateBottom(int32_t doc, double score); virtual void collect(int32_t doc); virtual void setScorer(ScorerPtr scorer); }; /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. 
class OutOfOrderMultiComparatorScoringMaxScoreCollector : public MultiComparatorScoringMaxScoreCollector { public: OutOfOrderMultiComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderMultiComparatorScoringMaxScoreCollector(); LUCENE_CLASS(OutOfOrderMultiComparatorScoringMaxScoreCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore. class MultiComparatorScoringNoMaxScoreCollector : public MultiComparatorNonScoringCollector { public: MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~MultiComparatorScoringNoMaxScoreCollector(); LUCENE_CLASS(MultiComparatorScoringNoMaxScoreCollector); public: ScorerWeakPtr _scorer; public: virtual void updateBottom(int32_t doc, double score); virtual void collect(int32_t doc); virtual void setScorer(ScorerPtr scorer); }; /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore, /// and assumes out of orderness in doc Ids collection. class OutOfOrderMultiComparatorScoringNoMaxScoreCollector : public MultiComparatorScoringNoMaxScoreCollector { public: OutOfOrderMultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderMultiComparatorScoringNoMaxScoreCollector(); LUCENE_CLASS(OutOfOrderMultiComparatorScoringNoMaxScoreCollector); public: virtual void collect(int32_t doc); virtual void setScorer(ScorerPtr scorer); virtual bool acceptsDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_TopScoreDocCollector.h000066400000000000000000000024031217574114600250170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _TOPSCOREDOCCOLLECTOR_H #define _TOPSCOREDOCCOLLECTOR_H #include "TopDocsCollector.h" namespace Lucene { /// Assumes docs are scored in order. class InOrderTopScoreDocCollector : public TopScoreDocCollector { public: InOrderTopScoreDocCollector(int32_t numHits); virtual ~InOrderTopScoreDocCollector(); LUCENE_CLASS(InOrderTopScoreDocCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Assumes docs are scored out of order. class OutOfOrderTopScoreDocCollector : public TopScoreDocCollector { public: OutOfOrderTopScoreDocCollector(int32_t numHits); virtual ~OutOfOrderTopScoreDocCollector(); LUCENE_CLASS(OutOfOrderTopScoreDocCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/include/_ValueSourceQuery.h000066400000000000000000000040711217574114600242520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _VALUESOURCEQUERY_H #define _VALUESOURCEQUERY_H #include "Weight.h" #include "Scorer.h" namespace Lucene { class ValueSourceWeight : public Weight { public: ValueSourceWeight(ValueSourceQueryPtr query, SearcherPtr searcher); virtual ~ValueSourceWeight(); LUCENE_CLASS(ValueSourceWeight); public: ValueSourceQueryPtr query; SimilarityPtr similarity; double queryNorm; double queryWeight; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); }; /// A scorer that (simply) matches all documents, and scores each document with the value of the value /// source in effect. As an example, if the value source is a (cached) field source, then value of that /// field in that document will be used. (assuming field is indexed for this doc, with a single token.) class ValueSourceScorer : public Scorer { public: ValueSourceScorer(SimilarityPtr similarity, IndexReaderPtr reader, ValueSourceWeightPtr weight); virtual ~ValueSourceScorer(); LUCENE_CLASS(ValueSourceScorer); public: ValueSourceWeightPtr weight; double qWeight; DocValuesPtr vals; TermDocsPtr termDocs; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); virtual int32_t advance(int32_t target); virtual double score(); }; } #endif LucenePlusPlus-rel_3.0.4/src/core/index/000077500000000000000000000000001217574114600201415ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/index/AbstractAllTermDocs.cpp000066400000000000000000000035521217574114600245070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AbstractAllTermDocs.h" namespace Lucene { AbstractAllTermDocs::AbstractAllTermDocs(int32_t maxDoc) { this->maxDoc = maxDoc; this->_doc = -1; } AbstractAllTermDocs::~AbstractAllTermDocs() { } void AbstractAllTermDocs::seek(TermPtr term) { if (!term) _doc = -1; else boost::throw_exception(UnsupportedOperationException()); } void AbstractAllTermDocs::seek(TermEnumPtr termEnum) { boost::throw_exception(UnsupportedOperationException()); } int32_t AbstractAllTermDocs::doc() { return _doc; } int32_t AbstractAllTermDocs::freq() { return 1; } bool AbstractAllTermDocs::next() { return skipTo(_doc + 1); } int32_t AbstractAllTermDocs::read(Collection docs, Collection freqs) { int32_t length = docs.size(); int32_t i = 0; while (i < length && _doc < maxDoc) { if (!isDeleted(_doc)) { docs[i] = _doc; freqs[i] = 1; ++i; } ++_doc; } return i; } bool AbstractAllTermDocs::skipTo(int32_t target) { _doc = target; while (_doc < maxDoc) { if (!isDeleted(_doc)) return true; ++_doc; } return false; } void AbstractAllTermDocs::close() { } } LucenePlusPlus-rel_3.0.4/src/core/index/AllTermDocs.cpp000066400000000000000000000015521217574114600230210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AllTermDocs.h" #include "SegmentReader.h" #include "BitVector.h" namespace Lucene { AllTermDocs::AllTermDocs(SegmentReaderPtr parent) : AbstractAllTermDocs(parent->maxDoc()) { SyncLock parentLock(parent); this->_deletedDocs = parent->deletedDocs; } AllTermDocs::~AllTermDocs() { } bool AllTermDocs::isDeleted(int32_t doc) { BitVectorPtr deletedDocs(_deletedDocs.lock()); return (deletedDocs && deletedDocs->get(_doc)); } } LucenePlusPlus-rel_3.0.4/src/core/index/BufferedDeletes.cpp000066400000000000000000000076361217574114600237110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BufferedDeletes.h" #include "MergeDocIDRemapper.h" namespace Lucene { BufferedDeletes::BufferedDeletes(bool doTermSort) { // doTermSort not used: always use sorted term map terms = MapTermNum::newInstance(); queries = MapQueryInt::newInstance(); docIDs = Collection::newInstance(); numTerms = 0; bytesUsed = 0; } BufferedDeletes::~BufferedDeletes() { } int32_t BufferedDeletes::size() { // We use numTerms not terms.size() intentionally, so that deletes by the same term // multiple times "count", ie if you ask to flush every 1000 deletes then even dup'd // terms are counted towards that 1000 return numTerms + queries.size() + docIDs.size(); } void BufferedDeletes::update(BufferedDeletesPtr in) { numTerms += in->numTerms; bytesUsed += in->bytesUsed; terms.putAll(in->terms.begin(), in->terms.end()); queries.putAll(in->queries.begin(), in->queries.end()); docIDs.addAll(in->docIDs.begin(), in->docIDs.end()); in->clear(); } void 
BufferedDeletes::clear() { terms.clear(); queries.clear(); docIDs.clear(); numTerms = 0; bytesUsed = 0; } void BufferedDeletes::addBytesUsed(int64_t b) { bytesUsed += b; } bool BufferedDeletes::any() { return (!terms.empty() || !docIDs.empty() || !queries.empty()); } void BufferedDeletes::remap(MergeDocIDRemapperPtr mapper, SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergedDocCount) { SyncLock syncLock(this); MapTermNum newDeleteTerms; // Remap delete-by-term if (!terms.empty()) { newDeleteTerms = MapTermNum::newInstance(); for (MapTermNum::iterator entry = terms.begin(); entry != terms.end(); ++entry) newDeleteTerms.put(entry->first, newLucene(mapper->remap(entry->second->getNum()))); } // Remap delete-by-docID Collection newDeleteDocIDs; if (!docIDs.empty()) { newDeleteDocIDs = Collection::newInstance(); for (Collection::iterator num = docIDs.begin(); num != docIDs.end(); ++num) newDeleteDocIDs.add(mapper->remap(*num)); } // Remap delete-by-query MapQueryInt newDeleteQueries; if (!queries.empty()) { newDeleteQueries = MapQueryInt::newInstance(); for (MapQueryInt::iterator entry = queries.begin(); entry != queries.end(); ++entry) newDeleteQueries.put(entry->first, mapper->remap(entry->second)); } if (newDeleteTerms) terms = newDeleteTerms; if (newDeleteDocIDs) docIDs = newDeleteDocIDs; if (newDeleteQueries) queries = newDeleteQueries; } Num::Num(int32_t num) { this->num = num; } int32_t Num::getNum() { return num; } void Num::setNum(int32_t num) { // Only record the new number if it's greater than the current one. This is important // because if multiple threads are replacing the same doc at nearly the same time, it's // possible that one thread that got a higher docID is scheduled before the other threads. 
this->num = std::max(this->num, num); } } LucenePlusPlus-rel_3.0.4/src/core/index/ByteBlockPool.cpp000066400000000000000000000103071217574114600233560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ByteBlockPool.h" #include "DocumentsWriter.h" #include "MiscUtils.h" namespace Lucene { // Size of each slice. These arrays should be at most 16 elements (index is encoded with 4 bits). First array // is just a compact way to encode X+1 with a max. Second array is the length of each slice, ie first slice is // 5 bytes, next slice is 14 bytes, etc. const int32_t ByteBlockPool::nextLevelArray[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; const int32_t ByteBlockPool::levelSizeArray[] = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200}; ByteBlockPool::ByteBlockPool(ByteBlockPoolAllocatorBasePtr allocator, bool trackAllocations) { buffers = Collection::newInstance(10); bufferUpto = -1; byteUpto = DocumentsWriter::BYTE_BLOCK_SIZE; byteOffset = -DocumentsWriter::BYTE_BLOCK_SIZE; this->allocator = allocator; this->trackAllocations = trackAllocations; } ByteBlockPool::~ByteBlockPool() { } int32_t ByteBlockPool::FIRST_LEVEL_SIZE() { return levelSizeArray[0]; } void ByteBlockPool::reset() { if (bufferUpto != -1) { // We allocated at least one buffer for (int32_t i = 0; i < bufferUpto; ++i) { // Fully zero fill buffers that we fully used MiscUtils::arrayFill(buffers[i].get(), 0, buffers[i].size(), 0); } // Partial zero fill the final buffer MiscUtils::arrayFill(buffers[bufferUpto].get(), 0, byteUpto, 0); if (bufferUpto > 0) { // Recycle all but the first buffer allocator->recycleByteBlocks(buffers, 1, 1 + bufferUpto); } // Re-use the first buffer 
bufferUpto = 0; byteUpto = 0; byteOffset = 0; buffer = buffers[0]; } } void ByteBlockPool::nextBuffer() { if (1 + bufferUpto == buffers.size()) buffers.resize((int32_t)((double)buffers.size() * 1.5)); buffers[1 + bufferUpto] = allocator->getByteBlock(trackAllocations); buffer = buffers[1 + bufferUpto]; ++bufferUpto; byteUpto = 0; byteOffset += DocumentsWriter::BYTE_BLOCK_SIZE; } int32_t ByteBlockPool::newSlice(int32_t size) { if (byteUpto > DocumentsWriter::BYTE_BLOCK_SIZE - size) nextBuffer(); int32_t upto = byteUpto; byteUpto += size; buffer[byteUpto - 1] = 16; return upto; } int32_t ByteBlockPool::allocSlice(ByteArray slice, int32_t upto) { int32_t level = slice[upto] & 15; int32_t newLevel = nextLevelArray[level]; int32_t newSize = levelSizeArray[newLevel]; // Maybe allocate another block if (byteUpto > DocumentsWriter::BYTE_BLOCK_SIZE - newSize) nextBuffer(); int32_t newUpto = byteUpto; int32_t offset = newUpto + byteOffset; byteUpto += newSize; // Copy forward the past 3 bytes (which we are about to overwrite with the forwarding address) buffer[newUpto] = slice[upto - 3]; buffer[newUpto + 1] = slice[upto - 2]; buffer[newUpto + 2] = slice[upto - 1]; // Write forwarding address at end of last slice slice[upto - 3] = (uint8_t)MiscUtils::unsignedShift(offset, 24); slice[upto - 2] = (uint8_t)MiscUtils::unsignedShift(offset, 16); slice[upto - 1] = (uint8_t)MiscUtils::unsignedShift(offset, 8); slice[upto] = (uint8_t)offset; // Write new level buffer[byteUpto - 1] = (uint8_t)(16 | newLevel); return (newUpto + 3); } ByteBlockPoolAllocatorBase::~ByteBlockPoolAllocatorBase() { } } LucenePlusPlus-rel_3.0.4/src/core/index/ByteSliceReader.cpp000066400000000000000000000114521217574114600236560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ByteSliceReader.h" #include "DocumentsWriter.h" #include "IndexOutput.h" #include "MiscUtils.h" namespace Lucene { ByteSliceReader::ByteSliceReader() { bufferUpto = 0; upto = 0; limit = 0; level = 0; bufferOffset = 0; endIndex = 0; } ByteSliceReader::~ByteSliceReader() { } void ByteSliceReader::init(ByteBlockPoolPtr pool, int32_t startIndex, int32_t endIndex) { BOOST_ASSERT(endIndex - startIndex >= 0); BOOST_ASSERT(startIndex >= 0); BOOST_ASSERT(endIndex >= 0); this->pool = pool; this->endIndex = endIndex; level = 0; bufferUpto = startIndex / DocumentsWriter::BYTE_BLOCK_SIZE; bufferOffset = bufferUpto * DocumentsWriter::BYTE_BLOCK_SIZE; buffer = pool->buffers[bufferUpto]; upto = startIndex & DocumentsWriter::BYTE_BLOCK_MASK; int32_t firstSize = ByteBlockPool::levelSizeArray[0]; if (startIndex + firstSize >= endIndex) { // There is only this one slice to read limit = endIndex & DocumentsWriter::BYTE_BLOCK_MASK; } else limit = upto + firstSize - 4; } bool ByteSliceReader::eof() { BOOST_ASSERT(upto + bufferOffset <= endIndex); return (upto + bufferOffset == endIndex); } uint8_t ByteSliceReader::readByte() { BOOST_ASSERT(!eof()); BOOST_ASSERT(upto <= limit); if (upto == limit) nextSlice(); return buffer[upto++]; } int64_t ByteSliceReader::writeTo(IndexOutputPtr out) { int64_t size = 0; while (true) { if (limit + bufferOffset == endIndex) { BOOST_ASSERT(endIndex - bufferOffset >= upto); out->writeBytes(buffer.get(), upto, limit - upto); size += limit - upto; break; } else { out->writeBytes(buffer.get(), upto, limit - upto); size += limit-upto; nextSlice(); } } return size; } void ByteSliceReader::nextSlice() { // Skip to our next slice int32_t nextIndex = ((buffer[limit] & 0xff) << 24) + ((buffer[1 + limit] & 0xff) << 16) + ((buffer[2 + 
limit] & 0xff) << 8) + (buffer[3 + limit] & 0xff); level = ByteBlockPool::nextLevelArray[level]; int32_t newSize = ByteBlockPool::levelSizeArray[level]; bufferUpto = nextIndex / DocumentsWriter::BYTE_BLOCK_SIZE; bufferOffset = bufferUpto * DocumentsWriter::BYTE_BLOCK_SIZE; this->buffer = pool->buffers[bufferUpto]; upto = nextIndex & DocumentsWriter::BYTE_BLOCK_MASK; if (nextIndex + newSize >= endIndex) { // We are advancing to the final slice BOOST_ASSERT(endIndex - nextIndex > 0); limit = endIndex - bufferOffset; } else { // This is not the final slice (subtract 4 for the forwarding address at the end of this new slice) limit = upto + newSize - 4; } } void ByteSliceReader::readBytes(uint8_t* b, int32_t offset, int32_t length) { while (length > 0) { int32_t numLeft = limit - upto; if (numLeft < length) { // Read entire slice MiscUtils::arrayCopy(buffer.get(), upto, b, offset, numLeft); offset += numLeft; length -= numLeft; nextSlice(); } else { // This slice is the last one MiscUtils::arrayCopy(buffer.get(), upto, b, offset, length); upto += length; break; } } } int64_t ByteSliceReader::getFilePointer() { boost::throw_exception(RuntimeException(L"not implemented")); return 0; } int64_t ByteSliceReader::length() { boost::throw_exception(RuntimeException(L"not implemented")); return 0; } void ByteSliceReader::seek(int64_t pos) { boost::throw_exception(RuntimeException(L"not implemented")); } void ByteSliceReader::close() { boost::throw_exception(RuntimeException(L"not implemented")); } } LucenePlusPlus-rel_3.0.4/src/core/index/ByteSliceWriter.cpp000066400000000000000000000043071217574114600237310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ByteSliceWriter.h" #include "DocumentsWriter.h" #include "MiscUtils.h" namespace Lucene { ByteSliceWriter::ByteSliceWriter(ByteBlockPoolPtr pool) { this->pool = pool; upto = 0; offset0 = 0; } ByteSliceWriter::~ByteSliceWriter() { } void ByteSliceWriter::init(int32_t address) { slice = pool->buffers[address >> DocumentsWriter::BYTE_BLOCK_SHIFT]; BOOST_ASSERT(slice); upto = (address & DocumentsWriter::BYTE_BLOCK_MASK); offset0 = address; BOOST_ASSERT(upto < slice.size()); } void ByteSliceWriter::writeByte(uint8_t b) { BOOST_ASSERT(slice); if (slice[upto] != 0) { upto = pool->allocSlice(slice, upto); slice = pool->buffer; offset0 = pool->byteOffset; BOOST_ASSERT(slice); } slice[upto++] = b; BOOST_ASSERT(upto != slice.size()); } void ByteSliceWriter::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { int32_t offsetEnd = offset + length; while (offset < offsetEnd) { if (slice[upto] != 0) { // End marker upto = pool->allocSlice(slice, upto); slice = pool->buffer; offset0 = pool->byteOffset; } slice[upto++] = b[offset++]; BOOST_ASSERT(upto != slice.size()); } } int32_t ByteSliceWriter::getAddress() { return upto + (offset0 & DocumentsWriter::BYTE_BLOCK_NOT_MASK); } void ByteSliceWriter::writeVInt(int32_t i) { while ((i & ~0x7f) != 0) { writeByte((uint8_t)((i & 0x7f) | 0x80)); i = MiscUtils::unsignedShift(i, 7); } writeByte((uint8_t)i); } } LucenePlusPlus-rel_3.0.4/src/core/index/CharBlockPool.cpp000066400000000000000000000027221217574114600233320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharBlockPool.h" #include "DocumentsWriter.h" namespace Lucene { CharBlockPool::CharBlockPool(DocumentsWriterPtr docWriter) { numBuffer = 0; bufferUpto = -1; charUpto = DocumentsWriter::CHAR_BLOCK_SIZE; charOffset = -DocumentsWriter::CHAR_BLOCK_SIZE; buffers = Collection::newInstance(10); this->_docWriter = docWriter; } CharBlockPool::~CharBlockPool() { } void CharBlockPool::reset() { DocumentsWriterPtr(_docWriter)->recycleCharBlocks(buffers, 1 + bufferUpto); bufferUpto = -1; charUpto = DocumentsWriter::CHAR_BLOCK_SIZE; charOffset = -DocumentsWriter::CHAR_BLOCK_SIZE; } void CharBlockPool::nextBuffer() { if (1 + bufferUpto == buffers.size()) buffers.resize((int32_t)((double)buffers.size() * 1.5)); buffers[1 + bufferUpto] = DocumentsWriterPtr(_docWriter)->getCharBlock(); buffer = buffers[1 + bufferUpto]; ++bufferUpto; charUpto = 0; charOffset += DocumentsWriter::CHAR_BLOCK_SIZE; } } LucenePlusPlus-rel_3.0.4/src/core/index/CheckIndex.cpp000066400000000000000000000726671217574114600226740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "CheckIndex.h" #include "_CheckIndex.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "SegmentReader.h" #include "Directory.h" #include "IndexInput.h" #include "BitVector.h" #include "Term.h" #include "TermEnum.h" #include "TermPositions.h" #include "Document.h" #include "FSDirectory.h" #include "InfoStream.h" #include "StringUtils.h" namespace Lucene { bool CheckIndex::_assertsOn = false; CheckIndex::CheckIndex(DirectoryPtr dir) { this->dir = dir; } CheckIndex::~CheckIndex() { } void CheckIndex::setInfoStream(InfoStreamPtr out) { infoStream = out; } void CheckIndex::msg(const String& msg) { if (infoStream) *infoStream << msg << L"\n"; } IndexStatusPtr CheckIndex::checkIndex() { return checkIndex(Collection()); } IndexStatusPtr CheckIndex::checkIndex(Collection onlySegments) { SegmentInfosPtr sis(newLucene()); IndexStatusPtr result(newLucene()); result->dir = dir; try { sis->read(dir); } catch (...) { msg(L"ERROR: could not read any segments file in directory"); result->missingSegments = true; return result; } int32_t numSegments = sis->size(); String segmentsFileName(sis->getCurrentSegmentFileName()); IndexInputPtr input; try { input = dir->openInput(segmentsFileName); } catch (...) { msg(L"ERROR: could not open segments file in directory"); result->cantOpenSegments = true; return result; } int32_t format = 0; try { format = input->readInt(); } catch (...) 
{ msg(L"ERROR: could not read segment file version in directory"); result->missingSegmentVersion = true; if (input) input->close(); return result; } if (input) input->close(); String sFormat; bool skip = false; if (format == SegmentInfos::FORMAT) sFormat = L"FORMAT [Lucene Pre-2.1]"; if (format == SegmentInfos::FORMAT_LOCKLESS) sFormat = L"FORMAT_LOCKLESS [Lucene 2.1]"; else if (format == SegmentInfos::FORMAT_SINGLE_NORM_FILE) sFormat = L"FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; else if (format == SegmentInfos::FORMAT_SHARED_DOC_STORE) sFormat = L"FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; else { if (format == SegmentInfos::FORMAT_CHECKSUM) sFormat = L"FORMAT_CHECKSUM [Lucene 2.4]"; else if (format == SegmentInfos::FORMAT_DEL_COUNT) sFormat = L"FORMAT_DEL_COUNT [Lucene 2.4]"; else if (format == SegmentInfos::FORMAT_HAS_PROX) sFormat = L"FORMAT_HAS_PROX [Lucene 2.4]"; else if (format == SegmentInfos::FORMAT_USER_DATA) sFormat = L"FORMAT_USER_DATA [Lucene 2.9]"; else if (format == SegmentInfos::FORMAT_DIAGNOSTICS) sFormat = L"FORMAT_DIAGNOSTICS [Lucene 2.9]"; else if (format < SegmentInfos::CURRENT_FORMAT) { sFormat = L"int=" + StringUtils::toString(format) + L" [newer version of Lucene than this tool]"; skip = true; } else sFormat = StringUtils::toString(format) + L" [Lucene 1.3 or prior]"; } result->segmentsFileName = segmentsFileName; result->numSegments = numSegments; result->segmentFormat = sFormat; result->userData = sis->getUserData(); String userDataString; if (!sis->getUserData().empty()) userDataString = L" userData(size)=" + StringUtils::toString(sis->getUserData().size()); msg(L"Segments file=" + segmentsFileName + L" numSegments=" + StringUtils::toString(numSegments) + L" version=" + sFormat + userDataString); if (onlySegments) { result->partial = true; msg(L"\nChecking only these segments:"); for (Collection::iterator s = onlySegments.begin(); s != onlySegments.end(); ++s) msg(L" " + *s); result->segmentsChecked.addAll(onlySegments.begin(), 
onlySegments.end()); msg(L":"); } if (skip) { msg(L"\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on;" \ L" please re-compile this tool on the matching version of Lucene; exiting"); result->toolOutOfDate = true; return result; } result->newSegments = boost::dynamic_pointer_cast(sis->clone()); result->newSegments->clear(); for (int32_t i = 0; i < numSegments; ++i) { SegmentInfoPtr info(sis->info(i)); if (onlySegments && !onlySegments.contains(info->name)) continue; SegmentInfoStatusPtr segInfoStat(newLucene()); result->segmentInfos.add(segInfoStat); msg(L" name=" + info->name + L" docCount=" + StringUtils::toString(info->docCount)); segInfoStat->name = info->name; segInfoStat->docCount = info->docCount; int32_t toLoseDocCount = info->docCount; SegmentReaderPtr reader; try { msg(L" compound=" + StringUtils::toString(info->getUseCompoundFile())); segInfoStat->compound = info->getUseCompoundFile(); msg(L" hasProx=" + StringUtils::toString(info->getHasProx())); segInfoStat->hasProx = info->getHasProx(); msg(L" numFiles=" + StringUtils::toString(info->files().size())); segInfoStat->numFiles = info->files().size(); msg(L" size (MB)=" + StringUtils::toString((double)info->sizeInBytes() / (double)(1024 * 1024))); segInfoStat->sizeMB = (double)info->sizeInBytes() / (double)(1024 * 1024); MapStringString diagnostics(info->getDiagnostics()); segInfoStat->diagnostics = diagnostics; if (!diagnostics.empty()) msg(L" diagnostics (size)= " + StringUtils::toString(diagnostics.size())); int32_t docStoreOffset = info->getDocStoreOffset(); if (docStoreOffset != -1) { msg(L" docStoreOffset=" + StringUtils::toString(docStoreOffset)); segInfoStat->docStoreOffset = docStoreOffset; msg(L" docStoreSegment=" + info->getDocStoreSegment()); segInfoStat->docStoreSegment = info->getDocStoreSegment(); msg(L" docStoreIsCompoundFile=" + StringUtils::toString(info->getDocStoreIsCompoundFile())); segInfoStat->docStoreCompoundFile = 
info->getDocStoreIsCompoundFile(); } String delFileName(info->getDelFileName()); if (delFileName.empty()) { msg(L" no deletions"); segInfoStat->hasDeletions = false; } else { msg(L" has deletions [delFileName=" + delFileName + L"]"); segInfoStat->hasDeletions = true; segInfoStat->deletionsFileName = delFileName; } msg(L" test: open reader........."); reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); segInfoStat->openReaderPassed = true; int32_t numDocs = reader->numDocs(); toLoseDocCount = numDocs; if (reader->hasDeletions()) { if (reader->deletedDocs->count() != info->getDelCount()) { boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + L" vs deletedDocs.count()=" + StringUtils::toString(reader->deletedDocs->count()))); } if (reader->deletedDocs->count() > reader->maxDoc()) { boost::throw_exception(RuntimeException(L"too many deleted docs: maxDoc()=" + StringUtils::toString(reader->maxDoc()) + L" vs deletedDocs.count()=" + StringUtils::toString(reader->deletedDocs->count()))); } if (info->docCount - numDocs != info->getDelCount()) { boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + L" vs reader=" + StringUtils::toString((info->docCount - numDocs)))); } segInfoStat->numDeleted = info->docCount - numDocs; msg(L"OK [" + StringUtils::toString(segInfoStat->numDeleted) + L" deleted docs]"); } else { if (info->getDelCount() != 0) { boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + L" vs reader=" + StringUtils::toString(info->docCount - numDocs))); } msg(L"OK"); } if (reader->maxDoc() != info->docCount) { boost::throw_exception(RuntimeException(L"SegmentReader.maxDoc() " + StringUtils::toString(reader->maxDoc()) + L" != SegmentInfos.docCount " + StringUtils::toString(info->docCount))); } msg(L" test: fields.............."); HashSet 
fieldNames(reader->getFieldNames(IndexReader::FIELD_OPTION_ALL)); msg(L"OK [" + StringUtils::toString(fieldNames.size()) + L" fields]"); segInfoStat->numFields = fieldNames.size(); // Test Field Norms segInfoStat->fieldNormStatus = testFieldNorms(Collection::newInstance(fieldNames.begin(), fieldNames.end()), reader); // Test the Term Index segInfoStat->termIndexStatus = testTermIndex(info, reader); // Test Stored Fields segInfoStat->storedFieldStatus = testStoredFields(info, reader); // Test Term Vectors segInfoStat->termVectorStatus = testTermVectors(info, reader); // Rethrow the first exception we encountered. This will cause stats for failed segments to be incremented properly if (!segInfoStat->fieldNormStatus->error.isNull()) boost::throw_exception(RuntimeException(L"Field Norm test failed")); else if (!segInfoStat->termIndexStatus->error.isNull()) boost::throw_exception(RuntimeException(L"Term Index test failed")); else if (!segInfoStat->storedFieldStatus->error.isNull()) boost::throw_exception(RuntimeException(L"Stored Field test failed")); else if (!segInfoStat->termVectorStatus->error.isNull()) boost::throw_exception(RuntimeException(L"Term Vector test failed")); msg(L""); } catch (...) 
{ msg(L"FAILED"); String comment(L"fixIndex() would remove reference to this segment"); msg(L" WARNING: " + comment + L"; full exception:"); msg(L""); result->totLoseDocCount += toLoseDocCount; ++result->numBadSegments; if (reader) reader->close(); continue; } if (reader) reader->close(); // Keeper result->newSegments->add(boost::dynamic_pointer_cast(info->clone())); } if (result->numBadSegments == 0) { result->clean = true; msg(L"No problems were detected with this index.\n"); } else { msg(L"WARNING: " + StringUtils::toString(result->numBadSegments) + L" broken segments (containing " + StringUtils::toString(result->totLoseDocCount) + L" documents) detected"); } return result; } FieldNormStatusPtr CheckIndex::testFieldNorms(Collection fieldNames, SegmentReaderPtr reader) { FieldNormStatusPtr status(newLucene()); try { // Test Field Norms msg(L" test: field norms........."); ByteArray b(ByteArray::newInstance(reader->maxDoc())); for (Collection::iterator fieldName = fieldNames.begin(); fieldName != fieldNames.end(); ++fieldName) { if (reader->hasNorms(*fieldName)) { reader->norms(*fieldName, b, 0); ++status->totFields; } } msg(L"OK [" + StringUtils::toString(status->totFields) + L" fields]"); } catch (LuceneException& e) { msg(L"ERROR [" + e.getError() + L"]"); status->error = e; } return status; } TermIndexStatusPtr CheckIndex::testTermIndex(SegmentInfoPtr info, SegmentReaderPtr reader) { TermIndexStatusPtr status(newLucene()); try { msg(L" test: terms, freq, prox..."); TermEnumPtr termEnum(reader->terms()); TermPositionsPtr termPositions(reader->termPositions()); // Used only to count up # deleted docs for this term MySegmentTermDocsPtr myTermDocs(newLucene(reader)); int32_t maxDoc = reader->maxDoc(); while (termEnum->next()) { ++status->termCount; TermPtr term(termEnum->term()); int32_t docFreq = termEnum->docFreq(); termPositions->seek(term); int32_t lastDoc = -1; int32_t freq0 = 0; status->totFreq += docFreq; while (termPositions->next()) { ++freq0; int32_t doc 
= termPositions->doc(); int32_t freq = termPositions->freq(); if (doc <= lastDoc) { boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L" <= lastDoc " + StringUtils::toString(lastDoc))); } if (doc >= maxDoc) { boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L" >= maxDoc " + StringUtils::toString(maxDoc))); } lastDoc = doc; if (freq <= 0) { boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L": freq " + StringUtils::toString(freq) + L" is out of bounds")); } int32_t lastPos = -1; status->totPos += freq; for (int32_t j = 0; j < freq; ++j) { int32_t pos = termPositions->nextPosition(); if (pos < -1) { boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L": pos " + StringUtils::toString(pos) + L" is out of bounds")); } if (pos < lastPos) { boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L": pos " + StringUtils::toString(pos) + L" < lastPos " + StringUtils::toString(lastPos))); } lastPos = pos; } } // Now count how many deleted docs occurred in this term int32_t delCount; if (reader->hasDeletions()) { myTermDocs->seek(term); while (myTermDocs->next()) { } delCount = myTermDocs->delCount; } else delCount = 0; if (freq0 + delCount != docFreq) { boost::throw_exception(RuntimeException(L"term " + term->toString() + L"docFreq=" + StringUtils::toString(docFreq) + L" != num docs seen " + StringUtils::toString(freq0) + L" + num docs deleted " + StringUtils::toString(delCount))); } } msg(L"OK [" + StringUtils::toString(status->termCount) + L" terms; " + StringUtils::toString(status->totFreq) + L" terms/docs pairs; " + StringUtils::toString(status->totPos) + L" tokens]"); } catch (LuceneException& e) { msg(L"ERROR [" + e.getError() + L"]"); status->error = e; } return status; } 
StoredFieldStatusPtr CheckIndex::testStoredFields(SegmentInfoPtr info, SegmentReaderPtr reader) { StoredFieldStatusPtr status(newLucene()); try { msg(L" test: stored fields......."); // Scan stored fields for all documents for (int32_t j = 0; j < info->docCount; ++j) { if (!reader->isDeleted(j)) { ++status->docCount; DocumentPtr doc(reader->document(j, FieldSelectorPtr())); status->totFields += doc->getFields().size(); } } // Validate docCount if (status->docCount != reader->numDocs()) { boost::throw_exception(RuntimeException(L"docCount=" + StringUtils::toString(status->docCount) + L" but saw " + StringUtils::toString(status->docCount) + L" undeleted docs")); } msg(L"OK [" + StringUtils::toString(status->totFields) + L" total field count; avg " + StringUtils::toString((double)status->totFields / (double)status->docCount) + L" fields per doc]"); } catch (LuceneException& e) { msg(L"ERROR [" + e.getError() + L"]"); status->error = e; } return status; } TermVectorStatusPtr CheckIndex::testTermVectors(SegmentInfoPtr info, SegmentReaderPtr reader) { TermVectorStatusPtr status(newLucene()); try { msg(L" test: term vectors........"); for (int32_t j = 0; j < info->docCount; ++j) { if (!reader->isDeleted(j)) { ++status->docCount; Collection tfv(reader->getTermFreqVectors(j)); if (tfv) status->totVectors += tfv.size(); } } msg(L"OK [" + StringUtils::toString(status->totVectors) + L" total vector count; avg " + StringUtils::toString((double)status->totVectors / (double)status->docCount) + L" term/freq vector fields per doc]"); } catch (LuceneException& e) { msg(L"ERROR [" + e.getError() + L"]"); status->error = e; } return status; } void CheckIndex::fixIndex(IndexStatusPtr result) { if (result->partial) boost::throw_exception(IllegalArgumentException(L"can only fix an index that was fully checked (this status checked a subset of segments)")); result->newSegments->commit(result->dir); } bool CheckIndex::testAsserts() { _assertsOn = true; return true; } bool 
CheckIndex::assertsOn() { BOOST_ASSERT(testAsserts()); return _assertsOn; } int CheckIndex::main(Collection args) { bool doFix = false; Collection onlySegments(Collection::newInstance()); String indexPath; for (Collection::iterator arg = args.begin(); arg != args.end(); ++arg) { if (*arg == L"-fix") doFix = true; else if (*arg == L"-segment") { if (arg + 1 == args.end()) { std::wcout << L"ERROR: missing name for -segment option\n"; return 1; } ++arg; onlySegments.add(*arg); } else { if (!indexPath.empty()) { std::wcout << L"ERROR: unexpected extra argument '" << *arg << L"'\n"; return 1; } indexPath = *arg; } } if (indexPath.empty()) { std::wcout << L"\nERROR: index path not specified\n"; std::wcout << L"Usage: CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n"; std::wcout << L"\n"; std::wcout << L" -fix: actually write a new segments_N file, removing any problematic segments\n"; std::wcout << L" -segment X: only check the specified segments. This can be specified multiple\n"; std::wcout << L" times, to check more than one segment, eg '-segment _2 -segment _a'.\n"; std::wcout << L" You can't use this with the -fix option\n"; std::wcout << L"\n"; std::wcout << L"**WARNING**: -fix should only be used on an emergency basis as it will cause\n"; std::wcout << L"documents (perhaps many) to be permanently removed from the index. Always make\n"; std::wcout << L"a backup copy of your index before running this! Do not run this tool on an index\n"; std::wcout << L"that is actively being written to. You have been warned!\n"; std::wcout << L"\n"; std::wcout << L"Run without -fix, this tool will open the index, report version information\n"; std::wcout << L"and report any exceptions it hits and what action it would take if -fix were\n"; std::wcout << L"specified. With -fix, this tool will remove any segments that have issues and\n"; std::wcout << L"write a new segments_N file. 
This means all documents contained in the affected\n"; std::wcout << L"segments will be removed.\n"; std::wcout << L"\n"; std::wcout << L"This tool exits with exit code 1 if the index cannot be opened or has any\n"; std::wcout << L"corruption, else 0.\n\n"; return 1; } if (!assertsOn()) std::wcout << L"\nNOTE: testing will be more thorough if you run with '-ea', so assertions are enabled\n"; if (onlySegments.empty()) onlySegments.reset(); else if (doFix) { std::wcout << L"ERROR: cannot specify both -fix and -segment\n"; return 1; } std::wcout << L"\nOpening index @ " << indexPath << L"\n\n"; DirectoryPtr dir; try { dir = FSDirectory::open(indexPath); } catch (...) { std::wcout << L"ERROR: could not open directory \"" << indexPath << L"\"; exiting\n"; return 1; } CheckIndexPtr checker(newLucene(dir)); checker->setInfoStream(newLucene()); IndexStatusPtr result(checker->checkIndex(onlySegments)); if (result->missingSegments) return 1; if (!result->clean) { if (!doFix) std::wcout << L"WARNING: would write new segments file, and " << result->totLoseDocCount << L" documents would be lost, if -fix were specified\n\n"; else { std::wcout << L"WARNING: " << result->totLoseDocCount + L" documents will be lost\n"; std::wcout << L"NOTE: will write new segments file in 5 seconds; this will remove " << result->totLoseDocCount; std::wcout << L" docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!\n"; for (int32_t sec = 0; sec < 5; ++sec) { LuceneThread::threadSleep(1000); std::wcout << L" " << (5 - sec) << L"...\n"; } std::wcout << L"Writing...\n"; checker->fixIndex(result); std::wcout << L"OK\n"; std::wcout << L"Wrote new segments file \"" << result->newSegments->getCurrentSegmentFileName() << L"\"\n"; } } std::wcout << L"\n"; return ((result && result->clean) ? 
0 : 1); } IndexStatus::IndexStatus() { clean = false; missingSegments = false; cantOpenSegments = false; missingSegmentVersion = false; numSegments = false; segmentInfos = Collection::newInstance(); segmentsChecked = Collection::newInstance(); toolOutOfDate = false; totLoseDocCount = 0; numBadSegments = 0; partial = false; } IndexStatus::~IndexStatus() { } SegmentInfoStatus::SegmentInfoStatus() { docCount = 0; compound = false; numFiles = 0; sizeMB = 0; docStoreOffset = -1; docStoreCompoundFile = false; hasDeletions = false; numDeleted = 0; openReaderPassed = false; numFields = 0; hasProx = false; } SegmentInfoStatus::~SegmentInfoStatus() { } FieldNormStatus::FieldNormStatus() { totFields = 0; } FieldNormStatus::~FieldNormStatus() { } TermIndexStatus::TermIndexStatus() { termCount = 0; totFreq = 0; totPos = 0; } TermIndexStatus::~TermIndexStatus() { } StoredFieldStatus::StoredFieldStatus() { docCount = 0; totFields = 0; } StoredFieldStatus::~StoredFieldStatus() { } TermVectorStatus::TermVectorStatus() { docCount = 0; totVectors = 0; } TermVectorStatus::~TermVectorStatus() { } MySegmentTermDocs::MySegmentTermDocs(SegmentReaderPtr p) : SegmentTermDocs(p) { delCount = 0; } MySegmentTermDocs::~MySegmentTermDocs() { } void MySegmentTermDocs::seek(TermPtr term) { SegmentTermDocs::seek(term); delCount = 0; } void MySegmentTermDocs::skippingDoc() { ++delCount; } } LucenePlusPlus-rel_3.0.4/src/core/index/CompoundFileReader.cpp000066400000000000000000000156701217574114600243650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CompoundFileReader.h" namespace Lucene { CompoundFileReader::CompoundFileReader(DirectoryPtr dir, const String& name) { ConstructReader(dir, name, BufferedIndexInput::BUFFER_SIZE); } CompoundFileReader::CompoundFileReader(DirectoryPtr dir, const String& name, int32_t readBufferSize) { ConstructReader(dir, name, readBufferSize); } CompoundFileReader::~CompoundFileReader() { } void CompoundFileReader::ConstructReader(DirectoryPtr dir, const String& name, int32_t readBufferSize) { directory = dir; fileName = name; this->readBufferSize = readBufferSize; this->entries = MapStringFileEntryPtr::newInstance(); bool success = false; LuceneException finally; try { stream = dir->openInput(name, readBufferSize); // read the directory and init files int32_t count = stream->readVInt(); FileEntryPtr entry; for (int32_t i = 0; i < count; ++i) { int64_t offset = stream->readLong(); String id(stream->readString()); if (entry) { // set length of the previous entry entry->length = offset - entry->offset; } entry = newInstance(); entry->offset = offset; entries.put(id, entry); } // set the length of the final entry if (entry) entry->length = stream->length() - entry->offset; success = true; } catch (LuceneException& e) { finally = e; } if (!success && stream) { try { stream->close(); } catch (...) 
{ } } finally.throwException(); } DirectoryPtr CompoundFileReader::getDirectory() { return directory; } String CompoundFileReader::getName() { return fileName; } void CompoundFileReader::close() { SyncLock syncLock(this); if (!stream) boost::throw_exception(IOException(L"Already closed")); entries.clear(); stream->close(); stream.reset(); } IndexInputPtr CompoundFileReader::openInput(const String& name) { SyncLock syncLock(this); // Default to readBufferSize passed in when we were opened return openInput(name, readBufferSize); } IndexInputPtr CompoundFileReader::openInput(const String& name, int32_t bufferSize) { SyncLock syncLock(this); if (!stream) boost::throw_exception(IOException(L"Stream closed")); MapStringFileEntryPtr::iterator entry = entries.find(name); if (entry == entries.end()) boost::throw_exception(IOException(L"No sub-file with id " + name + L" found")); return newLucene(stream, entry->second->offset, entry->second->length, readBufferSize); } HashSet CompoundFileReader::listAll() { HashSet res(HashSet::newInstance()); for (MapStringFileEntryPtr::iterator entry = entries.begin(); entry != entries.end(); ++entry) res.add(entry->first); return res; } bool CompoundFileReader::fileExists(const String& name) { return entries.contains(name); } uint64_t CompoundFileReader::fileModified(const String& name) { return directory->fileModified(fileName); } void CompoundFileReader::touchFile(const String& name) { directory->touchFile(fileName); } void CompoundFileReader::deleteFile(const String& name) { boost::throw_exception(UnsupportedOperationException()); } void CompoundFileReader::renameFile(const String& from, const String& to) { boost::throw_exception(UnsupportedOperationException()); } int64_t CompoundFileReader::fileLength(const String& name) { MapStringFileEntryPtr::iterator entry = entries.find(name); if (entry == entries.end()) boost::throw_exception(IOException(L"File " + name + L" does not exist")); return entry->second->length; } IndexOutputPtr 
CompoundFileReader::createOutput(const String& name) { boost::throw_exception(UnsupportedOperationException()); return IndexOutputPtr(); } LockPtr CompoundFileReader::makeLock(const String& name) { boost::throw_exception(UnsupportedOperationException()); return LockPtr(); } CSIndexInput::CSIndexInput() { fileOffset = 0; _length = 0; } CSIndexInput::CSIndexInput(IndexInputPtr base, int64_t fileOffset, int64_t length) : BufferedIndexInput(BufferedIndexInput::BUFFER_SIZE) { this->base = boost::dynamic_pointer_cast(base->clone()); this->fileOffset = fileOffset; this->_length = length; } CSIndexInput::CSIndexInput(IndexInputPtr base, int64_t fileOffset, int64_t length, int32_t readBufferSize) : BufferedIndexInput(readBufferSize) { this->base = boost::dynamic_pointer_cast(base->clone()); this->fileOffset = fileOffset; this->_length = length; } CSIndexInput::~CSIndexInput() { } void CSIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) { int64_t start = getFilePointer(); if (start + length > _length) boost::throw_exception(IOException(L"read past EOF")); base->seek(fileOffset + start); base->readBytes(b, offset, length, false); } void CSIndexInput::seekInternal(int64_t pos) { } void CSIndexInput::close() { base->close(); } int64_t CSIndexInput::length() { return _length; } LuceneObjectPtr CSIndexInput::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); CSIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(BufferedIndexInput::clone(clone))); cloneIndexInput->base = boost::dynamic_pointer_cast(this->base->clone()); cloneIndexInput->fileOffset = fileOffset; cloneIndexInput->_length = _length; return cloneIndexInput; } } LucenePlusPlus-rel_3.0.4/src/core/index/CompoundFileWriter.cpp000066400000000000000000000152661217574114600244400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CompoundFileWriter.h" #include "SegmentMerger.h" #include "Directory.h" #include "IndexInput.h" #include "IndexOutput.h" #include "StringUtils.h" namespace Lucene { CompoundFileWriter::CompoundFileWriter(DirectoryPtr dir, const String& name, CheckAbortPtr checkAbort) { if (!dir) boost::throw_exception(IllegalArgumentException(L"directory cannot be empty")); if (name.empty()) boost::throw_exception(IllegalArgumentException(L"name cannot be empty")); this->checkAbort = checkAbort; _directory = dir; fileName = name; ids = HashSet::newInstance(); entries = Collection::newInstance(); merged = false; } CompoundFileWriter::~CompoundFileWriter() { } DirectoryPtr CompoundFileWriter::getDirectory() { return DirectoryPtr(_directory); } String CompoundFileWriter::getName() { return fileName; } void CompoundFileWriter::addFile(const String& file) { if (merged) boost::throw_exception(IllegalStateException(L"Can't add extensions after merge has been called")); if (file.empty()) boost::throw_exception(IllegalArgumentException(L"file cannot be empty")); if (!ids.add(file)) boost::throw_exception(IllegalArgumentException(L"File " + file + L" already added")); FileEntry entry; entry.file = file; entries.add(entry); } void CompoundFileWriter::close() { if (merged) boost::throw_exception(IllegalStateException(L"Merge already performed")); if (entries.empty()) boost::throw_exception(IllegalStateException(L"No entries to merge have been defined")); merged = true; DirectoryPtr directory(_directory); // open the compound stream IndexOutputPtr os; LuceneException finally; try { os = directory->createOutput(fileName); // Write the number of entries os->writeVInt(entries.size()); // Write the directory with all offsets at 0. 
Remember the positions of directory entries so that we // can adjust the offsets later int64_t totalSize = 0; for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { fe->directoryOffset = os->getFilePointer(); os->writeLong(0); // for now os->writeString(fe->file); totalSize += directory->fileLength(fe->file); } // Pre-allocate size of file as optimization - this can potentially help IO performance as we write the // file and also later during searching. It also uncovers a disk-full situation earlier and hopefully // without actually filling disk to 100% int64_t finalLength = totalSize + os->getFilePointer(); os->setLength(finalLength); // Open the files and copy their data into the stream. Remember the locations of each file's data section. ByteArray buffer(ByteArray::newInstance(16384)); for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { fe->dataOffset = os->getFilePointer(); copyFile(*fe, os, buffer); } // Write the data offsets into the directory of the compound stream for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { os->seek(fe->directoryOffset); os->writeLong(fe->dataOffset); } BOOST_ASSERT(finalLength == os->length()); // Close the output stream. Set the os to null before trying to close so that if an exception occurs during // the close, the finally clause below will not attempt to close the stream the second time. 
IndexOutputPtr tmp(os); os.reset(); tmp->close(); } catch (LuceneException& e) { finally = e; } if (os) { try { os->close(); } catch (LuceneException&) { } } finally.throwException(); } void CompoundFileWriter::copyFile(const FileEntry& source, IndexOutputPtr os, ByteArray buffer) { IndexInputPtr is; DirectoryPtr directory(_directory); LuceneException finally; try { int64_t startPtr = os->getFilePointer(); is = directory->openInput(source.file); int64_t length = is->length(); int64_t remainder = length; int64_t chunk = buffer.size(); while (remainder > 0) { int32_t len = (int32_t)std::min(chunk, remainder); is->readBytes(buffer.get(), 0, len, false); os->writeBytes(buffer.get(), len); remainder -= len; if (checkAbort) { // Roughly every 2 MB we will check if it's time to abort checkAbort->work(80); } } // Verify that remainder is 0 if (remainder != 0) { boost::throw_exception(IOException(L"Non-zero remainder length after copying: " + StringUtils::toString(remainder) + L" (id: " + source.file + L", length: " + StringUtils::toString(length) + L", buffer size: " + StringUtils::toString(chunk) + L")")); } // Verify that the output length diff is equal to original file int64_t endPtr = os->getFilePointer(); int64_t diff = endPtr - startPtr; if (diff != length) { boost::throw_exception(IOException(L"Difference in the output file offsets " + StringUtils::toString(diff) + L" does not match the original file length " + StringUtils::toString(length))); } } catch (LuceneException& e) { finally = e; } if (is) is->close(); finally.throwException(); } } LucenePlusPlus-rel_3.0.4/src/core/index/ConcurrentMergeScheduler.cpp000066400000000000000000000262021217574114600256100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ConcurrentMergeScheduler.h" #include "_ConcurrentMergeScheduler.h" #include "IndexWriter.h" #include "TestPoint.h" #include "StringUtils.h" namespace Lucene { Collection ConcurrentMergeScheduler::allInstances; bool ConcurrentMergeScheduler::anyExceptions = false; ConcurrentMergeScheduler::ConcurrentMergeScheduler() { mergeThreadPriority = -1; mergeThreads = SetMergeThread::newInstance(); maxThreadCount = 1; suppressExceptions = false; closed = false; } ConcurrentMergeScheduler::~ConcurrentMergeScheduler() { } void ConcurrentMergeScheduler::initialize() { // Only for testing if (allInstances) addMyself(); } void ConcurrentMergeScheduler::setMaxThreadCount(int32_t count) { if (count < 1) boost::throw_exception(IllegalArgumentException(L"count should be at least 1")); maxThreadCount = count; } int32_t ConcurrentMergeScheduler::getMaxThreadCount() { return maxThreadCount; } int32_t ConcurrentMergeScheduler::getMergeThreadPriority() { SyncLock syncLock(this); initMergeThreadPriority(); return mergeThreadPriority; } void ConcurrentMergeScheduler::setMergeThreadPriority(int32_t pri) { SyncLock syncLock(this); if (pri > LuceneThread::MAX_PRIORITY || pri < LuceneThread::MIN_PRIORITY) { boost::throw_exception(IllegalArgumentException(L"priority must be in range " + StringUtils::toString(LuceneThread::MIN_PRIORITY) + L" .. 
" + StringUtils::toString(LuceneThread::MAX_PRIORITY) + L" inclusive")); } mergeThreadPriority = pri; for (SetMergeThread::iterator merge = mergeThreads.begin(); merge != mergeThreads.end(); ++merge) (*merge)->setThreadPriority(pri); } bool ConcurrentMergeScheduler::verbose() { return (!_writer.expired() && IndexWriterPtr(_writer)->verbose()); } void ConcurrentMergeScheduler::message(const String& message) { if (verbose() && !_writer.expired()) IndexWriterPtr(_writer)->message(L"CMS: " + message); } void ConcurrentMergeScheduler::initMergeThreadPriority() { SyncLock syncLock(this); if (mergeThreadPriority == -1) { // Default to slightly higher priority than our calling thread mergeThreadPriority = std::min(LuceneThread::NORM_PRIORITY + 1, LuceneThread::MAX_PRIORITY); } } void ConcurrentMergeScheduler::close() { sync(); closed = true; } void ConcurrentMergeScheduler::sync() { SyncLock syncLock(this); while (mergeThreadCount() > 0) { message(L"now wait for threads; currently " + StringUtils::toString(mergeThreads.size()) + L" still running"); wait(1000); } mergeThreads.clear(); } int32_t ConcurrentMergeScheduler::mergeThreadCount() { SyncLock syncLock(this); int32_t count = 0; for (SetMergeThread::iterator merge = mergeThreads.begin(); merge != mergeThreads.end(); ++merge) { if ((*merge)->isAlive()) ++count; } return count; } void ConcurrentMergeScheduler::merge(IndexWriterPtr writer) { BOOST_ASSERT(!writer->holdsLock()); this->_writer = writer; initMergeThreadPriority(); dir = writer->getDirectory(); // First, quickly run through the newly proposed merges and add any orthogonal merges (ie a merge not // involving segments already pending to be merged) to the queue. If we are way behind on merging, // many of these newly proposed merges will likely already be registered. 
message(L"now merge"); message(L" index: " + writer->segString()); // Iterate, pulling from the IndexWriter's queue of pending merges, until it's empty while (true) { OneMergePtr merge(writer->getNextMerge()); if (!merge) { message(L" no more merges pending; now return"); return; } // We do this with the primary thread to keep deterministic assignment of segment names writer->mergeInit(merge); bool success = false; LuceneException finally; try { SyncLock syncLock(this); MergeThreadPtr merger; while (mergeThreadCount() >= maxThreadCount) { message(L" too many merge threads running; stalling..."); wait(1000); } message(L" consider merge " + merge->segString(dir)); BOOST_ASSERT(mergeThreadCount() < maxThreadCount); // OK to spawn a new merge thread to handle this merge merger = getMergeThread(writer, merge); mergeThreads.add(merger); message(L" launch new thread"); merger->start(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) writer->mergeFinish(merge); finally.throwException(); } } void ConcurrentMergeScheduler::doMerge(OneMergePtr merge) { TestScope testScope(L"ConcurrentMergeScheduler", L"doMerge"); IndexWriterPtr(_writer)->merge(merge); } MergeThreadPtr ConcurrentMergeScheduler::getMergeThread(IndexWriterPtr writer, OneMergePtr merge) { SyncLock syncLock(this); MergeThreadPtr thread(newLucene(shared_from_this(), writer, merge)); thread->setThreadPriority(mergeThreadPriority); return thread; } void ConcurrentMergeScheduler::handleMergeException(const LuceneException& exc) { // When an exception is hit during merge, IndexWriter removes any partial files and then // allows another merge to run. 
If whatever caused the error is not transient then the // exception will keep happening, so, we sleep here to avoid saturating CPU in such cases LuceneThread::threadSleep(250); // pause 250 msec boost::throw_exception(MergeException()); } bool ConcurrentMergeScheduler::anyUnhandledExceptions() { if (!allInstances) boost::throw_exception(RuntimeException(L"setTestMode() was not called")); SyncLock instancesLock(&allInstances); for (Collection::iterator instance = allInstances.begin(); instance != allInstances.end(); ++instance) (*instance)->sync(); bool v = anyExceptions; anyExceptions = false; return v; } void ConcurrentMergeScheduler::clearUnhandledExceptions() { SyncLock instancesLock(&allInstances); anyExceptions = false; } void ConcurrentMergeScheduler::addMyself() { SyncLock instancesLock(&allInstances); int32_t size = allInstances.size(); int32_t upto = 0; for (int32_t i = 0; i < size; ++i) { ConcurrentMergeSchedulerPtr other(allInstances[i]); if (!(other->closed && other->mergeThreadCount() == 0)) { // Keep this one for now: it still has threads or may spawn new threads allInstances[upto++] = other; } allInstances.remove(allInstances.begin() + upto, allInstances.end()); allInstances.add(shared_from_this()); } } void ConcurrentMergeScheduler::setSuppressExceptions() { suppressExceptions = true; } void ConcurrentMergeScheduler::clearSuppressExceptions() { suppressExceptions = false; } void ConcurrentMergeScheduler::setTestMode() { allInstances = Collection::newInstance(); } MergeThread::MergeThread(ConcurrentMergeSchedulerPtr merger, IndexWriterPtr writer, OneMergePtr startMerge) { this->_merger = merger; this->_writer = writer; this->startMerge = startMerge; } MergeThread::~MergeThread() { } void MergeThread::setRunningMerge(OneMergePtr merge) { ConcurrentMergeSchedulerPtr merger(_merger); SyncLock syncLock(merger); runningMerge = merge; } OneMergePtr MergeThread::getRunningMerge() { ConcurrentMergeSchedulerPtr merger(_merger); SyncLock syncLock(merger); 
return runningMerge; } void MergeThread::setThreadPriority(int32_t pri) { try { setPriority(pri); } catch (...) { } } void MergeThread::run() { // First time through the while loop we do the merge that we were started with OneMergePtr merge(this->startMerge); ConcurrentMergeSchedulerPtr merger(_merger); LuceneException finally; try { merger->message(L" merge thread: start"); IndexWriterPtr writer(_writer); while (true) { setRunningMerge(merge); merger->doMerge(merge); // Subsequent times through the loop we do any new merge that writer says is necessary merge = writer->getNextMerge(); if (merge) { writer->mergeInit(merge); merger->message(L" merge thread: do another merge " + merge->segString(merger->dir)); } else break; } merger->message(L" merge thread: done"); } catch (MergeAbortedException&) { // Ignore the exception if it was due to abort } catch (LuceneException& e) { if (!merger->suppressExceptions) { // suppressExceptions is normally only set during testing. merger->anyExceptions = true; merger->handleMergeException(e); } else finally = e; } { SyncLock syncLock(merger); merger->notifyAll(); bool removed = merger->mergeThreads.remove(shared_from_this()); BOOST_ASSERT(removed); } finally.throwException(); } } LucenePlusPlus-rel_3.0.4/src/core/index/DefaultSkipListReader.cpp000066400000000000000000000070141217574114600250410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DefaultSkipListReader.h" #include "MiscUtils.h" namespace Lucene { DefaultSkipListReader::DefaultSkipListReader(IndexInputPtr skipStream, int32_t maxSkipLevels, int32_t skipInterval) : MultiLevelSkipListReader(skipStream, maxSkipLevels, skipInterval) { currentFieldStoresPayloads = false; lastFreqPointer = 0; lastProxPointer = 0; lastPayloadLength = 0; freqPointer = Collection::newInstance(maxSkipLevels); proxPointer = Collection::newInstance(maxSkipLevels); payloadLength = Collection::newInstance(maxSkipLevels); MiscUtils::arrayFill(freqPointer.begin(), 0, freqPointer.size(), 0); MiscUtils::arrayFill(proxPointer.begin(), 0, proxPointer.size(), 0); MiscUtils::arrayFill(payloadLength.begin(), 0, payloadLength.size(), 0); } DefaultSkipListReader::~DefaultSkipListReader() { } void DefaultSkipListReader::init(int64_t skipPointer, int64_t freqBasePointer, int64_t proxBasePointer, int32_t df, bool storesPayloads) { MultiLevelSkipListReader::init(skipPointer, df); this->currentFieldStoresPayloads = storesPayloads; lastFreqPointer = freqBasePointer; lastProxPointer = proxBasePointer; MiscUtils::arrayFill(freqPointer.begin(), 0, freqPointer.size(), freqBasePointer); MiscUtils::arrayFill(proxPointer.begin(), 0, proxPointer.size(), proxBasePointer); MiscUtils::arrayFill(payloadLength.begin(), 0, payloadLength.size(), 0); } int64_t DefaultSkipListReader::getFreqPointer() { return lastFreqPointer; } int64_t DefaultSkipListReader::getProxPointer() { return lastProxPointer; } int32_t DefaultSkipListReader::getPayloadLength() { return lastPayloadLength; } void DefaultSkipListReader::seekChild(int32_t level) { MultiLevelSkipListReader::seekChild(level); freqPointer[level] = lastFreqPointer; proxPointer[level] = lastProxPointer; payloadLength[level] = lastPayloadLength; } void DefaultSkipListReader::setLastSkipData(int32_t level) { 
MultiLevelSkipListReader::setLastSkipData(level); lastFreqPointer = freqPointer[level]; lastProxPointer = proxPointer[level]; lastPayloadLength = payloadLength[level]; } int32_t DefaultSkipListReader::readSkipData(int32_t level, IndexInputPtr skipStream) { int32_t delta; if (currentFieldStoresPayloads) { // The current field stores payloads. If the doc delta is odd then we have to read the current // payload length because it differs from the length of the previous payload delta = skipStream->readVInt(); if ((delta & 1) != 0) payloadLength[level] = skipStream->readVInt(); delta = MiscUtils::unsignedShift(delta, 1); } else delta = skipStream->readVInt(); freqPointer[level] += skipStream->readVInt(); proxPointer[level] += skipStream->readVInt(); return delta; } } LucenePlusPlus-rel_3.0.4/src/core/index/DefaultSkipListWriter.cpp000066400000000000000000000125331217574114600251150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DefaultSkipListWriter.h" #include "IndexOutput.h" #include "MiscUtils.h" namespace Lucene { DefaultSkipListWriter::DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, IndexOutputPtr freqOutput, IndexOutputPtr proxOutput) : MultiLevelSkipListWriter(skipInterval, numberOfSkipLevels, docCount) { curDoc = 0; curStorePayloads = false; curPayloadLength = 0; curFreqPointer = 0; curProxPointer = 0; this->freqOutput = freqOutput; this->proxOutput = proxOutput; lastSkipDoc = Collection::newInstance(numberOfSkipLevels); lastSkipPayloadLength = Collection::newInstance(numberOfSkipLevels); lastSkipFreqPointer = Collection::newInstance(numberOfSkipLevels); lastSkipProxPointer = Collection::newInstance(numberOfSkipLevels); } DefaultSkipListWriter::~DefaultSkipListWriter() { } void DefaultSkipListWriter::setFreqOutput(IndexOutputPtr freqOutput) { this->freqOutput = freqOutput; } void DefaultSkipListWriter::setProxOutput(IndexOutputPtr proxOutput) { this->proxOutput = proxOutput; } void DefaultSkipListWriter::setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength) { this->curDoc = doc; this->curStorePayloads = storePayloads; this->curPayloadLength = payloadLength; this->curFreqPointer = freqOutput->getFilePointer(); if (proxOutput) this->curProxPointer = proxOutput->getFilePointer(); } void DefaultSkipListWriter::resetSkip() { MultiLevelSkipListWriter::resetSkip(); MiscUtils::arrayFill(lastSkipDoc.begin(), 0, lastSkipDoc.size(), 0); MiscUtils::arrayFill(lastSkipPayloadLength.begin(), 0, lastSkipPayloadLength.size(), -1); // we don't have to write the first length in the skip list MiscUtils::arrayFill(lastSkipFreqPointer.begin(), 0, lastSkipFreqPointer.size(), freqOutput->getFilePointer()); if (proxOutput) MiscUtils::arrayFill(lastSkipProxPointer.begin(), 0, lastSkipProxPointer.size(), proxOutput->getFilePointer()); } void 
DefaultSkipListWriter::writeSkipData(int32_t level, IndexOutputPtr skipBuffer) { // To efficiently store payloads in the posting lists we do not store the length of // every payload. Instead we omit the length for a payload if the previous payload had // the same length. // However, in order to support skipping the payload length at every skip point must be known. // So we use the same length encoding that we use for the posting lists for the skip data as well: // Case 1: current field does not store payloads // SkipDatum --> DocSkip, FreqSkip, ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // DocSkip records the document number before every SkipInterval th document in TermFreqs. // Document numbers are represented as differences from the previous value in the sequence. // Case 2: current field stores payloads // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // PayloadLength --> VInt // In this case DocSkip/2 is the difference between // the current and the previous value. If DocSkip // is odd, then a PayloadLength encoded as VInt follows, // if DocSkip is even, then it is assumed that the // current payload length equals the length at the previous // skip point if (curStorePayloads) { int32_t delta = curDoc - lastSkipDoc[level]; if (curPayloadLength == lastSkipPayloadLength[level]) { // the current payload length equals the length at the previous skip point, so we don't store // the length again skipBuffer->writeVInt(delta * 2); } else { // the payload length is different from the previous one. We shift the DocSkip, set the lowest // bit and store the current payload length as VInt. 
skipBuffer->writeVInt(delta * 2 + 1); skipBuffer->writeVInt(curPayloadLength); lastSkipPayloadLength[level] = curPayloadLength; } } else { // current field does not store payloads skipBuffer->writeVInt(curDoc - lastSkipDoc[level]); } skipBuffer->writeVInt((int32_t)(curFreqPointer - lastSkipFreqPointer[level])); skipBuffer->writeVInt((int32_t)(curProxPointer - lastSkipProxPointer[level])); lastSkipDoc[level] = curDoc; lastSkipFreqPointer[level] = curFreqPointer; lastSkipProxPointer[level] = curProxPointer; } } LucenePlusPlus-rel_3.0.4/src/core/index/DirectoryReader.cpp000066400000000000000000001321641217574114600237430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "DirectoryReader.h" #include "_DirectoryReader.h" #include "Directory.h" #include "ReadOnlyDirectoryReader.h" #include "IndexWriter.h" #include "_IndexWriter.h" #include "IndexCommit.h" #include "IndexDeletionPolicy.h" #include "IndexFileDeleter.h" #include "IndexFileNames.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "SegmentReader.h" #include "DefaultSimilarity.h" #include "ReadOnlySegmentReader.h" #include "SegmentMergeInfo.h" #include "Lock.h" #include "FieldCache.h" #include "MiscUtils.h" namespace Lucene { DirectoryReader::DirectoryReader(DirectoryPtr directory, SegmentInfosPtr sis, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { normsCache = MapStringByteArray::newInstance(); _maxDoc = 0; _numDocs = -1; _hasDeletions = false; synced = HashSet::newInstance(); stale = false; rollbackHasChanges = false; this->_directory = directory; this->readOnly 
= readOnly; this->segmentInfos = sis; this->deletionPolicy = deletionPolicy; this->termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously properly sync'd HashSet files(sis->files(directory, true)); synced.addAll(files.begin(), files.end()); } // To reduce the chance of hitting FileNotFound (and having to retry), we open segments in // reverse because IndexWriter merges & deletes the newest segments first. Collection readers(Collection::newInstance(sis->size())); for (int32_t i = sis->size() - 1; i >= 0; --i) { bool success = false; LuceneException finally; try { readers[i] = SegmentReader::get(readOnly, sis->info(i), termInfosIndexDivisor); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // Close all readers we had opened for (Collection::iterator closeReader = readers.begin(); closeReader != readers.end(); ++closeReader) { try { if (*closeReader) (*closeReader)->close(); } catch (...) { // keep going - we want to clean up as much as possible } } } finally.throwException(); } _initialize(readers); } DirectoryReader::DirectoryReader(IndexWriterPtr writer, SegmentInfosPtr infos, int32_t termInfosIndexDivisor) { normsCache = MapStringByteArray::newInstance(); _maxDoc = 0; _numDocs = -1; _hasDeletions = false; synced = HashSet::newInstance(); stale = false; rollbackHasChanges = false; this->_directory = writer->getDirectory(); this->readOnly = true; this->segmentInfos = infos; this->segmentInfosStart = boost::dynamic_pointer_cast(infos->clone()); this->termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously properly sync'd HashSet files(infos->files(_directory, true)); synced.addAll(files.begin(), files.end()); } // IndexWriter synchronizes externally before calling us, which ensures infos will not change; so there's // no need to process segments in reverse order int32_t numSegments = infos->size(); Collection 
readers(Collection::newInstance(numSegments)); DirectoryPtr dir(writer->getDirectory()); int32_t upto = 0; for (int32_t i = 0; i < numSegments; ++i) { bool success = false; LuceneException finally; try { SegmentInfoPtr info(infos->info(i)); if (info->dir == dir) readers[upto++] = boost::dynamic_pointer_cast(writer->readerPool->getReadOnlyClone(info, true, termInfosIndexDivisor)); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // Close all readers we had opened for (--upto; upto >= 0; --upto) { try { if (readers[upto]) readers[upto]->close(); } catch (...) { // keep going - we want to clean up as much as possible } } } finally.throwException(); } this->_writer = writer; if (upto < readers.size()) { // This means some segments were in a foreign Directory readers.resize(upto); } _initialize(readers); } DirectoryReader::DirectoryReader(DirectoryPtr directory, SegmentInfosPtr infos, Collection oldReaders, Collection oldStarts, MapStringByteArray oldNormsCache, bool readOnly, bool doClone, int32_t termInfosIndexDivisor) { normsCache = MapStringByteArray::newInstance(); _maxDoc = 0; _numDocs = -1; _hasDeletions = false; synced = HashSet::newInstance(); stale = false; rollbackHasChanges = false; this->_directory = directory; this->readOnly = readOnly; this->segmentInfos = infos; this->termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously properly sync'd HashSet files(infos->files(directory, true)); synced.addAll(files.begin(), files.end()); } // we put the old SegmentReaders in a map, that allows us to lookup a reader using its segment name MapStringInt segmentReaders(MapStringInt::newInstance()); if (oldReaders) { int32_t segReader = 0; // create a Map SegmentName->SegmentReader for (Collection::iterator reader = oldReaders.begin(); reader != oldReaders.end(); ++reader) segmentReaders.put((*reader)->getSegmentName(), segReader++); } Collection 
newReaders(Collection::newInstance(infos->size())); // remember which readers are shared between the old and the re-opened DirectoryReader - we have to incRef those readers Collection readerShared(Collection::newInstance(infos->size())); for (int32_t i = infos->size() - 1; i >= 0; --i) { // find SegmentReader for this segment MapStringInt::iterator oldReaderIndex = segmentReaders.find(infos->info(i)->name); if (oldReaderIndex == segmentReaders.end()) { // this is a new segment, no old SegmentReader can be reused newReaders[i].reset(); } else { // there is an old reader for this segment - we'll try to reopen it newReaders[i] = oldReaders[oldReaderIndex->second]; } bool success = false; LuceneException finally; try { SegmentReaderPtr newReader; if (!newReaders[i] || infos->info(i)->getUseCompoundFile() != newReaders[i]->getSegmentInfo()->getUseCompoundFile()) { // We should never see a totally new segment during cloning BOOST_ASSERT(!doClone); // this is a new reader; in case we hit an exception we can close it safely newReader = SegmentReader::get(readOnly, infos->info(i), termInfosIndexDivisor); } else newReader = newReaders[i]->reopenSegment(infos->info(i), doClone, readOnly); if (newReader == newReaders[i]) { // this reader will be shared between the old and the new one, so we must incRef it readerShared[i] = true; newReader->incRef(); } else { readerShared[i] = false; newReaders[i] = newReader; } success = true; } catch (LuceneException& e) { finally = e; } if (!success) { for (++i; i < infos->size(); ++i) { if (newReaders[i]) { try { if (!readerShared[i]) { // this is a new subReader that is not used by the old one, we can close it newReaders[i]->close(); } else { // this subReader is also used by the old reader, so instead closing we must decRef it newReaders[i]->decRef(); } } catch (...) 
{ // keep going - we want to clean up as much as possible } } } } finally.throwException(); } // initialize the readers to calculate maxDoc before we try to reuse the old normsCache _initialize(newReaders); // try to copy unchanged norms from the old normsCache to the new one if (oldNormsCache) { for (MapStringByteArray::iterator entry = oldNormsCache.begin(); entry != oldNormsCache.end(); ++entry) { if (!hasNorms(entry->first)) continue; ByteArray bytes(ByteArray::newInstance(maxDoc())); for (int32_t i = 0; i < subReaders.size(); ++i) { MapStringInt::iterator oldReaderIndex = segmentReaders.find(subReaders[i]->getSegmentName()); // this SegmentReader was not re-opened, we can copy all of its norms if (oldReaderIndex != segmentReaders.end() && (oldReaders[oldReaderIndex->second] == subReaders[i] || oldReaders[oldReaderIndex->second]->_norms.get(entry->first) == subReaders[i]->_norms.get(entry->first))) { // we don't have to synchronize here: either this constructor is called from a SegmentReader, in which // case no old norms cache is present, or it is called from MultiReader.reopen(), which is synchronized MiscUtils::arrayCopy(entry->second.get(), oldStarts[oldReaderIndex->second], bytes.get(), starts[i], starts[i + 1] - starts[i]); } else subReaders[i]->norms(entry->first, bytes, starts[i]); } normsCache.put(entry->first, bytes); // update cache } } } DirectoryReader::~DirectoryReader() { } void DirectoryReader::_initialize(Collection subReaders) { this->subReaders = subReaders; starts = Collection::newInstance(subReaders.size() + 1); for (int32_t i = 0; i < subReaders.size(); ++i) { starts[i] = _maxDoc; _maxDoc += subReaders[i]->maxDoc(); // compute maxDocs if (subReaders[i]->hasDeletions()) _hasDeletions = true; } starts[subReaders.size()] = _maxDoc; if (!readOnly) maxIndexVersion = SegmentInfos::readCurrentVersion(_directory); } IndexReaderPtr DirectoryReader::open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, IndexCommitPtr commit, bool 
readOnly, int32_t termInfosIndexDivisor) { return newLucene(readOnly, deletionPolicy, termInfosIndexDivisor, newLucene(), directory)->run(commit); } LuceneObjectPtr DirectoryReader::clone(LuceneObjectPtr other) { try { return DirectoryReader::clone(readOnly, other); // Preserve current readOnly } catch (LuceneException& e) { boost::throw_exception(RuntimeException(e.getError())); } return DirectoryReaderPtr(); } LuceneObjectPtr DirectoryReader::clone(bool openReadOnly, LuceneObjectPtr other) { SyncLock syncLock(this); DirectoryReaderPtr newReader(doReopen(boost::dynamic_pointer_cast(segmentInfos->clone()), true, openReadOnly)); if (shared_from_this() != newReader) newReader->deletionPolicy = deletionPolicy; newReader->_writer = _writer; // If we're cloning a non-readOnly reader, move the writeLock (if there is one) to the new reader if (!openReadOnly && writeLock) { // In near real-time search, reader is always readonly BOOST_ASSERT(_writer.expired()); newReader->writeLock = writeLock; newReader->_hasChanges = _hasChanges; newReader->_hasDeletions = _hasDeletions; writeLock.reset(); _hasChanges = false; } return newReader; } IndexReaderPtr DirectoryReader::reopen() { // Preserve current readOnly return doReopen(readOnly, IndexCommitPtr()); } IndexReaderPtr DirectoryReader::reopen(bool openReadOnly) { return doReopen(openReadOnly, IndexCommitPtr()); } IndexReaderPtr DirectoryReader::reopen(IndexCommitPtr commit) { return doReopen(true, commit); } IndexReaderPtr DirectoryReader::doReopenFromWriter(bool openReadOnly, IndexCommitPtr commit) { BOOST_ASSERT(readOnly); if (!openReadOnly) boost::throw_exception(IllegalArgumentException(L"a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)")); if (commit) boost::throw_exception(IllegalArgumentException(L"a reader obtained from IndexWriter.getReader() cannot currently accept a commit")); return IndexWriterPtr(_writer)->getReader(); } IndexReaderPtr 
DirectoryReader::doReopen(bool openReadOnly, IndexCommitPtr commit) { ensureOpen(); BOOST_ASSERT(!commit || openReadOnly); IndexWriterPtr writer(_writer.lock()); // If we were obtained by writer.getReader(), re-ask the writer to get a new reader. if (writer) return doReopenFromWriter(openReadOnly, commit); else return doReopenNoWriter(openReadOnly, commit); } IndexReaderPtr DirectoryReader::doReopenNoWriter(bool openReadOnly, IndexCommitPtr commit) { SyncLock syncLock(this); if (!commit) { if (_hasChanges) { // We have changes, which means we are not readOnly BOOST_ASSERT(!readOnly); // and we hold the write lock BOOST_ASSERT(writeLock); // so no other writer holds the write lock, which means no changes could have been done to the index BOOST_ASSERT(isCurrent()); if (openReadOnly) return boost::dynamic_pointer_cast(clone(openReadOnly)); else return shared_from_this(); } else if (isCurrent()) { if (openReadOnly != readOnly) { // Just fallback to clone return boost::dynamic_pointer_cast(clone(openReadOnly)); } else return shared_from_this(); } } else { if (_directory != commit->getDirectory()) boost::throw_exception(IOException(L"the specified commit does not match the specified Directory")); if (segmentInfos && commit->getSegmentsFileName() == segmentInfos->getCurrentSegmentFileName()) { if (readOnly != openReadOnly) { // Just fallback to clone return boost::dynamic_pointer_cast(clone(openReadOnly)); } else return shared_from_this(); } } return newLucene(shared_from_this(), openReadOnly, newLucene(), _directory)->run(commit); } DirectoryReaderPtr DirectoryReader::doReopen(SegmentInfosPtr infos, bool doClone, bool openReadOnly) { SyncLock syncLock(this); if (openReadOnly) return newLucene(_directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor); else return newLucene(_directory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor); } int64_t DirectoryReader::getVersion() { ensureOpen(); return 
segmentInfos->getVersion(); } Collection DirectoryReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num return subReaders[i]->getTermFreqVectors(docNumber - starts[i]); // dispatch to segment } TermFreqVectorPtr DirectoryReader::getTermFreqVector(int32_t docNumber, const String& field) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num return subReaders[i]->getTermFreqVector(docNumber - starts[i], field); } void DirectoryReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num subReaders[i]->getTermFreqVector(docNumber - starts[i], field, mapper); } void DirectoryReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num subReaders[i]->getTermFreqVector(docNumber - starts[i], mapper); } bool DirectoryReader::isOptimized() { ensureOpen(); return (segmentInfos->size() == 1 && !hasDeletions()); } int32_t DirectoryReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) // NOTE: multiple threads may wind up init'ing numDocs... 
but that's harmless if (_numDocs == -1) // check cache { int32_t n = 0; // cache miss - recompute for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) n += (*reader)->numDocs(); // sum from readers _numDocs = n; } return _numDocs; } int32_t DirectoryReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return _maxDoc; } DocumentPtr DirectoryReader::document(int32_t n, FieldSelectorPtr fieldSelector) { ensureOpen(); int32_t i = readerIndex(n); // find segment num return subReaders[i]->document(n - starts[i], fieldSelector); // dispatch to segment reader } bool DirectoryReader::isDeleted(int32_t n) { // Don't call ensureOpen() here (it could affect performance) int32_t i = readerIndex(n); // find segment num return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader } bool DirectoryReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return _hasDeletions; } void DirectoryReader::doDelete(int32_t docNum) { _numDocs = -1; // invalidate cache int32_t i = readerIndex(docNum); // find segment num subReaders[i]->deleteDocument(docNum - starts[i]); // dispatch to segment reader _hasDeletions = true; } void DirectoryReader::doUndeleteAll() { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) (*reader)->undeleteAll(); _hasDeletions = false; _numDocs = -1; // invalidate cache } int32_t DirectoryReader::readerIndex(int32_t n) { return readerIndex(n, this->starts, this->subReaders.size()); } int32_t DirectoryReader::readerIndex(int32_t n, Collection starts, int32_t numSubReaders) { // Binary search to locate reader Collection::iterator reader = std::upper_bound(starts.begin(), starts.begin() + numSubReaders, n); return (int32_t)(std::distance(starts.begin(), reader) - 1); } bool DirectoryReader::hasNorms(const String& field) { ensureOpen(); for (Collection::iterator reader = subReaders.begin(); reader != 
subReaders.end(); ++reader) { if ((*reader)->hasNorms(field)) return true; } return false; } ByteArray DirectoryReader::norms(const String& field) { SyncLock syncLock(this); ensureOpen(); ByteArray bytes(normsCache.get(field)); if (bytes) return bytes; // cache hit if (!hasNorms(field)) return ByteArray(); bytes = ByteArray::newInstance(maxDoc()); for (int32_t i = 0; i < subReaders.size(); ++i) subReaders[i]->norms(field, bytes, starts[i]); normsCache.put(field, bytes); // update cache return bytes; } void DirectoryReader::norms(const String& field, ByteArray norms, int32_t offset) { SyncLock syncLock(this); ensureOpen(); ByteArray bytes(normsCache.get(field)); if (!bytes && !hasNorms(field)) MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); else if (bytes) // cache hit MiscUtils::arrayCopy(bytes.get(), 0, norms.get(), offset, maxDoc()); else { for (int32_t i = 0; i < subReaders.size(); ++i) // read from segments subReaders[i]->norms(field, norms, offset + starts[i]); } } void DirectoryReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { { SyncLock normsLock(&normsCache); normsCache.remove(field); // clear cache } int32_t i = readerIndex(doc); // find segment num subReaders[i]->setNorm(doc - starts[i], field, value); // dispatch } TermEnumPtr DirectoryReader::terms() { ensureOpen(); return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts, TermPtr()); } TermEnumPtr DirectoryReader::terms(TermPtr t) { ensureOpen(); return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts, t); } int32_t DirectoryReader::docFreq(TermPtr t) { ensureOpen(); int32_t total = 0; // sum freqs in segments for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) total += (*reader)->docFreq(t); return total; } TermDocsPtr DirectoryReader::termDocs() { ensureOpen(); return newLucene(shared_from_this(), 
Collection::newInstance(subReaders.begin(), subReaders.end()), starts); } TermPositionsPtr DirectoryReader::termPositions() { ensureOpen(); return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts); } void DirectoryReader::acquireWriteLock() { if (readOnly) { // NOTE: we should not reach this code with the core IndexReader classes; // however, an external subclass of IndexReader could reach this. ReadOnlySegmentReader::noWrite(); } if (segmentInfos) { ensureOpen(); if (stale) boost::throw_exception(StaleReaderException(L"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations")); if (!writeLock) { LockPtr writeLock(_directory->makeLock(IndexWriter::WRITE_LOCK_NAME)); if (!writeLock->obtain((int32_t)IndexWriter::WRITE_LOCK_TIMEOUT)) // obtain write lock boost::throw_exception(LockObtainFailedException(L"Index locked for write: " + writeLock->toString())); this->writeLock = writeLock; // we have to check whether index has changed since this reader was opened. // if so, this reader is no longer valid for deletion if (SegmentInfos::readCurrentVersion(_directory) > maxIndexVersion) { stale = true; this->writeLock->release(); this->writeLock.reset(); boost::throw_exception(StaleReaderException(L"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations")); } } } } void DirectoryReader::doCommit(MapStringString commitUserData) { if (_hasChanges) { segmentInfos->setUserData(commitUserData); // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter IndexFileDeleterPtr deleter(newLucene(_directory, deletionPolicy ? 
deletionPolicy : newLucene(), segmentInfos, InfoStreamPtr(), DocumentsWriterPtr(), synced)); segmentInfos->updateGeneration(deleter->getLastSegmentInfos()); // Checkpoint the state we are about to change, in case we have to roll back startCommit(); bool success = false; LuceneException finally; try { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) (*reader)->commit(); // Sync all files we just wrote HashSet files(segmentInfos->files(_directory, false)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (!synced.contains(*fileName)) { BOOST_ASSERT(_directory->fileExists(*fileName)); _directory->sync(*fileName); synced.add(*fileName); } } segmentInfos->commit(_directory); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // Rollback changes that were made to SegmentInfos but failed to get [fully] // committed. This way this reader instance remains consistent (matched to what's // actually in the index) rollbackCommit(); // Recompute deletable files & remove them (so partially written .del files, etc, // are removed) deleter->refresh(); } finally.throwException(); // Have the deleter remove any now unreferenced files due to this commit deleter->checkpoint(segmentInfos, true); deleter->close(); maxIndexVersion = segmentInfos->getVersion(); if (writeLock) { writeLock->release(); // release write lock writeLock.reset(); } } _hasChanges = false; } void DirectoryReader::startCommit() { rollbackHasChanges = _hasChanges; for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) (*reader)->startCommit(); } void DirectoryReader::rollbackCommit() { _hasChanges = rollbackHasChanges; for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) (*reader)->rollbackCommit(); } MapStringString DirectoryReader::getCommitUserData() { ensureOpen(); return segmentInfos->getUserData(); } bool 
DirectoryReader::isCurrent() { ensureOpen(); IndexWriterPtr writer(_writer.lock()); if (!writer || writer->isClosed()) { // we loaded SegmentInfos from the directory return (SegmentInfos::readCurrentVersion(_directory) == segmentInfos->getVersion()); } else return writer->nrtIsCurrent(segmentInfosStart); } void DirectoryReader::doClose() { SyncLock syncLock(this); LuceneException ioe; normsCache.reset(); for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { // try to close each reader, even if an exception is thrown try { (*reader)->decRef(); } catch (LuceneException& e) { if (ioe.isNull()) ioe = e; } } // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is // generally not a good idea): FieldCache::DEFAULT()->purge(shared_from_this()); // throw the first exception ioe.throwException(); } HashSet DirectoryReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); return getFieldNames(fieldOption, Collection::newInstance(subReaders.begin(), subReaders.end())); } HashSet DirectoryReader::getFieldNames(FieldOption fieldOption, Collection subReaders) { // maintain a unique set of field names HashSet fieldSet(HashSet::newInstance()); for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { HashSet names((*reader)->getFieldNames(fieldOption)); fieldSet.addAll(names.begin(), names.end()); } return fieldSet; } Collection DirectoryReader::getSequentialSubReaders() { return Collection::newInstance(subReaders.begin(), subReaders.end()); } DirectoryPtr DirectoryReader::directory() { // Don't ensureOpen here -- in certain cases, when a cloned/reopened reader needs to commit, it may call // this method on the closed original reader return _directory; } int32_t DirectoryReader::getTermInfosIndexDivisor() { return termInfosIndexDivisor; } IndexCommitPtr DirectoryReader::getIndexCommit() { return newLucene(segmentInfos, _directory); } Collection 
DirectoryReader::listCommits(DirectoryPtr dir) { HashSet files(dir->listAll()); Collection commits(Collection::newInstance()); SegmentInfosPtr latest(newLucene()); latest->read(dir); int64_t currentGen = latest->getGeneration(); commits.add(newLucene(latest, dir)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (boost::starts_with(*fileName, IndexFileNames::SEGMENTS()) && *fileName != IndexFileNames::SEGMENTS_GEN() && SegmentInfos::generationFromSegmentsFileName(*fileName) < currentGen) { SegmentInfosPtr sis(newLucene()); try { // IOException allowed to throw there, in case segments_N is corrupt sis->read(dir, *fileName); } catch (FileNotFoundException&) { sis.reset(); } if (sis) commits.add(newLucene(sis, dir)); } } return commits; } FindSegmentsOpen::FindSegmentsOpen(bool readOnly, IndexDeletionPolicyPtr deletionPolicy, int32_t termInfosIndexDivisor, SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFileT(infos, directory) { this->readOnly = readOnly; this->deletionPolicy = deletionPolicy; this->termInfosIndexDivisor = termInfosIndexDivisor; } FindSegmentsOpen::~FindSegmentsOpen() { } IndexReaderPtr FindSegmentsOpen::doBody(const String& segmentFileName) { SegmentInfosPtr segmentInfos(_segmentInfos); segmentInfos->read(directory, segmentFileName); if (readOnly) return newLucene(directory, segmentInfos, deletionPolicy, termInfosIndexDivisor); else return newLucene(directory, segmentInfos, deletionPolicy, false, termInfosIndexDivisor); } FindSegmentsReopen::FindSegmentsReopen(DirectoryReaderPtr reader, bool openReadOnly, SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFileT(infos, directory) { this->_reader = reader; this->openReadOnly = openReadOnly; } FindSegmentsReopen::~FindSegmentsReopen() { } DirectoryReaderPtr FindSegmentsReopen::doBody(const String& segmentFileName) { SegmentInfosPtr segmentInfos(_segmentInfos); segmentInfos->read(directory, segmentFileName); return 
DirectoryReaderPtr(_reader)->doReopen(segmentInfos, false, openReadOnly); } MultiTermEnum::MultiTermEnum(IndexReaderPtr topReader, Collection readers, Collection starts, TermPtr t) { _docFreq = 0; this->_topReader = topReader; queue = newLucene(readers.size()); matchingSegments = Collection::newInstance(readers.size() + 1); for (int32_t i = 0; i < readers.size(); ++i) { IndexReaderPtr reader(readers[i]); TermEnumPtr termEnum; if (t) termEnum = reader->terms(t); else termEnum = reader->terms(); SegmentMergeInfoPtr smi(newLucene(starts[i], termEnum, reader)); smi->ord = i; if (t ? termEnum->term() : smi->next()) queue->add(smi); // initialize queue else smi->close(); } if (t && !queue->empty()) next(); } MultiTermEnum::~MultiTermEnum() { } bool MultiTermEnum::next() { for (Collection::iterator smi = matchingSegments.begin(); smi != matchingSegments.end(); ++smi) { if (!(*smi)) break; if ((*smi)->next()) queue->add(*smi); else (*smi)->close(); // done with segment } int32_t numMatchingSegments = 0; matchingSegments[0].reset(); SegmentMergeInfoPtr top(queue->top()); if (!top) { _term.reset(); return false; } _term = top->term; _docFreq = 0; while (top && _term->compareTo(top->term) == 0) { matchingSegments[numMatchingSegments++] = top; queue->pop(); _docFreq += top->termEnum->docFreq(); // increment freq top = queue->top(); } matchingSegments[numMatchingSegments].reset(); return true; } TermPtr MultiTermEnum::term() { return _term; } int32_t MultiTermEnum::docFreq() { return _docFreq; } void MultiTermEnum::close() { queue->close(); } MultiTermDocs::MultiTermDocs(IndexReaderPtr topReader, Collection r, Collection s) { this->_topReader = topReader; readers = r; starts = s; base = 0; pointer = 0; readerTermDocs = Collection::newInstance(r.size()); } MultiTermDocs::~MultiTermDocs() { } int32_t MultiTermDocs::doc() { return base + current->doc(); } int32_t MultiTermDocs::freq() { return current->freq(); } void MultiTermDocs::seek(TermPtr term) { this->term = term; 
this->base = 0; this->pointer = 0; this->current.reset(); this->tenum.reset(); this->smi.reset(); this->matchingSegmentPos = 0; } void MultiTermDocs::seek(TermEnumPtr termEnum) { seek(termEnum->term()); MultiTermEnumPtr multiTermEnum(boost::dynamic_pointer_cast(termEnum)); if (multiTermEnum) { tenum = multiTermEnum; if (IndexReaderPtr(_topReader) != IndexReaderPtr(tenum->_topReader)) tenum.reset(); } } bool MultiTermDocs::next() { while (true) { if (current && current->next()) return true; else if (pointer < readers.size()) { if (tenum) { smi = tenum->matchingSegments[matchingSegmentPos++]; if (!smi) { pointer = readers.size(); return false; } pointer = smi->ord; } base = starts[pointer]; current = termDocs(pointer++); } else return false; } } int32_t MultiTermDocs::read(Collection docs, Collection freqs) { while (true) { while (!current) { if (pointer < readers.size()) // try next segment { if (tenum) { smi = tenum->matchingSegments[matchingSegmentPos++]; if (!smi) { pointer = readers.size(); return 0; } pointer = smi->ord; } base = starts[pointer]; current = termDocs(pointer++); } else return 0; } int32_t end = current->read(docs, freqs); if (end == 0) // none left in segment current.reset(); else // got some { for (int32_t i = 0; i < end; ++i) // adjust doc numbers docs[i] += base; return end; } } } bool MultiTermDocs::skipTo(int32_t target) { while (true) { if (current && current->skipTo(target - base)) return true; else if (pointer < readers.size()) { if (tenum) { smi = tenum->matchingSegments[matchingSegmentPos++]; if (!smi) { pointer = readers.size(); return false; } pointer = smi->ord; } base = starts[pointer]; current = termDocs(pointer++); } else return false; } } TermDocsPtr MultiTermDocs::termDocs(int32_t i) { TermDocsPtr result(readerTermDocs[i]); if (!result) { readerTermDocs[i] = termDocs(readers[i]); result = readerTermDocs[i]; } if (smi) { BOOST_ASSERT(smi->ord == i); BOOST_ASSERT(smi->termEnum->term()->equals(term)); result->seek(smi->termEnum); } 
else result->seek(term); return result; } TermDocsPtr MultiTermDocs::termDocs(IndexReaderPtr reader) { return term ? reader->termDocs() : reader->termDocs(TermPtr()); } void MultiTermDocs::close() { for (Collection::iterator termDoc = readerTermDocs.begin(); termDoc != readerTermDocs.end(); ++termDoc) { if (*termDoc) (*termDoc)->close(); } } MultiTermPositions::MultiTermPositions(IndexReaderPtr topReader, Collection r, Collection s) : MultiTermDocs(topReader, r, s) { } MultiTermPositions::~MultiTermPositions() { } TermDocsPtr MultiTermPositions::termDocs(IndexReaderPtr reader) { return reader->termPositions(); } int32_t MultiTermPositions::nextPosition() { return boost::static_pointer_cast(current)->nextPosition(); } int32_t MultiTermPositions::getPayloadLength() { return boost::static_pointer_cast(current)->getPayloadLength(); } ByteArray MultiTermPositions::getPayload(ByteArray data, int32_t offset) { return boost::static_pointer_cast(current)->getPayload(data, offset); } bool MultiTermPositions::isPayloadAvailable() { return boost::static_pointer_cast(current)->isPayloadAvailable(); } ReaderCommit::ReaderCommit(SegmentInfosPtr infos, DirectoryPtr dir) { segmentsFileName = infos->getCurrentSegmentFileName(); this->dir = dir; userData = infos->getUserData(); HashSet files(infos->files(dir, true)); this->files = HashSet::newInstance(files.begin(), files.end()); version = infos->getVersion(); generation = infos->getGeneration(); _isOptimized = infos->size() == 1 && !infos->info(0)->hasDeletions(); } ReaderCommit::~ReaderCommit() { } String ReaderCommit::toString() { return L"DirectoryReader::ReaderCommit(" + segmentsFileName + L")"; } bool ReaderCommit::isOptimized() { return _isOptimized; } String ReaderCommit::getSegmentsFileName() { return segmentsFileName; } HashSet ReaderCommit::getFileNames() { return files; } DirectoryPtr ReaderCommit::getDirectory() { return dir; } int64_t ReaderCommit::getVersion() { return version; } int64_t ReaderCommit::getGeneration() { 
return generation; } bool ReaderCommit::isDeleted() { return false; } MapStringString ReaderCommit::getUserData() { return userData; } void ReaderCommit::deleteCommit() { boost::throw_exception(UnsupportedOperationException(L"This IndexCommit does not support deletions.")); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocConsumer.cpp000066400000000000000000000007051217574114600230700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocConsumer.h" namespace Lucene { DocConsumer::~DocConsumer() { } } LucenePlusPlus-rel_3.0.4/src/core/index/DocConsumerPerThread.cpp000066400000000000000000000007401217574114600246660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocConsumerPerThread.h" namespace Lucene { DocConsumerPerThread::~DocConsumerPerThread() { } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldConsumer.cpp000066400000000000000000000011171217574114600240320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumer.h" namespace Lucene { DocFieldConsumer::~DocFieldConsumer() { } void DocFieldConsumer::setFieldInfos(FieldInfosPtr fieldInfos) { this->fieldInfos = fieldInfos; } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldConsumerPerField.cpp000066400000000000000000000007541217574114600254530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumerPerField.h" namespace Lucene { DocFieldConsumerPerField::~DocFieldConsumerPerField() { } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldConsumerPerThread.cpp000066400000000000000000000007571217574114600256420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumerPerThread.h" namespace Lucene { DocFieldConsumerPerThread::~DocFieldConsumerPerThread() { } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldConsumers.cpp000066400000000000000000000133551217574114600242240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumers.h" #include "DocFieldConsumersPerField.h" #include "DocFieldConsumersPerThread.h" #include "MiscUtils.h" namespace Lucene { DocFieldConsumers::DocFieldConsumers(DocFieldConsumerPtr one, DocFieldConsumerPtr two) { freeCount = 0; allocCount = 0; docFreeList = Collection::newInstance(1); this->one = one; this->two = two; } DocFieldConsumers::~DocFieldConsumers() { } void DocFieldConsumers::setFieldInfos(FieldInfosPtr fieldInfos) { DocFieldConsumer::setFieldInfos(fieldInfos); one->setFieldInfos(fieldInfos); two->setFieldInfos(fieldInfos); } void DocFieldConsumers::flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state) { MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField oneThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField twoThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); for (MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { Collection oneFields(Collection::newInstance()); Collection twoFields(Collection::newInstance()); for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { oneFields.add(boost::static_pointer_cast(*perField)->one); twoFields.add(boost::static_pointer_cast(*perField)->two); } oneThreadsAndFields.put(boost::static_pointer_cast(entry->first)->one, oneFields); twoThreadsAndFields.put(boost::static_pointer_cast(entry->first)->two, oneFields); } one->flush(oneThreadsAndFields, state); two->flush(twoThreadsAndFields, state); } void DocFieldConsumers::closeDocStore(SegmentWriteStatePtr state) { LuceneException finally; try { one->closeDocStore(state); } 
catch (LuceneException& e) { finally = e; } try { two->closeDocStore(state); } catch (LuceneException& e) { finally = e; } finally.throwException(); } bool DocFieldConsumers::freeRAM() { return (one->freeRAM() || two->freeRAM()); } DocFieldConsumerPerThreadPtr DocFieldConsumers::addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread) { return newLucene(docFieldProcessorPerThread, shared_from_this(), one->addThread(docFieldProcessorPerThread), two->addThread(docFieldProcessorPerThread)); } DocFieldConsumersPerDocPtr DocFieldConsumers::getPerDoc() { SyncLock syncLock(this); if (freeCount == 0) { ++allocCount; if (allocCount > docFreeList.size()) { // Grow our free list up front to make sure we have enough space to recycle all outstanding // PerDoc instances BOOST_ASSERT(allocCount == 1 + docFreeList.size()); docFreeList.resize(MiscUtils::getNextSize(allocCount)); } return newLucene(shared_from_this()); } else return docFreeList[--freeCount]; } void DocFieldConsumers::freePerDoc(DocFieldConsumersPerDocPtr perDoc) { SyncLock syncLock(this); BOOST_ASSERT(freeCount < docFreeList.size()); docFreeList[freeCount++] = perDoc; } DocFieldConsumersPerDoc::DocFieldConsumersPerDoc(DocFieldConsumersPtr fieldConsumers) { this->_fieldConsumers = fieldConsumers; } DocFieldConsumersPerDoc::~DocFieldConsumersPerDoc() { } int64_t DocFieldConsumersPerDoc::sizeInBytes() { return one->sizeInBytes() + two->sizeInBytes(); } void DocFieldConsumersPerDoc::finish() { LuceneException finally; try { one->finish(); } catch (LuceneException& e) { finally = e; } try { two->finish(); } catch (LuceneException& e) { finally = e; } DocFieldConsumersPtr(_fieldConsumers)->freePerDoc(shared_from_this()); finally.throwException(); } void DocFieldConsumersPerDoc::abort() { LuceneException finally; try { one->abort(); } catch (LuceneException& e) { finally = e; } try { two->abort(); } catch (LuceneException& e) { finally = e; } DocFieldConsumersPtr(_fieldConsumers)->freePerDoc(shared_from_this()); 
finally.throwException(); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldConsumersPerField.cpp000066400000000000000000000025311217574114600256310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumersPerField.h" namespace Lucene { DocFieldConsumersPerField::DocFieldConsumersPerField(DocFieldConsumersPerThreadPtr perThread, DocFieldConsumerPerFieldPtr one, DocFieldConsumerPerFieldPtr two) { this->_perThread = perThread; this->one = one; this->two = two; } DocFieldConsumersPerField::~DocFieldConsumersPerField() { } void DocFieldConsumersPerField::processFields(Collection fields, int32_t count) { one->processFields(fields, count); two->processFields(fields, count); } void DocFieldConsumersPerField::abort() { LuceneException finally; try { one->abort(); } catch (LuceneException& e) { finally = e; } try { two->abort(); } catch (LuceneException& e) { finally = e; } finally.throwException(); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldConsumersPerThread.cpp000066400000000000000000000047111217574114600260170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumersPerThread.h" #include "DocFieldProcessorPerThread.h" #include "DocFieldConsumers.h" #include "DocFieldConsumersPerField.h" namespace Lucene { DocFieldConsumersPerThread::DocFieldConsumersPerThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread, DocFieldConsumersPtr parent, DocFieldConsumerPerThreadPtr one, DocFieldConsumerPerThreadPtr two) { this->_parent = parent; this->one = one; this->two = two; docState = docFieldProcessorPerThread->docState; } DocFieldConsumersPerThread::~DocFieldConsumersPerThread() { } void DocFieldConsumersPerThread::startDocument() { one->startDocument(); two->startDocument(); } void DocFieldConsumersPerThread::abort() { LuceneException finally; try { one->abort(); } catch (LuceneException& e) { finally = e; } try { two->abort(); } catch (LuceneException& e) { finally = e; } finally.throwException(); } DocWriterPtr DocFieldConsumersPerThread::finishDocument() { DocWriterPtr oneDoc(one->finishDocument()); DocWriterPtr twoDoc(two->finishDocument()); if (!oneDoc) return twoDoc; else if (!twoDoc) return oneDoc; else { DocFieldConsumersPerDocPtr both(DocFieldConsumersPtr(_parent)->getPerDoc()); both->docID = docState->docID; BOOST_ASSERT(oneDoc->docID == docState->docID); BOOST_ASSERT(twoDoc->docID == docState->docID); both->one = oneDoc; both->two = twoDoc; return both; } } DocFieldConsumerPerFieldPtr DocFieldConsumersPerThread::addField(FieldInfoPtr fi) { return newLucene(shared_from_this(), one->addField(fi), two->addField(fi)); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldProcessor.cpp000066400000000000000000000054741217574114600242300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldProcessor.h" #include "DocFieldProcessorPerThread.h" #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumer.h" #include "StoredFieldsWriter.h" #include "SegmentWriteState.h" #include "IndexFileNames.h" #include "FieldInfos.h" #include "TestPoint.h" namespace Lucene { DocFieldProcessor::DocFieldProcessor(DocumentsWriterPtr docWriter, DocFieldConsumerPtr consumer) { this->fieldInfos = newLucene(); this->_docWriter = docWriter; this->consumer = consumer; consumer->setFieldInfos(fieldInfos); fieldsWriter = newLucene(docWriter, fieldInfos); } DocFieldProcessor::~DocFieldProcessor() { } void DocFieldProcessor::closeDocStore(SegmentWriteStatePtr state) { consumer->closeDocStore(state); fieldsWriter->closeDocStore(state); } void DocFieldProcessor::flush(Collection threads, SegmentWriteStatePtr state) { TestScope testScope(L"DocFieldProcessor", L"flush"); MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField childThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); for (Collection::iterator thread = threads.begin(); thread != threads.end(); ++thread) { DocFieldProcessorPerThreadPtr perThread(boost::static_pointer_cast(*thread)); childThreadsAndFields.put(perThread->consumer, perThread->fields()); perThread->trimFields(state); } fieldsWriter->flush(state); consumer->flush(childThreadsAndFields, state); // Important to save after asking consumer to flush so consumer can alter the FieldInfo* if necessary. // eg FreqProxTermsWriter does this with FieldInfo.storePayload. 
String fileName(state->segmentFileName(IndexFileNames::FIELD_INFOS_EXTENSION())); fieldInfos->write(state->directory, fileName); state->flushedFiles.add(fileName); } void DocFieldProcessor::abort() { fieldsWriter->abort(); consumer->abort(); } bool DocFieldProcessor::freeRAM() { return consumer->freeRAM(); } DocConsumerPerThreadPtr DocFieldProcessor::addThread(DocumentsWriterThreadStatePtr perThread) { return newLucene(perThread, shared_from_this()); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldProcessorPerField.cpp000066400000000000000000000020301217574114600256240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldProcessorPerField.h" #include "DocFieldProcessorPerThread.h" #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumerPerField.h" namespace Lucene { DocFieldProcessorPerField::DocFieldProcessorPerField(DocFieldProcessorPerThreadPtr perThread, FieldInfoPtr fieldInfo) { lastGen = -1; fieldCount = 0; fields = Collection::newInstance(1); this->consumer = perThread->consumer->addField(fieldInfo); this->fieldInfo = fieldInfo; } DocFieldProcessorPerField::~DocFieldProcessorPerField() { } void DocFieldProcessorPerField::abort() { consumer->abort(); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocFieldProcessorPerThread.cpp000066400000000000000000000313341217574114600260210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldProcessorPerThread.h" #include "DocFieldProcessorPerField.h" #include "DocFieldProcessor.h" #include "DocFieldConsumer.h" #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumerPerField.h" #include "DocumentsWriterThreadState.h" #include "DocumentsWriter.h" #include "StoredFieldsWriter.h" #include "StoredFieldsWriterPerThread.h" #include "SegmentWriteState.h" #include "FieldInfo.h" #include "FieldInfos.h" #include "Fieldable.h" #include "IndexWriter.h" #include "Document.h" #include "InfoStream.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DocFieldProcessorPerThread::DocFieldProcessorPerThread(DocumentsWriterThreadStatePtr threadState, DocFieldProcessorPtr docFieldProcessor) { _fields = Collection::newInstance(1); fieldHash = Collection::newInstance(2); hashMask = 1; fieldGen = 0; fieldCount = 0; totalFieldCount = 0; this->docState = threadState->docState; this->_docFieldProcessor = docFieldProcessor; this->fieldInfos = docFieldProcessor->fieldInfos; docFreeList = Collection::newInstance(1); freeCount = 0; allocCount = 0; } DocFieldProcessorPerThread::~DocFieldProcessorPerThread() { } void DocFieldProcessorPerThread::initialize() { DocFieldProcessorPtr docFieldProcessor(_docFieldProcessor); consumer = docFieldProcessor->consumer->addThread(shared_from_this()); fieldsWriter = docFieldProcessor->fieldsWriter->addThread(docState); } void DocFieldProcessorPerThread::abort() { for (Collection::iterator field = fieldHash.begin(); field != fieldHash.end(); ++field) { DocFieldProcessorPerFieldPtr current(*field); while (current) { DocFieldProcessorPerFieldPtr next(current->next); current->abort(); current = next; } } fieldsWriter->abort(); consumer->abort(); } Collection DocFieldProcessorPerThread::fields() { Collection fields(Collection::newInstance()); for (Collection::iterator field = fieldHash.begin(); field != 
fieldHash.end(); ++field) { DocFieldProcessorPerFieldPtr current(*field); while (current) { fields.add(current->consumer); current = current->next; } } BOOST_ASSERT(fields.size() == totalFieldCount); return fields; } void DocFieldProcessorPerThread::trimFields(SegmentWriteStatePtr state) { for (Collection::iterator perField = fieldHash.begin(); perField != fieldHash.end(); ++perField) { DocFieldProcessorPerFieldPtr current(*perField); DocFieldProcessorPerFieldPtr lastPerField; while (current) { if (current->lastGen == -1) { // This field was not seen since the previous flush, so, free up its resources now // Unhash if (!lastPerField) *perField = current->next; else lastPerField->next = current->next; DocumentsWriterPtr docWriter(state->_docWriter); if (docWriter->infoStream) *(docWriter->infoStream) << L" purge field=" << current->fieldInfo->name << L"\n"; --totalFieldCount; } else { // Reset current->lastGen = -1; lastPerField = current; } current = current->next; } } } void DocFieldProcessorPerThread::rehash() { int32_t newHashSize = (fieldHash.size() * 2); BOOST_ASSERT(newHashSize > fieldHash.size()); Collection newHashArray(Collection::newInstance(newHashSize)); // Rehash int32_t newHashMask = newHashSize - 1; for (Collection::iterator fp0 = fieldHash.begin(); fp0 != fieldHash.end(); ++fp0) { DocFieldProcessorPerFieldPtr current(*fp0); while (current) { int32_t hashPos2 = StringUtils::hashCode(current->fieldInfo->name) & newHashMask; DocFieldProcessorPerFieldPtr nextFP0(current->next); current->next = newHashArray[hashPos2]; newHashArray[hashPos2] = current; current = nextFP0; } } fieldHash = newHashArray; hashMask = newHashMask; } struct lessFieldInfoName { inline bool operator()(const DocFieldProcessorPerFieldPtr& first, const DocFieldProcessorPerFieldPtr& second) const { return (first->fieldInfo->name < second->fieldInfo->name); } }; DocWriterPtr DocFieldProcessorPerThread::processDocument() { consumer->startDocument(); fieldsWriter->startDocument(); 
DocumentPtr doc(docState->doc); DocFieldProcessorPtr docFieldProcessor(_docFieldProcessor); DocumentsWriterPtr docWriter(docFieldProcessor->_docWriter); bool testPoint = IndexWriterPtr(docWriter->_writer)->testPoint(L"DocumentsWriter.ThreadState.init start"); BOOST_ASSERT(testPoint); fieldCount = 0; int32_t thisFieldGen = fieldGen++; Collection docFields(doc->getFields()); // Absorb any new fields first seen in this document. // Also absorb any changes to fields we had already seen before (eg suddenly turning on norms or // vectors, etc.) for (Collection::iterator field = docFields.begin(); field != docFields.end(); ++field) { String fieldName((*field)->name()); // Make sure we have a PerField allocated int32_t hashPos = StringUtils::hashCode(fieldName) & hashMask; DocFieldProcessorPerFieldPtr fp(fieldHash[hashPos]); while (fp && fp->fieldInfo->name != fieldName) fp = fp->next; if (!fp) { FieldInfoPtr fi(fieldInfos->add(fieldName, (*field)->isIndexed(), (*field)->isTermVectorStored(), (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions())); fp = newLucene(shared_from_this(), fi); fp->next = fieldHash[hashPos]; fieldHash[hashPos] = fp; ++totalFieldCount; if (totalFieldCount >= fieldHash.size() / 2) rehash(); } else { fp->fieldInfo->update((*field)->isIndexed(), (*field)->isTermVectorStored(), (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions()); } if (thisFieldGen != fp->lastGen) { // First time we're seeing this field for this doc fp->fieldCount = 0; if (fieldCount == _fields.size()) _fields.resize(_fields.size() * 2); _fields[fieldCount++] = fp; fp->lastGen = thisFieldGen; } if (fp->fieldCount == fp->fields.size()) fp->fields.resize(fp->fields.size() * 2); fp->fields[fp->fieldCount++] = *field; if ((*field)->isStored()) fieldsWriter->addField(*field, 
fp->fieldInfo); } // If we are writing vectors then we must visit fields in sorted order so they are written in sorted order. std::sort(_fields.begin(), _fields.begin() + fieldCount, lessFieldInfoName()); for (int32_t i = 0; i < fieldCount; ++i) _fields[i]->consumer->processFields(_fields[i]->fields, _fields[i]->fieldCount); if (!docState->maxTermPrefix.empty() && docState->infoStream) { *(docState->infoStream) << L"WARNING: document contains at least one immense term (longer than the max length " << StringUtils::toString(DocumentsWriter::MAX_TERM_LENGTH) << L"), all of which were skipped. " << L"Please correct the analyzer to not produce such terms. The prefix of the first immense " << L"term is: '" << StringUtils::toString(docState->maxTermPrefix) << L"...'\n"; docState->maxTermPrefix.clear(); } DocWriterPtr one(fieldsWriter->finishDocument()); DocWriterPtr two(consumer->finishDocument()); if (!one) return two; else if (!two) return one; else { DocFieldProcessorPerThreadPerDocPtr both(getPerDoc()); both->docID = docState->docID; BOOST_ASSERT(one->docID == docState->docID); BOOST_ASSERT(two->docID == docState->docID); both->one = one; both->two = two; return both; } } DocFieldProcessorPerThreadPerDocPtr DocFieldProcessorPerThread::getPerDoc() { SyncLock syncLock(this); if (freeCount == 0) { ++allocCount; if (allocCount > docFreeList.size()) { // Grow our free list up front to make sure we have enough space to recycle all // outstanding PerDoc instances BOOST_ASSERT(allocCount == docFreeList.size() + 1); docFreeList.resize(MiscUtils::getNextSize(allocCount)); } return newLucene(shared_from_this()); } else return docFreeList[--freeCount]; } void DocFieldProcessorPerThread::freePerDoc(DocFieldProcessorPerThreadPerDocPtr perDoc) { SyncLock syncLock(this); BOOST_ASSERT(freeCount < docFreeList.size()); docFreeList[freeCount++] = perDoc; } DocFieldProcessorPerThreadPerDoc::DocFieldProcessorPerThreadPerDoc(DocFieldProcessorPerThreadPtr docProcessor) { this->_docProcessor 
= docProcessor; } DocFieldProcessorPerThreadPerDoc::~DocFieldProcessorPerThreadPerDoc() { } int64_t DocFieldProcessorPerThreadPerDoc::sizeInBytes() { return one->sizeInBytes() + two->sizeInBytes(); } void DocFieldProcessorPerThreadPerDoc::finish() { LuceneException finally; try { try { one->finish(); } catch (LuceneException& e) { finally = e; } two->finish(); } catch (LuceneException& e) { finally = e; } DocFieldProcessorPerThreadPtr(_docProcessor)->freePerDoc(shared_from_this()); finally.throwException(); } void DocFieldProcessorPerThreadPerDoc::abort() { LuceneException finally; try { try { one->abort(); } catch (LuceneException& e) { finally = e; } two->abort(); } catch (LuceneException& e) { finally = e; } DocFieldProcessorPerThreadPtr(_docProcessor)->freePerDoc(shared_from_this()); finally.throwException(); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocInverter.cpp000066400000000000000000000067231217574114600231010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocInverter.h" #include "InvertedDocConsumer.h" #include "InvertedDocEndConsumer.h" #include "InvertedDocConsumerPerThread.h" #include "InvertedDocEndConsumerPerThread.h" #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumerPerField.h" #include "DocInverterPerField.h" #include "DocInverterPerThread.h" namespace Lucene { DocInverter::DocInverter(InvertedDocConsumerPtr consumer, InvertedDocEndConsumerPtr endConsumer) { this->consumer = consumer; this->endConsumer = endConsumer; } DocInverter::~DocInverter() { } void DocInverter::setFieldInfos(FieldInfosPtr fieldInfos) { DocFieldConsumer::setFieldInfos(fieldInfos); consumer->setFieldInfos(fieldInfos); endConsumer->setFieldInfos(fieldInfos); } void DocInverter::flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state) { MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField childThreadsAndFields(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::newInstance()); MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField endChildThreadsAndFields(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField::newInstance()); for (MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { Collection childFields(Collection::newInstance()); Collection endChildFields(Collection::newInstance()); for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { childFields.add(boost::static_pointer_cast(*perField)->consumer); endChildFields.add(boost::static_pointer_cast(*perField)->endConsumer); } childThreadsAndFields.put(boost::static_pointer_cast(entry->first)->consumer, childFields); 
endChildThreadsAndFields.put(boost::static_pointer_cast(entry->first)->endConsumer, endChildFields); } consumer->flush(childThreadsAndFields, state); endConsumer->flush(endChildThreadsAndFields, state); } void DocInverter::closeDocStore(SegmentWriteStatePtr state) { consumer->closeDocStore(state); endConsumer->closeDocStore(state); } void DocInverter::abort() { consumer->abort(); endConsumer->abort(); } bool DocInverter::freeRAM() { return consumer->freeRAM(); } DocFieldConsumerPerThreadPtr DocInverter::addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread) { return newLucene(docFieldProcessorPerThread, shared_from_this()); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocInverterPerField.cpp000066400000000000000000000217631217574114600245150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocInverterPerField.h" #include "DocInverterPerThread.h" #include "InvertedDocConsumerPerThread.h" #include "InvertedDocEndConsumerPerThread.h" #include "InvertedDocConsumerPerField.h" #include "InvertedDocEndConsumerPerField.h" #include "Fieldable.h" #include "FieldInfo.h" #include "FieldInvertState.h" #include "DocumentsWriter.h" #include "Document.h" #include "Analyzer.h" #include "ReusableStringReader.h" #include "TokenStream.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "AttributeSource.h" #include "InfoStream.h" #include "StringUtils.h" namespace Lucene { DocInverterPerField::DocInverterPerField(DocInverterPerThreadPtr perThread, FieldInfoPtr fieldInfo) { this->_perThread = perThread; this->fieldInfo = fieldInfo; docState = perThread->docState; fieldState = perThread->fieldState; } DocInverterPerField::~DocInverterPerField() { } void DocInverterPerField::initialize() { DocInverterPerThreadPtr perThread(_perThread); consumer = perThread->consumer->addField(shared_from_this(), fieldInfo); endConsumer = perThread->endConsumer->addField(shared_from_this(), fieldInfo); } void DocInverterPerField::abort() { consumer->abort(); endConsumer->abort(); } void DocInverterPerField::processFields(Collection fields, int32_t count) { fieldState->reset(docState->doc->getBoost()); int32_t maxFieldLength = docState->maxFieldLength; bool doInvert = consumer->start(fields, count); DocumentsWriterPtr docWriter(docState->_docWriter); DocInverterPerThreadPtr perThread(_perThread); for (int32_t i = 0; i < count; ++i) { FieldablePtr field = fields[i]; if (field->isIndexed() && doInvert) { bool anyToken; if (fieldState->length > 0) fieldState->position += docState->analyzer->getPositionIncrementGap(fieldInfo->name); if (!field->isTokenized()) { // un-tokenized field String stringValue(field->stringValue()); int32_t valueLength = 
(int32_t)stringValue.length(); perThread->singleToken->reinit(stringValue, 0, valueLength); fieldState->attributeSource = perThread->singleToken; consumer->start(field); bool success = false; LuceneException finally; try { consumer->add(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) docWriter->setAborting(); finally.throwException(); fieldState->offset += valueLength; ++fieldState->length; ++fieldState->position; anyToken = (valueLength > 0); } else { // tokenized field TokenStreamPtr stream; TokenStreamPtr streamValue(field->tokenStreamValue()); if (streamValue) stream = streamValue; else { // the field does not have a TokenStream, so we have to obtain one from the analyzer ReaderPtr reader; // find or make Reader ReaderPtr readerValue(field->readerValue()); if (readerValue) reader = readerValue; else { String stringValue(field->stringValue()); perThread->stringReader->init(stringValue); reader = perThread->stringReader; } // Tokenize field and add to postingTable stream = docState->analyzer->reusableTokenStream(fieldInfo->name, reader); } // reset the TokenStream to the first token stream->reset(); int32_t startLength = fieldState->length; LuceneException finally; try { int32_t offsetEnd = fieldState->offset - 1; bool hasMoreTokens = stream->incrementToken(); fieldState->attributeSource = stream; OffsetAttributePtr offsetAttribute(fieldState->attributeSource->addAttribute()); PositionIncrementAttributePtr posIncrAttribute(fieldState->attributeSource->addAttribute()); consumer->start(field); while (true) { // If we hit an exception in stream.next below (which is fairly common, eg if analyzer // chokes on a given document), then it's non-aborting and (above) this one document // will be marked as deleted, but still consume a docID if (!hasMoreTokens) break; int32_t posIncr = posIncrAttribute->getPositionIncrement(); fieldState->position += posIncr; if (fieldState->position > 0) --fieldState->position; if (posIncr == 0) 
++fieldState->numOverlap; bool success = false; try { // If we hit an exception in here, we abort all buffered documents since the last // flush, on the likelihood that the internal state of the consumer is now corrupt // and should not be flushed to a new segment consumer->add(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) docWriter->setAborting(); finally.throwException(); ++fieldState->position; offsetEnd = fieldState->offset + offsetAttribute->endOffset(); if (++fieldState->length >= maxFieldLength) { if (docState->infoStream) *docState->infoStream << L"maxFieldLength " << StringUtils::toString(maxFieldLength) << L" reached for field " << fieldInfo->name << L", ignoring following tokens\n"; break; } hasMoreTokens = stream->incrementToken(); } // trigger streams to perform end-of-stream operations stream->end(); fieldState->offset += offsetAttribute->endOffset(); anyToken = (fieldState->length > startLength); } catch (LuceneException& e) { finally = e; } stream->close(); finally.throwException(); } if (anyToken) fieldState->offset += docState->analyzer->getOffsetGap(field); fieldState->boost *= field->getBoost(); } // don't hang onto the field fields[i].reset(); } consumer->finish(); endConsumer->finish(); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocInverterPerThread.cpp000066400000000000000000000056051217574114600246760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocInverterPerThread.h" #include "DocInverterPerField.h" #include "DocInverter.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "DocFieldProcessorPerThread.h" #include "InvertedDocConsumer.h" #include "InvertedDocEndConsumer.h" #include "InvertedDocConsumerPerThread.h" #include "InvertedDocEndConsumerPerThread.h" #include "FieldInvertState.h" #include "ReusableStringReader.h" namespace Lucene { DocInverterPerThread::DocInverterPerThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread, DocInverterPtr docInverter) { this->fieldState = newLucene(); this->stringReader = newLucene(); this->singleToken = newLucene(); this->_docInverter = docInverter; this->docState = docFieldProcessorPerThread->docState; } DocInverterPerThread::~DocInverterPerThread() { } void DocInverterPerThread::initialize() { DocInverterPtr docInverter(_docInverter); consumer = docInverter->consumer->addThread(shared_from_this()); endConsumer = docInverter->endConsumer->addThread(shared_from_this()); } void DocInverterPerThread::startDocument() { consumer->startDocument(); endConsumer->startDocument(); } DocWriterPtr DocInverterPerThread::finishDocument() { endConsumer->finishDocument(); return consumer->finishDocument(); } void DocInverterPerThread::abort() { LuceneException finally; try { consumer->abort(); } catch (LuceneException& e) { finally = e; } try { endConsumer->abort(); } catch (LuceneException& e) { finally = e; } finally.throwException(); } DocFieldConsumerPerFieldPtr DocInverterPerThread::addField(FieldInfoPtr fi) { return newLucene(shared_from_this(), fi); } SingleTokenAttributeSource::SingleTokenAttributeSource() { termAttribute = addAttribute(); offsetAttribute = addAttribute(); } SingleTokenAttributeSource::~SingleTokenAttributeSource() { } void SingleTokenAttributeSource::reinit(const String& stringValue, int32_t startOffset, int32_t endOffset) 
{ termAttribute->setTermBuffer(stringValue); offsetAttribute->setOffset(startOffset, endOffset); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocumentsWriter.cpp000066400000000000000000001571071217574114600240160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocumentsWriter.h" #include "DocumentsWriterThreadState.h" #include "LuceneThread.h" #include "IndexWriter.h" #include "_IndexWriter.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "DocFieldProcessor.h" #include "Term.h" #include "TermDocs.h" #include "TermVectorsTermsWriter.h" #include "FreqProxTermsWriter.h" #include "TermsHashConsumer.h" #include "InvertedDocConsumer.h" #include "TermsHash.h" #include "DocInverter.h" #include "NormsWriter.h" #include "BufferedDeletes.h" #include "FieldInfos.h" #include "InfoStream.h" #include "DocConsumerPerThread.h" #include "SegmentWriteState.h" #include "IndexFileNames.h" #include "CompoundFileWriter.h" #include "MergeDocIDRemapper.h" #include "SegmentReader.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "Query.h" #include "Weight.h" #include "Scorer.h" #include "TestPoint.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// Max # ThreadState instances; if there are more threads than this they share ThreadStates const int32_t DocumentsWriter::MAX_THREAD_STATE = 5; /// Coarse estimates used to measure RAM usage of buffered deletes const int32_t DocumentsWriter::OBJECT_HEADER_BYTES = 8; #ifdef LPP_BUILD_64 const int32_t DocumentsWriter::POINTER_NUM_BYTE = 8; #else const int32_t DocumentsWriter::POINTER_NUM_BYTE = 4; #endif const int32_t DocumentsWriter::INT_NUM_BYTE = 4; 
#ifdef LPP_UNICODE_CHAR_SIZE_4 const int32_t DocumentsWriter::CHAR_NUM_BYTE = 4; #else const int32_t DocumentsWriter::CHAR_NUM_BYTE = 2; #endif /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object /// with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is /// object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since /// it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). /// BufferedDeletes.num is OBJ_HEADER + INT. const int32_t DocumentsWriter::BYTES_PER_DEL_TERM = 8 * DocumentsWriter::POINTER_NUM_BYTE + 5 * DocumentsWriter::OBJECT_HEADER_BYTES + 6 * DocumentsWriter::INT_NUM_BYTE; /// Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). Integer is /// OBJ_HEADER + int const int32_t DocumentsWriter::BYTES_PER_DEL_DOCID = 2 * DocumentsWriter::POINTER_NUM_BYTE + DocumentsWriter::OBJECT_HEADER_BYTES + DocumentsWriter::INT_NUM_BYTE; /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object /// with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount /// (say 24 bytes). Integer is OBJ_HEADER + INT. 
const int32_t DocumentsWriter::BYTES_PER_DEL_QUERY = 5 * DocumentsWriter::POINTER_NUM_BYTE + 2 * DocumentsWriter::OBJECT_HEADER_BYTES + 2 * DocumentsWriter::INT_NUM_BYTE + 24; /// Initial chunks size of the shared byte[] blocks used to store postings data const int32_t DocumentsWriter::BYTE_BLOCK_SHIFT = 15; const int32_t DocumentsWriter::BYTE_BLOCK_SIZE = 1 << DocumentsWriter::BYTE_BLOCK_SHIFT; const int32_t DocumentsWriter::BYTE_BLOCK_MASK = DocumentsWriter::BYTE_BLOCK_SIZE - 1; const int32_t DocumentsWriter::BYTE_BLOCK_NOT_MASK = ~DocumentsWriter::BYTE_BLOCK_MASK; /// Initial chunk size of the shared char[] blocks used to store term text const int32_t DocumentsWriter::CHAR_BLOCK_SHIFT = 14; const int32_t DocumentsWriter::CHAR_BLOCK_SIZE = 1 << DocumentsWriter::CHAR_BLOCK_SHIFT; const int32_t DocumentsWriter::CHAR_BLOCK_MASK = DocumentsWriter::CHAR_BLOCK_SIZE - 1; const int32_t DocumentsWriter::MAX_TERM_LENGTH = DocumentsWriter::CHAR_BLOCK_SIZE - 1; /// Initial chunks size of the shared int[] blocks used to store postings data const int32_t DocumentsWriter::INT_BLOCK_SHIFT = 13; const int32_t DocumentsWriter::INT_BLOCK_SIZE = 1 << DocumentsWriter::INT_BLOCK_SHIFT; const int32_t DocumentsWriter::INT_BLOCK_MASK = DocumentsWriter::INT_BLOCK_SIZE - 1; const int32_t DocumentsWriter::PER_DOC_BLOCK_SIZE = 1024; DocumentsWriter::DocumentsWriter(DirectoryPtr directory, IndexWriterPtr writer, IndexingChainPtr indexingChain) { this->threadStates = Collection::newInstance(); this->threadBindings = MapThreadDocumentsWriterThreadState::newInstance(); this->_openFiles = HashSet::newInstance(); this->_closedFiles = HashSet::newInstance(); this->freeIntBlocks = Collection::newInstance(); this->freeCharBlocks = Collection::newInstance(); this->directory = directory; this->_writer = writer; this->indexingChain = indexingChain; } DocumentsWriter::~DocumentsWriter() { } void DocumentsWriter::initialize() { docStoreOffset = 0; nextDocID = 0; numDocsInRAM = 0; numDocsInStore = 0; 
pauseThreads = 0; flushPending = false; bufferIsFull = false; aborting = false; maxFieldLength = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; deletesInRAM = newLucene(false); deletesFlushed = newLucene(true); maxBufferedDeleteTerms = IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS; ramBufferSize = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024); waitQueuePauseBytes = (int64_t)((double)ramBufferSize * 0.1); waitQueueResumeBytes = (int64_t)((double)ramBufferSize * 0.05); freeTrigger = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024.0 * 1024.0 * 1.05); freeLevel = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024.0 * 1024.0 * 0.95); maxBufferedDocs = IndexWriter::DEFAULT_MAX_BUFFERED_DOCS; flushedDocCount = 0; closed = false; waitQueue = newLucene(shared_from_this()); skipDocWriter = newLucene(); numBytesAlloc = 0; numBytesUsed = 0; byteBlockAllocator = newLucene(shared_from_this(), BYTE_BLOCK_SIZE); perDocAllocator = newLucene(shared_from_this(), PER_DOC_BLOCK_SIZE); IndexWriterPtr writer(_writer); this->similarity = writer->getSimilarity(); flushedDocCount = writer->maxDoc(); consumer = indexingChain->getChain(shared_from_this()); docFieldProcessor = boost::dynamic_pointer_cast(consumer); } PerDocBufferPtr DocumentsWriter::newPerDocBuffer() { return newLucene(shared_from_this()); } IndexingChainPtr DocumentsWriter::getDefaultIndexingChain() { static DefaultIndexingChainPtr defaultIndexingChain; if (!defaultIndexingChain) { defaultIndexingChain = newLucene(); CycleCheck::addStatic(defaultIndexingChain); } return defaultIndexingChain; } void DocumentsWriter::updateFlushedDocCount(int32_t n) { SyncLock syncLock(this); flushedDocCount += n; } int32_t DocumentsWriter::getFlushedDocCount() { SyncLock syncLock(this); return flushedDocCount; } void DocumentsWriter::setFlushedDocCount(int32_t n) { SyncLock syncLock(this); flushedDocCount = n; } bool DocumentsWriter::hasProx() { return docFieldProcessor ? 
docFieldProcessor->fieldInfos->hasProx() : true; } void DocumentsWriter::setInfoStream(InfoStreamPtr infoStream) { SyncLock syncLock(this); this->infoStream = infoStream; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) (*threadState)->docState->infoStream = infoStream; } void DocumentsWriter::setMaxFieldLength(int32_t maxFieldLength) { SyncLock syncLock(this); this->maxFieldLength = maxFieldLength; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) (*threadState)->docState->maxFieldLength = maxFieldLength; } void DocumentsWriter::setSimilarity(SimilarityPtr similarity) { SyncLock syncLock(this); this->similarity = similarity; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) (*threadState)->docState->similarity = similarity; } void DocumentsWriter::setRAMBufferSizeMB(double mb) { SyncLock syncLock(this); if (mb == IndexWriter::DISABLE_AUTO_FLUSH) { ramBufferSize = IndexWriter::DISABLE_AUTO_FLUSH; waitQueuePauseBytes = 4 * 1024 * 1024; waitQueueResumeBytes = 2 * 1024 * 1024; } else { ramBufferSize = (int64_t)(mb * 1024.0 * 1024.0); waitQueuePauseBytes = (int64_t)((double)ramBufferSize * 0.1); waitQueueResumeBytes = (int64_t)((double)ramBufferSize * 0.05); freeTrigger = (int64_t)(1.05 * (double)ramBufferSize); freeLevel = (int64_t)(0.95 * (double)ramBufferSize); } } double DocumentsWriter::getRAMBufferSizeMB() { SyncLock syncLock(this); if (ramBufferSize == IndexWriter::DISABLE_AUTO_FLUSH) return (double)ramBufferSize; else return (double)ramBufferSize / 1024.0 / 1024.0; } void DocumentsWriter::setMaxBufferedDocs(int32_t count) { maxBufferedDocs = count; } int32_t DocumentsWriter::getMaxBufferedDocs() { return maxBufferedDocs; } String DocumentsWriter::getSegment() { return segment; } int32_t DocumentsWriter::getNumDocsInRAM() { return numDocsInRAM; } String 
DocumentsWriter::getDocStoreSegment() { SyncLock syncLock(this); return docStoreSegment; } int32_t DocumentsWriter::getDocStoreOffset() { return docStoreOffset; } String DocumentsWriter::closeDocStore() { TestScope testScope(L"DocumentsWriter", L"closeDocStore"); SyncLock syncLock(this); BOOST_ASSERT(allThreadsIdle()); if (infoStream) { message(L"closeDocStore: " + StringUtils::toString(_openFiles.size()) + L" files to flush to segment " + docStoreSegment + L" numDocs=" + StringUtils::toString(numDocsInStore)); } bool success = false; LuceneException finally; String s; try { initFlushState(true); _closedFiles.clear(); consumer->closeDocStore(flushState); BOOST_ASSERT(_openFiles.empty()); s = docStoreSegment; docStoreSegment.clear(); docStoreOffset = 0; numDocsInStore = 0; success = true; } catch (LuceneException& e) { finally = e; } if (!success) abort(); finally.throwException(); return s; } HashSet DocumentsWriter::abortedFiles() { return _abortedFiles; } void DocumentsWriter::message(const String& message) { if (infoStream) *infoStream << L"DW " << message << L"\n"; } HashSet DocumentsWriter::openFiles() { SyncLock syncLock(this); return HashSet::newInstance(_openFiles.begin(), _openFiles.end()); } HashSet DocumentsWriter::closedFiles() { SyncLock syncLock(this); return HashSet::newInstance(_closedFiles.begin(), _closedFiles.end()); } void DocumentsWriter::addOpenFile(const String& name) { SyncLock syncLock(this); BOOST_ASSERT(!_openFiles.contains(name)); _openFiles.add(name); } void DocumentsWriter::removeOpenFile(const String& name) { SyncLock syncLock(this); BOOST_ASSERT(_openFiles.contains(name)); _openFiles.remove(name); _closedFiles.add(name); } void DocumentsWriter::setAborting() { SyncLock syncLock(this); aborting = true; } void DocumentsWriter::abort() { TestScope testScope(L"DocumentsWriter", L"abort"); SyncLock syncLock(this); LuceneException finally; try { if (infoStream) message(L"docWriter: now abort"); // Forcefully remove waiting ThreadStates 
from line waitQueue->abort(); // Wait for all other threads to finish with DocumentsWriter pauseAllThreads(); try { BOOST_ASSERT(waitQueue->numWaiting == 0); waitQueue->waitingBytes = 0; try { _abortedFiles = openFiles(); } catch (...) { _abortedFiles.reset(); } deletesInRAM->clear(); deletesFlushed->clear(); _openFiles.clear(); for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { try { (*threadState)->consumer->abort(); } catch (...) { } } try { consumer->abort(); } catch (...) { } docStoreSegment.clear(); numDocsInStore = 0; docStoreOffset = 0; // Reset all postings data doAfterFlush(); } catch (LuceneException& e) { finally = e; } resumeAllThreads(); } catch (LuceneException& e) { if (finally.isNull()) finally = e; } aborting = false; notifyAll(); if (infoStream) message(L"docWriter: done abort"); finally.throwException(); } void DocumentsWriter::doAfterFlush() { // All ThreadStates should be idle when we are called BOOST_ASSERT(allThreadsIdle()); threadBindings.clear(); waitQueue->reset(); segment.clear(); numDocsInRAM = 0; nextDocID = 0; bufferIsFull = false; flushPending = false; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) (*threadState)->doAfterFlush(); numBytesUsed = 0; } bool DocumentsWriter::pauseAllThreads() { SyncLock syncLock(this); ++pauseThreads; while (!allThreadsIdle()) wait(1000); return aborting; } void DocumentsWriter::resumeAllThreads() { SyncLock syncLock(this); --pauseThreads; BOOST_ASSERT(pauseThreads >= 0); if (pauseThreads == 0) notifyAll(); } bool DocumentsWriter::allThreadsIdle() { SyncLock syncLock(this); for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { if (!(*threadState)->isIdle) return false; } return true; } bool DocumentsWriter::anyChanges() { SyncLock syncLock(this); return (numDocsInRAM != 0 || deletesInRAM->numTerms != 0 || 
!deletesInRAM->docIDs.empty() || !deletesInRAM->queries.empty()); } void DocumentsWriter::initFlushState(bool onlyDocStore) { SyncLock syncLock(this); initSegmentName(onlyDocStore); flushState = newLucene(shared_from_this(), directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, IndexWriterPtr(_writer)->getTermIndexInterval()); } int32_t DocumentsWriter::flush(bool _closeDocStore) { SyncLock syncLock(this); BOOST_ASSERT(allThreadsIdle()); BOOST_ASSERT(numDocsInRAM > 0); BOOST_ASSERT(nextDocID == numDocsInRAM); BOOST_ASSERT(waitQueue->numWaiting == 0); BOOST_ASSERT(waitQueue->waitingBytes == 0); initFlushState(false); docStoreOffset = numDocsInStore; if (infoStream) message(L"flush postings as segment " + flushState->segmentName + L" numDocs=" + StringUtils::toString(numDocsInRAM)); bool success = false; LuceneException finally; try { if (_closeDocStore) { BOOST_ASSERT(!flushState->docStoreSegmentName.empty()); BOOST_ASSERT(flushState->docStoreSegmentName == flushState->segmentName); closeDocStore(); flushState->numDocsInStore = 0; } Collection threads(Collection::newInstance()); for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) threads.add((*threadState)->consumer); consumer->flush(threads, flushState); if (infoStream) { SegmentInfoPtr si(newLucene(flushState->segmentName, flushState->numDocs, directory)); int64_t newSegmentSize = si->sizeInBytes(); if (infoStream) { message(L" oldRAMSize=" + StringUtils::toString(numBytesUsed) + L" newFlushedSize=" + StringUtils::toString(newSegmentSize) + L" docs/MB=" + StringUtils::toString((double)numDocsInRAM / ((double)newSegmentSize / 1024.0 / 1024.0)) + L" new/old=" + StringUtils::toString(100.0 * (double)newSegmentSize / (double)numBytesUsed) + L"%"); } } flushedDocCount += flushState->numDocs; doAfterFlush(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) abort(); finally.throwException(); 
BOOST_ASSERT(waitQueue->waitingBytes == 0); return flushState->numDocs; } HashSet DocumentsWriter::getFlushedFiles() { return flushState->flushedFiles; } void DocumentsWriter::createCompoundFile(const String& segment) { CompoundFileWriterPtr cfsWriter(newLucene(directory, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION())); for (HashSet::iterator flushedFile = flushState->flushedFiles.begin(); flushedFile != flushState->flushedFiles.end(); ++flushedFile) cfsWriter->addFile(*flushedFile); // Perform the merge cfsWriter->close(); } bool DocumentsWriter::setFlushPending() { SyncLock syncLock(this); if (flushPending) return false; else { flushPending = true; return true; } } void DocumentsWriter::clearFlushPending() { SyncLock syncLock(this); flushPending = false; } void DocumentsWriter::pushDeletes() { SyncLock syncLock(this); deletesFlushed->update(deletesInRAM); } void DocumentsWriter::close() { SyncLock syncLock(this); closed = true; notifyAll(); } void DocumentsWriter::initSegmentName(bool onlyDocStore) { SyncLock syncLock(this); if (segment.empty() && (!onlyDocStore || docStoreSegment.empty())) { segment = IndexWriterPtr(_writer)->newSegmentName(); BOOST_ASSERT(numDocsInRAM == 0); } if (docStoreSegment.empty()) { docStoreSegment = segment; BOOST_ASSERT(numDocsInStore == 0); } } DocumentsWriterThreadStatePtr DocumentsWriter::getThreadState(DocumentPtr doc, TermPtr delTerm) { SyncLock syncLock(this); // First, find a thread state. If this thread already has affinity to a specific ThreadState, use that one again. DocumentsWriterThreadStatePtr state(threadBindings.get(LuceneThread::currentId())); if (!state) { // First time this thread has called us since last flush. 
Find the least loaded thread state DocumentsWriterThreadStatePtr minThreadState; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { if (!minThreadState || (*threadState)->numThreads < minThreadState->numThreads) minThreadState = *threadState; } if (minThreadState && (minThreadState->numThreads == 0 || threadStates.size() >= MAX_THREAD_STATE)) { state = minThreadState; ++state->numThreads; } else { // Just create a new "private" thread state threadStates.resize(threadStates.size() + 1); state = newLucene(shared_from_this()); threadStates[threadStates.size() - 1] = state; } threadBindings.put(LuceneThread::currentId(), state); } // Next, wait until my thread state is idle (in case it's shared with other threads) and for threads to // not be paused nor a flush pending waitReady(state); // Allocate segment name if this is the first doc since last flush initSegmentName(false); state->isIdle = false; bool success = false; LuceneException finally; try { state->docState->docID = nextDocID; BOOST_ASSERT(IndexWriterPtr(_writer)->testPoint(L"DocumentsWriter.ThreadState.init start")); if (delTerm) { addDeleteTerm(delTerm, state->docState->docID); state->doFlushAfter = timeToFlushDeletes(); } BOOST_ASSERT(IndexWriterPtr(_writer)->testPoint(L"DocumentsWriter.ThreadState.init after delTerm")); ++nextDocID; ++numDocsInRAM; // We must at this point commit to flushing to ensure we always get N docs when we flush by doc // count, even if > 1 thread is adding documents if (!flushPending && maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) { flushPending = true; state->doFlushAfter = true; } success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // Forcefully idle this ThreadState state->isIdle = true; notifyAll(); if (state->doFlushAfter) { state->doFlushAfter = false; flushPending = false; } } finally.throwException(); return state; } bool 
DocumentsWriter::addDocument(DocumentPtr doc, AnalyzerPtr analyzer) { return updateDocument(doc, analyzer, TermPtr()); } bool DocumentsWriter::updateDocument(TermPtr t, DocumentPtr doc, AnalyzerPtr analyzer) { return updateDocument(doc, analyzer, t); } bool DocumentsWriter::updateDocument(DocumentPtr doc, AnalyzerPtr analyzer, TermPtr delTerm) { // This call is synchronized but fast DocumentsWriterThreadStatePtr state(getThreadState(doc, delTerm)); DocStatePtr docState(state->docState); docState->doc = doc; docState->analyzer = analyzer; bool success = false; LuceneException finally; try { // This call is not synchronized and does all the work DocWriterPtr perDoc; try { perDoc = state->consumer->processDocument(); } catch (LuceneException& e) { finally = e; } docState->clear(); finally.throwException(); // This call is synchronized but fast finishDocument(state, perDoc); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { SyncLock syncLock(this); if (aborting) { state->isIdle = true; notifyAll(); abort(); } else { skipDocWriter->docID = docState->docID; bool success2 = false; try { waitQueue->add(skipDocWriter); success2 = true; } catch (LuceneException& e) { finally = e; } if (!success2) { state->isIdle = true; notifyAll(); abort(); return false; } state->isIdle = true; notifyAll(); // If this thread state had decided to flush, we must clear it so another thread can flush if (state->doFlushAfter) { state->doFlushAfter = false; flushPending = false; notifyAll(); } // Immediately mark this document as deleted since likely it was partially added. 
This keeps // indexing as "all or none" (atomic) when adding a document addDeleteDocID(state->docState->docID); } } finally.throwException(); return (state->doFlushAfter || timeToFlushDeletes()); } int32_t DocumentsWriter::getNumBufferedDeleteTerms() { SyncLock syncLock(this); return deletesInRAM->numTerms; } MapTermNum DocumentsWriter::getBufferedDeleteTerms() { SyncLock syncLock(this); return deletesInRAM->terms; } void DocumentsWriter::remapDeletes(SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergeDocCount) { SyncLock syncLock(this); if (!docMaps) { // The merged segments had no deletes so docIDs did not change and we have nothing to do return; } MergeDocIDRemapperPtr mapper(newLucene(infos, docMaps, delCounts, merge, mergeDocCount)); deletesInRAM->remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); deletesFlushed->remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); flushedDocCount -= mapper->docShift; } void DocumentsWriter::waitReady(DocumentsWriterThreadStatePtr state) { SyncLock syncLock(this); while (!closed && ((state && !state->isIdle) || pauseThreads != 0 || flushPending || aborting)) wait(1000); if (closed) boost::throw_exception(AlreadyClosedException(L"this IndexWriter is closed")); } bool DocumentsWriter::bufferDeleteTerms(Collection terms) { SyncLock syncLock(this); waitReady(DocumentsWriterThreadStatePtr()); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) addDeleteTerm(*term, numDocsInRAM); return timeToFlushDeletes(); } bool DocumentsWriter::bufferDeleteTerm(TermPtr term) { SyncLock syncLock(this); waitReady(DocumentsWriterThreadStatePtr()); addDeleteTerm(term, numDocsInRAM); return timeToFlushDeletes(); } bool DocumentsWriter::bufferDeleteQueries(Collection queries) { SyncLock syncLock(this); waitReady(DocumentsWriterThreadStatePtr()); for (Collection::iterator query = queries.begin(); query != queries.end(); ++query) 
addDeleteQuery(*query, numDocsInRAM); return timeToFlushDeletes(); } bool DocumentsWriter::bufferDeleteQuery(QueryPtr query) { SyncLock syncLock(this); waitReady(DocumentsWriterThreadStatePtr()); addDeleteQuery(query, numDocsInRAM); return timeToFlushDeletes(); } bool DocumentsWriter::deletesFull() { SyncLock syncLock(this); return ((ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && (deletesInRAM->bytesUsed + deletesFlushed->bytesUsed + numBytesUsed) >= ramBufferSize) || (maxBufferedDeleteTerms != IndexWriter::DISABLE_AUTO_FLUSH && ((deletesInRAM->size() + deletesFlushed->size()) >= maxBufferedDeleteTerms))); } bool DocumentsWriter::doApplyDeletes() { SyncLock syncLock(this); // Very similar to deletesFull(), except we don't count numBytesAlloc, because we are checking whether // deletes (alone) are consuming too many resources now and thus should be applied. We apply deletes // if RAM usage is > 1/2 of our allowed RAM buffer, to prevent too-frequent flushing of a long tail of // tiny segments when merges (which always apply deletes) are infrequent. 
return ((ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && (deletesInRAM->bytesUsed + deletesFlushed->bytesUsed) >= ramBufferSize / 2) || (maxBufferedDeleteTerms != IndexWriter::DISABLE_AUTO_FLUSH && ((deletesInRAM->size() + deletesFlushed->size()) >= maxBufferedDeleteTerms))); } bool DocumentsWriter::timeToFlushDeletes() { SyncLock syncLock(this); return ((bufferIsFull || deletesFull()) && setFlushPending()); } bool DocumentsWriter::checkDeleteTerm(TermPtr term) { if (term) BOOST_ASSERT(!lastDeleteTerm || term->compareTo(lastDeleteTerm) > 0); lastDeleteTerm = term; return true; } void DocumentsWriter::setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms) { this->maxBufferedDeleteTerms = maxBufferedDeleteTerms; } int32_t DocumentsWriter::getMaxBufferedDeleteTerms() { return maxBufferedDeleteTerms; } bool DocumentsWriter::hasDeletes() { SyncLock syncLock(this); return deletesFlushed->any(); } bool DocumentsWriter::applyDeletes(SegmentInfosPtr infos) { SyncLock syncLock(this); if (!hasDeletes()) return false; if (infoStream) { message(L"apply " + StringUtils::toString(deletesFlushed->numTerms) + L" buffered deleted terms and " + StringUtils::toString(deletesFlushed->docIDs.size()) + L" deleted docIDs and " + StringUtils::toString(deletesFlushed->queries.size()) + L" deleted queries on " + StringUtils::toString(infos->size()) + L" segments."); } int32_t infosEnd = infos->size(); int32_t docStart = 0; bool any = false; IndexWriterPtr writer(_writer); for (int32_t i = 0; i < infosEnd; ++i) { // Make sure we never attempt to apply deletes to segment in external dir BOOST_ASSERT(infos->info(i)->dir == directory); SegmentReaderPtr reader(writer->readerPool->get(infos->info(i), false)); LuceneException finally; try { if (applyDeletes(reader, docStart)) any = true; docStart += reader->maxDoc(); } catch (LuceneException& e) { finally = e; } writer->readerPool->release(reader); finally.throwException(); } deletesFlushed->clear(); return any; } bool 
DocumentsWriter::applyDeletes(IndexReaderPtr reader, int32_t docIDStart) { SyncLock syncLock(this); int32_t docEnd = docIDStart + reader->maxDoc(); bool any = false; BOOST_ASSERT(checkDeleteTerm(TermPtr())); // Delete by term TermDocsPtr docs(reader->termDocs()); LuceneException finally; try { for (MapTermNum::iterator entry = deletesFlushed->terms.begin(); entry != deletesFlushed->terms.end(); ++entry) { // we should be iterating a Map here, so terms better be in order BOOST_ASSERT(checkDeleteTerm(entry->first)); docs->seek(entry->first); int32_t limit = entry->second->getNum(); while (docs->next()) { int32_t docID = docs->doc(); if (docIDStart + docID >= limit) break; reader->deleteDocument(docID); any = true; } } } catch (LuceneException& e) { finally = e; } docs->close(); finally.throwException(); // Delete by docID for (Collection::iterator docID = deletesFlushed->docIDs.begin(); docID != deletesFlushed->docIDs.end(); ++docID) { if (*docID >= docIDStart && *docID < docEnd) { reader->deleteDocument(*docID - docIDStart); any = true; } } // Delete by query IndexSearcherPtr searcher(newLucene(reader)); for (MapQueryInt::iterator entry = deletesFlushed->queries.begin(); entry != deletesFlushed->queries.end(); ++entry) { WeightPtr weight(entry->first->weight(searcher)); ScorerPtr scorer(weight->scorer(reader, true, false)); if (scorer) { while (true) { int32_t doc = scorer->nextDoc(); if ((int64_t)docIDStart + doc >= entry->second) break; reader->deleteDocument(doc); any = true; } } } searcher->close(); return any; } void DocumentsWriter::addDeleteTerm(TermPtr term, int32_t docCount) { SyncLock syncLock(this); NumPtr num(deletesInRAM->terms.get(term)); int32_t docIDUpto = flushedDocCount + docCount; if (!num) deletesInRAM->terms.put(term, newLucene(docIDUpto)); else num->setNum(docIDUpto); ++deletesInRAM->numTerms; deletesInRAM->addBytesUsed(BYTES_PER_DEL_TERM + term->_text.length() * CHAR_NUM_BYTE); } void DocumentsWriter::addDeleteDocID(int32_t docID) { SyncLock 
syncLock(this); deletesInRAM->docIDs.add(flushedDocCount + docID); deletesInRAM->addBytesUsed(BYTES_PER_DEL_DOCID); } void DocumentsWriter::addDeleteQuery(QueryPtr query, int32_t docID) { SyncLock syncLock(this); deletesInRAM->queries.put(query, flushedDocCount + docID); deletesInRAM->addBytesUsed(BYTES_PER_DEL_QUERY); } bool DocumentsWriter::doBalanceRAM() { SyncLock syncLock(this); return (ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && !bufferIsFull && (numBytesUsed + deletesInRAM->bytesUsed + deletesFlushed->bytesUsed >= ramBufferSize || numBytesAlloc >= freeTrigger)); } void DocumentsWriter::finishDocument(DocumentsWriterThreadStatePtr perThread, DocWriterPtr docWriter) { if (doBalanceRAM()) { // Must call this without holding synchronized(this) else we'll hit deadlock balanceRAM(); } { SyncLock syncLock(this); BOOST_ASSERT(!docWriter || docWriter->docID == perThread->docState->docID); if (aborting) { // We are currently aborting, and another thread is waiting for me to become idle. We // just forcefully idle this threadState; it will be fully reset by abort() if (docWriter) { try { docWriter->abort(); } catch (...) { } } perThread->isIdle = true; notifyAll(); return; } bool doPause; if (docWriter) doPause = waitQueue->add(docWriter); else { skipDocWriter->docID = perThread->docState->docID; doPause = waitQueue->add(skipDocWriter); } if (doPause) waitForWaitQueue(); if (bufferIsFull && !flushPending) { flushPending = true; perThread->doFlushAfter = true; } perThread->isIdle = true; notifyAll(); } } void DocumentsWriter::waitForWaitQueue() { SyncLock syncLock(this); do { wait(1000); } while (!waitQueue->doResume()); } int64_t DocumentsWriter::getRAMUsed() { return numBytesUsed + deletesInRAM->bytesUsed + deletesFlushed->bytesUsed; } IntArray DocumentsWriter::getIntBlock(bool trackAllocations) { SyncLock syncLock(this); int32_t size = freeIntBlocks.size(); IntArray b; if (size == 0) { // Always record a block allocated, even if trackAllocations is false. 
This is necessary because // this block will be shared between things that don't track allocations (term vectors) and things // that do (freq/prox postings). numBytesAlloc += INT_BLOCK_SIZE * INT_NUM_BYTE; b = IntArray::newInstance(INT_BLOCK_SIZE); } else b = freeIntBlocks.removeLast(); if (trackAllocations) numBytesUsed += INT_BLOCK_SIZE * INT_NUM_BYTE; BOOST_ASSERT(numBytesUsed <= numBytesAlloc); return b; } void DocumentsWriter::bytesAllocated(int64_t numBytes) { SyncLock syncLock(this); numBytesAlloc += numBytes; } void DocumentsWriter::bytesUsed(int64_t numBytes) { SyncLock syncLock(this); numBytesUsed += numBytes; BOOST_ASSERT(numBytesUsed <= numBytesAlloc); } void DocumentsWriter::recycleIntBlocks(Collection blocks, int32_t start, int32_t end) { SyncLock syncLock(this); for (int32_t i = start; i < end; ++i) { freeIntBlocks.add(blocks[i]); blocks[i].reset(); } } CharArray DocumentsWriter::getCharBlock() { SyncLock syncLock(this); int32_t size = freeCharBlocks.size(); CharArray c; if (size == 0) { numBytesAlloc += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; c = CharArray::newInstance(CHAR_BLOCK_SIZE); } else c = freeCharBlocks.removeLast(); // We always track allocations of char blocks for now because nothing that skips allocation tracking // (currently only term vectors) uses its own char blocks. 
numBytesUsed += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; BOOST_ASSERT(numBytesUsed <= numBytesAlloc); return c; } void DocumentsWriter::recycleCharBlocks(Collection blocks, int32_t numBlocks) { SyncLock syncLock(this); for (int32_t i = 0; i < numBlocks; ++i) { freeCharBlocks.add(blocks[i]); blocks[i].reset(); } } String DocumentsWriter::toMB(int64_t v) { return StringUtils::toString((double)v / 1024.0 / 1024.0); } void DocumentsWriter::balanceRAM() { // We flush when we've used our target usage int64_t flushTrigger = ramBufferSize; int64_t deletesRAMUsed = deletesInRAM->bytesUsed + deletesFlushed->bytesUsed; if (numBytesAlloc + deletesRAMUsed > freeTrigger) { if (infoStream) { message(L" RAM: now balance allocations: usedMB=" + toMB(numBytesUsed) + L" vs trigger=" + toMB(flushTrigger) + L" allocMB=" + toMB(numBytesAlloc) + L" deletesMB=" + toMB(deletesRAMUsed) + L" vs trigger=" + toMB(freeTrigger) + L" byteBlockFree=" + toMB(byteBlockAllocator->freeByteBlocks.size() * BYTE_BLOCK_SIZE) + L" perDocFree=" + toMB(perDocAllocator->freeByteBlocks.size() * PER_DOC_BLOCK_SIZE) + L" charBlockFree=" + toMB(freeCharBlocks.size() * CHAR_BLOCK_SIZE * CHAR_NUM_BYTE)); } int64_t startBytesAlloc = numBytesAlloc + deletesRAMUsed; int32_t iter = 0; // We free equally from each pool in 32 KB chunks until we are below our threshold (freeLevel) bool any = true; while (numBytesAlloc + deletesRAMUsed > freeLevel) { { SyncLock syncLock(this); if (perDocAllocator->freeByteBlocks.empty() && byteBlockAllocator->freeByteBlocks.empty() && freeCharBlocks.empty() && freeIntBlocks.empty() && !any) { // Nothing else to free -- must flush now. 
bufferIsFull = (numBytesUsed + deletesRAMUsed > flushTrigger); if (infoStream) { if (bufferIsFull) message(L" nothing to free; now set bufferIsFull"); else message(L" nothing to free"); } BOOST_ASSERT(numBytesUsed <= numBytesAlloc); break; } if ((iter % 5) == 0 && !byteBlockAllocator->freeByteBlocks.empty()) { byteBlockAllocator->freeByteBlocks.removeLast(); numBytesAlloc -= BYTE_BLOCK_SIZE; } if ((iter % 5) == 1 && !freeCharBlocks.empty()) { freeCharBlocks.removeLast(); numBytesAlloc -= CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; } if ((iter % 5) == 2 && !freeIntBlocks.empty()) { freeIntBlocks.removeLast(); numBytesAlloc -= INT_BLOCK_SIZE * INT_NUM_BYTE; } if ((iter % 5) == 3 && !perDocAllocator->freeByteBlocks.empty()) { // Remove upwards of 32 blocks (each block is 1K) for (int32_t i = 0; i < 32; ++i) { perDocAllocator->freeByteBlocks.removeLast(); numBytesAlloc -= PER_DOC_BLOCK_SIZE; if (perDocAllocator->freeByteBlocks.empty()) break; } } } if ((iter % 5) == 4 && any) { // Ask consumer to free any recycled state any = consumer->freeRAM(); } ++iter; } if (infoStream) { message(L" after free: freedMB=" + StringUtils::toString((double)(startBytesAlloc - numBytesAlloc - deletesRAMUsed) / 1024.0 / 1024.0) + L" usedMB=" + StringUtils::toString((double)(numBytesUsed + deletesRAMUsed) / 1024.0 / 1024.0) + L" allocMB=" + StringUtils::toString((double)numBytesAlloc / 1024.0 / 1024.0)); } } else { // If we have not crossed the 100% mark, but have crossed the 95% mark of RAM we are actually // using, go ahead and flush. This prevents over-allocating and then freeing, with every flush. 
SyncLock syncLock(this); if (numBytesUsed + deletesRAMUsed > flushTrigger) { if (infoStream) { message(L" RAM: now flush @ usedMB=" + StringUtils::toString((double)numBytesUsed / 1024.0 / 1024.0) + L" allocMB=" + StringUtils::toString((double)numBytesAlloc / 1024.0 / 1024.0) + L" deletesMB=" + StringUtils::toString((double)deletesRAMUsed / 1024.0 / 1024.0) + L" triggerMB=" + StringUtils::toString((double)flushTrigger / 1024.0 / 1024.0)); } bufferIsFull = true; } } } DocState::DocState() { maxFieldLength = 0; docID = 0; } DocState::~DocState() { } bool DocState::testPoint(const String& name) { return IndexWriterPtr(DocumentsWriterPtr(_docWriter)->_writer)->testPoint(name); } void DocState::clear() { // don't hold onto doc nor analyzer, in case it is large doc.reset(); analyzer.reset(); } PerDocBuffer::PerDocBuffer(DocumentsWriterPtr docWriter) { _docWriter = docWriter; } PerDocBuffer::~PerDocBuffer() { } ByteArray PerDocBuffer::newBuffer(int32_t size) { BOOST_ASSERT(size == DocumentsWriter::PER_DOC_BLOCK_SIZE); return DocumentsWriterPtr(_docWriter)->perDocAllocator->getByteBlock(false); } void PerDocBuffer::recycle() { SyncLock syncLock(this); if (!buffers.empty()) { setLength(0); // Recycle the blocks DocumentsWriterPtr(_docWriter)->perDocAllocator->recycleByteBlocks(buffers); buffers.clear(); sizeInBytes = 0; BOOST_ASSERT(numBuffers() == 0); } } DocWriter::DocWriter() { docID = 0; } DocWriter::~DocWriter() { } void DocWriter::setNext(DocWriterPtr next) { this->next = next; } IndexingChain::~IndexingChain() { } DefaultIndexingChain::~DefaultIndexingChain() { } DocConsumerPtr DefaultIndexingChain::getChain(DocumentsWriterPtr documentsWriter) { TermsHashConsumerPtr termVectorsWriter(newLucene(documentsWriter)); TermsHashConsumerPtr freqProxWriter(newLucene()); InvertedDocConsumerPtr termsHash(newLucene(documentsWriter, true, freqProxWriter, newLucene(documentsWriter, false, termVectorsWriter, TermsHashPtr()))); DocInverterPtr docInverter(newLucene(termsHash, 
newLucene())); return newLucene(documentsWriter, docInverter); } SkipDocWriter::~SkipDocWriter() { } void SkipDocWriter::finish() { } void SkipDocWriter::abort() { } int64_t SkipDocWriter::sizeInBytes() { return 0; } WaitQueue::WaitQueue(DocumentsWriterPtr docWriter) { this->_docWriter = docWriter; waiting = Collection::newInstance(10); nextWriteDocID = 0; nextWriteLoc = 0; numWaiting = 0; waitingBytes = 0; } WaitQueue::~WaitQueue() { } void WaitQueue::reset() { SyncLock syncLock(this); // NOTE: nextWriteLoc doesn't need to be reset BOOST_ASSERT(numWaiting == 0); BOOST_ASSERT(waitingBytes == 0); nextWriteDocID = 0; } bool WaitQueue::doResume() { SyncLock syncLock(this); return (waitingBytes <= DocumentsWriterPtr(_docWriter)->waitQueueResumeBytes); } bool WaitQueue::doPause() { SyncLock syncLock(this); return (waitingBytes > DocumentsWriterPtr(_docWriter)->waitQueuePauseBytes); } void WaitQueue::abort() { SyncLock syncLock(this); int32_t count = 0; for (Collection::iterator doc = waiting.begin(); doc != waiting.end(); ++doc) { if (*doc) { (*doc)->abort(); doc->reset(); ++count; } } waitingBytes = 0; BOOST_ASSERT(count == numWaiting); numWaiting = 0; } void WaitQueue::writeDocument(DocWriterPtr doc) { DocumentsWriterPtr docWriter(_docWriter); BOOST_ASSERT(doc == DocumentsWriterPtr(docWriter)->skipDocWriter || nextWriteDocID == doc->docID); bool success = false; LuceneException finally; try { doc->finish(); ++nextWriteDocID; ++docWriter->numDocsInStore; ++nextWriteLoc; BOOST_ASSERT(nextWriteLoc <= waiting.size()); if (nextWriteLoc == waiting.size()) nextWriteLoc = 0; success = true; } catch (LuceneException& e) { finally = e; } if (!success) docWriter->setAborting(); finally.throwException(); } bool WaitQueue::add(DocWriterPtr doc) { SyncLock syncLock(this); BOOST_ASSERT(doc->docID >= nextWriteDocID); if (doc->docID == nextWriteDocID) { writeDocument(doc); while (true) { doc = waiting[nextWriteLoc]; if (doc) { --numWaiting; waiting[nextWriteLoc].reset(); waitingBytes 
-= doc->sizeInBytes(); writeDocument(doc); } else break; } } else { // I finished before documents that were added before me. This can easily happen when I am a small doc // and the docs before me were large, or just due to luck in the thread scheduling. Just add myself to // the queue and when that large doc finishes, it will flush me int32_t gap = doc->docID - nextWriteDocID; if (gap >= waiting.size()) { // Grow queue Collection newArray(Collection::newInstance(MiscUtils::getNextSize(gap))); BOOST_ASSERT(nextWriteLoc >= 0); MiscUtils::arrayCopy(waiting.begin(), nextWriteLoc, newArray.begin(), 0, waiting.size() - nextWriteLoc); MiscUtils::arrayCopy(waiting.begin(), 0, newArray.begin(), waiting.size() - nextWriteLoc, nextWriteLoc); nextWriteLoc = 0; waiting = newArray; gap = doc->docID - nextWriteDocID; } int32_t loc = nextWriteLoc + gap; if (loc >= waiting.size()) loc -= waiting.size(); // We should only wrap one time BOOST_ASSERT(loc < waiting.size()); // Nobody should be in my spot! BOOST_ASSERT(!waiting[loc]); waiting[loc] = doc; ++numWaiting; waitingBytes += doc->sizeInBytes(); } return doPause(); } ByteBlockAllocator::ByteBlockAllocator(DocumentsWriterPtr docWriter, int32_t blockSize) { this->blockSize = blockSize; this->freeByteBlocks = Collection::newInstance(); this->_docWriter = docWriter; } ByteBlockAllocator::~ByteBlockAllocator() { } ByteArray ByteBlockAllocator::getByteBlock(bool trackAllocations) { DocumentsWriterPtr docWriter(_docWriter); SyncLock syncLock(docWriter); int32_t size = freeByteBlocks.size(); ByteArray b; if (size == 0) { // Always record a block allocated, even if trackAllocations is false. This is necessary because this block will // be shared between things that don't track allocations (term vectors) and things that do (freq/prox postings). 
docWriter->numBytesAlloc += blockSize; b = ByteArray::newInstance(blockSize); MiscUtils::arrayFill(b.get(), 0, b.size(), 0); } else b = freeByteBlocks.removeLast(); if (trackAllocations) docWriter->numBytesUsed += blockSize; BOOST_ASSERT(docWriter->numBytesUsed <= docWriter->numBytesAlloc); return b; } void ByteBlockAllocator::recycleByteBlocks(Collection blocks, int32_t start, int32_t end) { DocumentsWriterPtr docWriter(_docWriter); SyncLock syncLock(docWriter); for (int32_t i = start; i < end; ++i) { freeByteBlocks.add(blocks[i]); blocks[i].reset(); } } void ByteBlockAllocator::recycleByteBlocks(Collection blocks) { DocumentsWriterPtr docWriter(_docWriter); SyncLock syncLock(docWriter); int32_t size = blocks.size(); for (int32_t i = 0; i < size; ++i) freeByteBlocks.add(blocks[i]); } } LucenePlusPlus-rel_3.0.4/src/core/index/DocumentsWriterThreadState.cpp000066400000000000000000000024721217574114600261410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocumentsWriterThreadState.h" #include "DocumentsWriter.h" #include "DocConsumer.h" namespace Lucene { DocumentsWriterThreadState::DocumentsWriterThreadState(DocumentsWriterPtr docWriter) { this->_docWriter = docWriter; } DocumentsWriterThreadState::~DocumentsWriterThreadState() { } void DocumentsWriterThreadState::initialize() { isIdle = true; doFlushAfter = false; numThreads = 1; DocumentsWriterPtr docWriter(_docWriter); docState = newLucene(); docState->maxFieldLength = docWriter->maxFieldLength; docState->infoStream = docWriter->infoStream; docState->similarity = docWriter->similarity; docState->_docWriter = docWriter; consumer = docWriter->consumer->addThread(shared_from_this()); } void DocumentsWriterThreadState::doAfterFlush() { numThreads = 0; doFlushAfter = false; } } LucenePlusPlus-rel_3.0.4/src/core/index/FieldInfo.cpp000066400000000000000000000057271217574114600225170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldInfo.h" namespace Lucene { FieldInfo::FieldInfo(const String& name, bool isIndexed, int32_t number, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { this->name = name; this->isIndexed = isIndexed; this->number = number; // for non-indexed fields, leave defaults this->storeTermVector = isIndexed ? storeTermVector : false; this->storeOffsetWithTermVector = isIndexed ? storeOffsetWithTermVector : false; this->storePositionWithTermVector = isIndexed ? 
storePositionWithTermVector : false; this->storePayloads = isIndexed ? storePayloads : false; this->omitNorms = isIndexed ? omitNorms : true; this->omitTermFreqAndPositions = isIndexed ? omitTermFreqAndPositions : false; } FieldInfo::~FieldInfo() { } LuceneObjectPtr FieldInfo::clone(LuceneObjectPtr other) { return newLucene(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); } void FieldInfo::update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { if (this->isIndexed != isIndexed) this->isIndexed = true; // once indexed, always index if (isIndexed) // if updated field data is not for indexing, leave the updates out { if (this->storeTermVector != storeTermVector) this->storeTermVector = true; // once vector, always vector if (this->storePositionWithTermVector != storePositionWithTermVector) this->storePositionWithTermVector = true; // once vector, always vector if (this->storeOffsetWithTermVector != storeOffsetWithTermVector) this->storeOffsetWithTermVector = true; // once vector, always vector if (this->storePayloads != storePayloads) this->storePayloads = true; if (this->omitNorms != omitNorms) this->omitNorms = false; // once norms are stored, always store if (this->omitTermFreqAndPositions != omitTermFreqAndPositions) this->omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life } } } LucenePlusPlus-rel_3.0.4/src/core/index/FieldInfos.cpp000066400000000000000000000250611217574114600226730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "IndexInput.h" #include "IndexOutput.h" #include "Directory.h" #include "Document.h" #include "Fieldable.h" #include "StringUtils.h" namespace Lucene { // Used internally (ie not written to *.fnm files) for pre-2.9 files const int32_t FieldInfos::FORMAT_PRE = -1; // First used in 2.9; prior to 2.9 there was no format header const int32_t FieldInfos::FORMAT_START = -2; const int32_t FieldInfos::CURRENT_FORMAT = FieldInfos::FORMAT_START; const uint8_t FieldInfos::IS_INDEXED = 0x1; const uint8_t FieldInfos::STORE_TERMVECTOR = 0x2; const uint8_t FieldInfos::STORE_POSITIONS_WITH_TERMVECTOR = 0x4; const uint8_t FieldInfos::STORE_OFFSET_WITH_TERMVECTOR = 0x8; const uint8_t FieldInfos::OMIT_NORMS = 0x10; const uint8_t FieldInfos::STORE_PAYLOADS = 0x20; const uint8_t FieldInfos::OMIT_TERM_FREQ_AND_POSITIONS = 0x40; FieldInfos::FieldInfos() { format = 0; byNumber = Collection::newInstance(); byName = MapStringFieldInfo::newInstance(); } FieldInfos::FieldInfos(DirectoryPtr d, const String& name) { format = 0; byNumber = Collection::newInstance(); byName = MapStringFieldInfo::newInstance(); IndexInputPtr input(d->openInput(name)); LuceneException finally; try { try { read(input, name); } catch (IOException& e) { if (format == FORMAT_PRE) { input->seek(0); input->setModifiedUTF8StringsMode(); byNumber.clear(); byName.clear(); try { read(input, name); } catch (...) 
{ // Ignore any new exception & throw original IOE finally = e; } } else finally = e; } } catch (LuceneException& e) { finally = e; } input->close(); finally.throwException(); } FieldInfos::~FieldInfos() { } LuceneObjectPtr FieldInfos::clone(LuceneObjectPtr other) { SyncLock syncLock(this); FieldInfosPtr fis(newLucene()); for (Collection::iterator field = byNumber.begin(); field != byNumber.end(); ++field) { FieldInfoPtr fi(boost::dynamic_pointer_cast((*field)->clone())); fis->byNumber.add(fi); fis->byName.put(fi->name, fi); } return fis; } void FieldInfos::add(DocumentPtr doc) { SyncLock syncLock(this); Collection fields(doc->getFields()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { add((*field)->name(), (*field)->isIndexed(), (*field)->isTermVectorStored(), (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions()); } } bool FieldInfos::hasProx() { for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { if ((*fi)->isIndexed && !(*fi)->omitTermFreqAndPositions) return true; } return false; } void FieldInfos::addIndexed(HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector) { SyncLock syncLock(this); for (HashSet::iterator name = names.begin(); name != names.end(); ++name) add(*name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector); } void FieldInfos::add(HashSet names, bool isIndexed) { SyncLock syncLock(this); for (HashSet::iterator name = names.begin(); name != names.end(); ++name) add(*name, isIndexed); } void FieldInfos::add(const String& name, bool isIndexed) { add(name, isIndexed, false, false, false, false); } void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector) { add(name, isIndexed, storeTermVector, false, false, false); } void FieldInfos::add(const String& name, bool isIndexed, bool 
storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector) { add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false); } void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms) { add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false); } FieldInfoPtr FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { SyncLock syncLock(this); FieldInfoPtr fi(fieldInfo(name)); if (!fi) return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); else fi->update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); return fi; } FieldInfoPtr FieldInfos::addInternal(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { FieldInfoPtr fi(newLucene(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions)); byNumber.add(fi); byName.put(name, fi); return fi; } int32_t FieldInfos::fieldNumber(const String& fieldName) { FieldInfoPtr fi(fieldInfo(fieldName)); return fi ? fi->number : -1; } FieldInfoPtr FieldInfos::fieldInfo(const String& fieldName) { return byName.get(fieldName); } String FieldInfos::fieldName(int32_t fieldNumber) { FieldInfoPtr fi(fieldInfo(fieldNumber)); return fi ? 
fi->name : L""; } FieldInfoPtr FieldInfos::fieldInfo(int32_t fieldNumber) { return (fieldNumber >= 0 && fieldNumber < byNumber.size()) ? byNumber[fieldNumber] : FieldInfoPtr(); } int32_t FieldInfos::size() { return byNumber.size(); } bool FieldInfos::hasVectors() { for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { if ((*fi)->storeTermVector) return true; } return false; } void FieldInfos::write(DirectoryPtr d, const String& name) { IndexOutputPtr output(d->createOutput(name)); LuceneException finally; try { write(output); } catch (LuceneException& e) { finally = e; } output->close(); finally.throwException(); } void FieldInfos::write(IndexOutputPtr output) { output->writeVInt(CURRENT_FORMAT); output->writeVInt(size()); for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { uint8_t bits = 0x0; if ((*fi)->isIndexed) bits |= IS_INDEXED; if ((*fi)->storeTermVector) bits |= STORE_TERMVECTOR; if ((*fi)->storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR; if ((*fi)->storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR; if ((*fi)->omitNorms) bits |= OMIT_NORMS; if ((*fi)->storePayloads) bits |= STORE_PAYLOADS; if ((*fi)->omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS; output->writeString((*fi)->name); output->writeByte(bits); } } void FieldInfos::read(IndexInputPtr input, const String& fileName) { int32_t firstInt = input->readVInt(); format = firstInt < 0 ? firstInt : FORMAT_PRE; // This is a real format? if (format != FORMAT_PRE && format != FORMAT_START) boost::throw_exception(CorruptIndexException(L"unrecognized format " + StringUtils::toString(format) + L" in file \"" + fileName + L"\"")); int32_t size = format == FORMAT_PRE ? 
firstInt : input->readVInt(); // read in the size if required for (int32_t i = 0; i < size; ++i) { String name(input->readString()); uint8_t bits = input->readByte(); addInternal(name, (bits & IS_INDEXED) != 0, (bits & STORE_TERMVECTOR) != 0, (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0, (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0, (bits & OMIT_NORMS) != 0, (bits & STORE_PAYLOADS) != 0, (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0); } if (input->getFilePointer() != input->length()) { boost::throw_exception(CorruptIndexException(L"did not read all bytes from file \"" + fileName + L"\": read " + StringUtils::toString(input->getFilePointer()) + L" vs size " + StringUtils::toString(input->length()))); } } } LucenePlusPlus-rel_3.0.4/src/core/index/FieldInvertState.cpp000066400000000000000000000027531217574114600240700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldInvertState.h" namespace Lucene { FieldInvertState::FieldInvertState(int32_t position, int32_t length, int32_t numOverlap, int32_t offset, double boost) { this->position = position; this->length = length; this->numOverlap = numOverlap; this->offset = offset; this->boost = boost; } FieldInvertState::~FieldInvertState() { } void FieldInvertState::reset(double docBoost) { position = 0; length = 0; numOverlap = 0; offset = 0; boost = docBoost; attributeSource.reset(); } int32_t FieldInvertState::getPosition() { return position; } int32_t FieldInvertState::getLength() { return length; } int32_t FieldInvertState::getNumOverlap() { return numOverlap; } int32_t FieldInvertState::getOffset() { return offset; } double FieldInvertState::getBoost() { return boost; } AttributeSourcePtr FieldInvertState::getAttributeSource() { return attributeSource; } } LucenePlusPlus-rel_3.0.4/src/core/index/FieldSortedTermVectorMapper.cpp000066400000000000000000000041751217574114600262400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldSortedTermVectorMapper.h" #include "TermVectorEntry.h" namespace Lucene { FieldSortedTermVectorMapper::FieldSortedTermVectorMapper(TermVectorEntryComparator comparator) : TermVectorMapper(false, false) { this->fieldToTerms = MapStringCollectionTermVectorEntry::newInstance(); this->comparator = comparator; } FieldSortedTermVectorMapper::FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator) : TermVectorMapper(ignoringPositions, ignoringOffsets) { this->fieldToTerms = MapStringCollectionTermVectorEntry::newInstance(); this->comparator = comparator; } FieldSortedTermVectorMapper::~FieldSortedTermVectorMapper() { } void FieldSortedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { TermVectorEntryPtr entry(newLucene(currentField, term, frequency, offsets, positions)); if (!currentSet.contains_if(luceneEqualTo(entry))) currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry); } void FieldSortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { currentSet = Collection::newInstance(); currentField = field; fieldToTerms.put(field, currentSet); } MapStringCollectionTermVectorEntry FieldSortedTermVectorMapper::getFieldToTerms() { return fieldToTerms; } TermVectorEntryComparator FieldSortedTermVectorMapper::getComparator() { return comparator; } } LucenePlusPlus-rel_3.0.4/src/core/index/FieldsReader.cpp000066400000000000000000000512261217574114600232040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldsReader.h" #include "BufferedIndexInput.h" #include "IndexFileNames.h" #include "FieldsWriter.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "FieldSelector.h" #include "Directory.h" #include "Document.h" #include "Field.h" #include "CompressionTools.h" #include "MiscUtils.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { FieldsReader::FieldsReader(FieldInfosPtr fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, int32_t formatSize, int32_t docStoreOffset, IndexInputPtr cloneableFieldsStream, IndexInputPtr cloneableIndexStream) { closed = false; isOriginal = false; this->fieldInfos = fieldInfos; this->numTotalDocs = numTotalDocs; this->_size = size; this->format = format; this->formatSize = formatSize; this->docStoreOffset = docStoreOffset; this->cloneableFieldsStream = cloneableFieldsStream; this->cloneableIndexStream = cloneableIndexStream; fieldsStream = boost::dynamic_pointer_cast(cloneableFieldsStream->clone()); indexStream = boost::dynamic_pointer_cast(cloneableIndexStream->clone()); } FieldsReader::FieldsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn) { ConstructReader(d, segment, fn, BufferedIndexInput::BUFFER_SIZE, -1, 0); } FieldsReader::FieldsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { ConstructReader(d, segment, fn, readBufferSize, docStoreOffset, size); } FieldsReader::~FieldsReader() { } void FieldsReader::ConstructReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { bool success = false; isOriginal = true; numTotalDocs = 0; _size = 0; closed = false; format = 0; formatSize = 0; docStoreOffset = docStoreOffset; LuceneException finally; try { fieldInfos = fn; cloneableFieldsStream = d->openInput(segment + L"." 
+ IndexFileNames::FIELDS_EXTENSION(), readBufferSize); cloneableIndexStream = d->openInput(segment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION(), readBufferSize); // First version of fdx did not include a format header, but, the first int will always be 0 in that case format = cloneableIndexStream->readInt(); if (format > FieldsWriter::FORMAT_CURRENT) { boost::throw_exception(CorruptIndexException(L"Incompatible format version: " + StringUtils::toString(format) + L" expected " + StringUtils::toString(FieldsWriter::FORMAT_CURRENT) + L" or lower")); } formatSize = format > FieldsWriter::FORMAT ? 4 : 0; if (format < FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) cloneableFieldsStream->setModifiedUTF8StringsMode(); fieldsStream = boost::dynamic_pointer_cast(cloneableFieldsStream->clone()); int64_t indexSize = cloneableIndexStream->length() - formatSize; if (docStoreOffset != -1) { // We read only a slice out of this shared fields file this->docStoreOffset = docStoreOffset; this->_size = size; // Verify the file is long enough to hold all of our docs BOOST_ASSERT(((int32_t)((double)indexSize / 8.0)) >= _size + this->docStoreOffset); } else { this->docStoreOffset = 0; this->_size = (int32_t)(indexSize >> 3); } indexStream = boost::dynamic_pointer_cast(cloneableIndexStream->clone()); numTotalDocs = (int32_t)(indexSize >> 3); success = true; } catch (LuceneException& e) { finally = e; } // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. 
// In this case, we want to explicitly close any subset of things that were opened if (!success) close(); finally.throwException(); } LuceneObjectPtr FieldsReader::clone(LuceneObjectPtr other) { ensureOpen(); return newLucene(fieldInfos, numTotalDocs, _size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream); } void FieldsReader::ensureOpen() { if (closed) boost::throw_exception(AlreadyClosedException(L"this FieldsReader is closed")); } void FieldsReader::close() { if (!closed) { if (fieldsStream) fieldsStream->close(); if (isOriginal) { if (cloneableFieldsStream) cloneableFieldsStream->close(); if (cloneableIndexStream) cloneableIndexStream->close(); } if (indexStream) indexStream->close(); fieldsStreamTL.close(); closed = true; } } int32_t FieldsReader::size() { return _size; } void FieldsReader::seekIndex(int32_t docID) { indexStream->seek(formatSize + (docID + docStoreOffset) * 8); } bool FieldsReader::canReadRawDocs() { // Disable reading raw docs in 2.x format, because of the removal of compressed fields in 3.0. // We don't want rawDocs() to decode field bits to figure out if a field was compressed, hence // we enforce ordinary (non-raw) stored field merges for <3.0 indexes. return (format >= FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS); } DocumentPtr FieldsReader::doc(int32_t n, FieldSelectorPtr fieldSelector) { seekIndex(n); int64_t position = indexStream->readLong(); fieldsStream->seek(position); DocumentPtr doc(newLucene()); int32_t numFields = fieldsStream->readVInt(); for (int32_t i = 0; i < numFields; ++i) { int32_t fieldNumber = fieldsStream->readVInt(); FieldInfoPtr fi = fieldInfos->fieldInfo(fieldNumber); FieldSelector::FieldSelectorResult acceptField = fieldSelector ? 
fieldSelector->accept(fi->name) : FieldSelector::SELECTOR_LOAD; uint8_t bits = fieldsStream->readByte(); BOOST_ASSERT(bits <= FieldsWriter::FIELD_IS_COMPRESSED + FieldsWriter::FIELD_IS_TOKENIZED + FieldsWriter::FIELD_IS_BINARY); bool compressed = ((bits & FieldsWriter::FIELD_IS_COMPRESSED) != 0); // compressed fields are only allowed in indexes of version <= 2.9 BOOST_ASSERT(compressed ? (format < FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true); bool tokenize = ((bits & FieldsWriter::FIELD_IS_TOKENIZED) != 0); bool binary = ((bits & FieldsWriter::FIELD_IS_BINARY) != 0); if (acceptField == FieldSelector::SELECTOR_LOAD) addField(doc, fi, binary, compressed, tokenize); else if (acceptField == FieldSelector::SELECTOR_LOAD_AND_BREAK) { addField(doc, fi, binary, compressed, tokenize); break; // Get out of this loop } else if (acceptField == FieldSelector::SELECTOR_LAZY_LOAD) addFieldLazy(doc, fi, binary, compressed, tokenize); else if (acceptField == FieldSelector::SELECTOR_SIZE) skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed)); else if (acceptField == FieldSelector::SELECTOR_SIZE_AND_BREAK) { addFieldSize(doc, fi, binary, compressed); break; } else skipField(binary, compressed); } return doc; } IndexInputPtr FieldsReader::rawDocs(Collection lengths, int32_t startDocID, int32_t numDocs) { seekIndex(startDocID); int64_t startOffset = indexStream->readLong(); int64_t lastOffset = startOffset; int32_t count = 0; while (count < numDocs) { int32_t docID = docStoreOffset + startDocID + count + 1; BOOST_ASSERT(docID <= numTotalDocs); int64_t offset = docID < numTotalDocs ? 
indexStream->readLong() : fieldsStream->length(); lengths[count++] = (int32_t)(offset - lastOffset); lastOffset = offset; } fieldsStream->seek(startOffset); return fieldsStream; } void FieldsReader::skipField(bool binary, bool compressed) { skipField(binary, compressed, fieldsStream->readVInt()); } void FieldsReader::skipField(bool binary, bool compressed, int32_t toRead) { if (format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) fieldsStream->seek(fieldsStream->getFilePointer() + toRead); else { // We need to skip chars. This will slow us down, but still better fieldsStream->skipChars(toRead); } } void FieldsReader::addFieldLazy(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed, bool tokenize) { if (binary) { int32_t toRead = fieldsStream->readVInt(); int64_t pointer = fieldsStream->getFilePointer(); doc->add(newLucene(shared_from_this(), fi->name, Field::STORE_YES, toRead, pointer, binary, compressed)); fieldsStream->seek(pointer + toRead); } else { Field::Store store = Field::STORE_YES; Field::Index index = Field::toIndex(fi->isIndexed, tokenize); Field::TermVector termVector = Field::toTermVector(fi->storeTermVector, fi->storeOffsetWithTermVector, fi->storePositionWithTermVector); AbstractFieldPtr f; if (compressed) { int32_t toRead = fieldsStream->readVInt(); int64_t pointer = fieldsStream->getFilePointer(); f = newLucene(shared_from_this(), fi->name, store, toRead, pointer, binary, compressed); // skip over the part that we aren't loading fieldsStream->seek(pointer + toRead); f->setOmitNorms(fi->omitNorms); f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); } else { int32_t length = fieldsStream->readVInt(); int64_t pointer = fieldsStream->getFilePointer(); // skip ahead of where we are by the length of what is stored if (format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) fieldsStream->seek(pointer + length); else fieldsStream->skipChars(length); f = newLucene(shared_from_this(), fi->name, 
store, index, termVector, length, pointer, binary, compressed); f->setOmitNorms(fi->omitNorms); f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); } doc->add(f); } } void FieldsReader::addField(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed, bool tokenize) { // we have a binary stored field, and it may be compressed if (binary) { int32_t toRead = fieldsStream->readVInt(); ByteArray b(ByteArray::newInstance(toRead)); fieldsStream->readBytes(b.get(), 0, b.size()); if (compressed) doc->add(newLucene(fi->name, uncompress(b), Field::STORE_YES)); else doc->add(newLucene(fi->name, b, Field::STORE_YES)); } else { Field::Store store = Field::STORE_YES; Field::Index index = Field::toIndex(fi->isIndexed, tokenize); Field::TermVector termVector = Field::toTermVector(fi->storeTermVector, fi->storeOffsetWithTermVector, fi->storePositionWithTermVector); AbstractFieldPtr f; if (compressed) { int32_t toRead = fieldsStream->readVInt(); ByteArray b(ByteArray::newInstance(toRead)); fieldsStream->readBytes(b.get(), 0, b.size()); f = newLucene(fi->name, uncompressString(b), store, index, termVector); f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); f->setOmitNorms(fi->omitNorms); } else { f = newLucene(fi->name, fieldsStream->readString(), store, index, termVector); f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); f->setOmitNorms(fi->omitNorms); } doc->add(f); } } int32_t FieldsReader::addFieldSize(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed) { int32_t size = fieldsStream->readVInt(); int32_t bytesize = (binary || compressed) ? 
size : 2 * size; ByteArray sizebytes(ByteArray::newInstance(4)); sizebytes[0] = (uint8_t)MiscUtils::unsignedShift(bytesize, 24); sizebytes[1] = (uint8_t)MiscUtils::unsignedShift(bytesize, 16); sizebytes[2] = (uint8_t)MiscUtils::unsignedShift(bytesize, 8); sizebytes[3] = (uint8_t)(bytesize); doc->add(newLucene(fi->name, sizebytes, Field::STORE_YES)); return size; } ByteArray FieldsReader::uncompress(ByteArray b) { try { return CompressionTools::decompress(b); } catch (LuceneException& e) { boost::throw_exception(CorruptIndexException(L"field data are in wrong format [" + e.getError() + L"]")); } return ByteArray(); } String FieldsReader::uncompressString(ByteArray b) { try { return CompressionTools::decompressString(b); } catch (LuceneException& e) { boost::throw_exception(CorruptIndexException(L"field data are in wrong format [" + e.getError() + L"]")); } return L""; } LazyField::LazyField(FieldsReaderPtr reader, const String& name, Field::Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed) : AbstractField(name, store, Field::INDEX_NO, Field::TERM_VECTOR_NO) { this->_reader = reader; this->toRead = toRead; this->pointer = pointer; this->_isBinary = isBinary; if (isBinary) binaryLength = toRead; lazy = true; this->isCompressed = isCompressed; } LazyField::LazyField(FieldsReaderPtr reader, const String& name, Field::Store store, Field::Index index, Field::TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed) : AbstractField(name, store, index, termVector) { this->_reader = reader; this->toRead = toRead; this->pointer = pointer; this->_isBinary = isBinary; if (isBinary) binaryLength = toRead; lazy = true; this->isCompressed = isCompressed; } LazyField::~LazyField() { } IndexInputPtr LazyField::getFieldStream() { FieldsReaderPtr reader(_reader); IndexInputPtr localFieldsStream = reader->fieldsStreamTL.get(); if (!localFieldsStream) { localFieldsStream = 
boost::static_pointer_cast(reader->cloneableFieldsStream->clone()); reader->fieldsStreamTL.set(localFieldsStream); } return localFieldsStream; } ReaderPtr LazyField::readerValue() { FieldsReaderPtr(_reader)->ensureOpen(); return ReaderPtr(); } TokenStreamPtr LazyField::tokenStreamValue() { FieldsReaderPtr(_reader)->ensureOpen(); return TokenStreamPtr(); } String LazyField::stringValue() { FieldsReaderPtr reader(_reader); reader->ensureOpen(); if (_isBinary) return L""; else { if (VariantUtils::isNull(fieldsData)) { IndexInputPtr localFieldsStream(getFieldStream()); try { localFieldsStream->seek(pointer); if (isCompressed) { ByteArray b(ByteArray::newInstance(toRead)); localFieldsStream->readBytes(b.get(), 0, b.size()); fieldsData = reader->uncompressString(b); } else { if (reader->format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { ByteArray bytes(ByteArray::newInstance(toRead)); localFieldsStream->readBytes(bytes.get(), 0, toRead); fieldsData = StringUtils::toUnicode(bytes.get(), toRead); } else { // read in chars because we already know the length we need to read CharArray chars(CharArray::newInstance(toRead)); int32_t length = localFieldsStream->readChars(chars.get(), 0, toRead); fieldsData = String(chars.get(), length); } } } catch (IOException& e) { boost::throw_exception(FieldReaderException(e.getError())); } } return VariantUtils::get(fieldsData); } } int64_t LazyField::getPointer() { FieldsReaderPtr(_reader)->ensureOpen(); return pointer; } void LazyField::setPointer(int64_t pointer) { FieldsReaderPtr(_reader)->ensureOpen(); this->pointer = pointer; } int32_t LazyField::getToRead() { FieldsReaderPtr(_reader)->ensureOpen(); return toRead; } void LazyField::setToRead(int32_t toRead) { FieldsReaderPtr(_reader)->ensureOpen(); this->toRead = toRead; } ByteArray LazyField::getBinaryValue(ByteArray result) { FieldsReaderPtr reader(_reader); reader->ensureOpen(); if (_isBinary) { if (VariantUtils::isNull(fieldsData)) { ByteArray b; // Allocate new buffer 
if result is null or too small if (!result || result.size() < toRead) b = ByteArray::newInstance(toRead); else b = result; IndexInputPtr localFieldsStream(getFieldStream()); // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a // change for people since they are already handling this exception when getting the document. try { localFieldsStream->seek(pointer); localFieldsStream->readBytes(b.get(), 0, toRead); if (isCompressed) fieldsData = reader->uncompress(b); else fieldsData = b; } catch (IOException& e) { boost::throw_exception(FieldReaderException(e.getError())); } binaryOffset = 0; binaryLength = toRead; } return VariantUtils::get(fieldsData); } else return ByteArray(); } } LucenePlusPlus-rel_3.0.4/src/core/index/FieldsWriter.cpp000066400000000000000000000150551217574114600232560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldsWriter.h" #include "IndexFileNames.h" #include "Directory.h" #include "IndexOutput.h" #include "RAMOutputStream.h" #include "FieldInfo.h" #include "FieldInfos.h" #include "Fieldable.h" #include "Document.h" #include "TestPoint.h" namespace Lucene { const uint8_t FieldsWriter::FIELD_IS_TOKENIZED = 0x1; const uint8_t FieldsWriter::FIELD_IS_BINARY = 0x2; const uint8_t FieldsWriter::FIELD_IS_COMPRESSED = 0x4; const int32_t FieldsWriter::FORMAT = 0; // Original format const int32_t FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1; // Changed strings to UTF8 const int32_t FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; // Lucene 3.0: Removal of compressed fields // NOTE: if you introduce a new format, make it 1 higher than the current one, and always change this if you // switch to a new format! const int32_t FieldsWriter::FORMAT_CURRENT = FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; FieldsWriter::FieldsWriter(DirectoryPtr d, const String& segment, FieldInfosPtr fn) { fieldInfos = fn; bool success = false; String fieldsName(segment + L"." + IndexFileNames::FIELDS_EXTENSION()); LuceneException finally; try { fieldsStream = d->createOutput(fieldsName); fieldsStream->writeInt(FORMAT_CURRENT); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { try { close(); d->deleteFile(fieldsName); } catch (...) { // Suppress so we keep throwing the original exception } } finally.throwException(); success = false; String indexName(segment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); try { indexStream = d->createOutput(indexName); indexStream->writeInt(FORMAT_CURRENT); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { try { close(); d->deleteFile(fieldsName); d->deleteFile(indexName); } catch (...) 
{ // Suppress so we keep throwing the original exception } } finally.throwException(); doClose = true; } FieldsWriter::FieldsWriter(IndexOutputPtr fdx, IndexOutputPtr fdt, FieldInfosPtr fn) { fieldInfos = fn; fieldsStream = fdt; indexStream = fdx; doClose = false; } FieldsWriter::~FieldsWriter() { } void FieldsWriter::setFieldsStream(IndexOutputPtr stream) { this->fieldsStream = stream; } void FieldsWriter::flushDocument(int32_t numStoredFields, RAMOutputStreamPtr buffer) { TestScope testScope(L"FieldsWriter", L"flushDocument"); indexStream->writeLong(fieldsStream->getFilePointer()); fieldsStream->writeVInt(numStoredFields); buffer->writeTo(fieldsStream); } void FieldsWriter::skipDocument() { indexStream->writeLong(fieldsStream->getFilePointer()); fieldsStream->writeVInt(0); } void FieldsWriter::flush() { indexStream->flush(); fieldsStream->flush(); } void FieldsWriter::close() { if (doClose) { LuceneException finally; if (fieldsStream) { try { fieldsStream->close(); } catch (LuceneException& e) { finally = e; } fieldsStream.reset(); } if (indexStream) { try { indexStream->close(); } catch (LuceneException& e) { if (finally.isNull()) // throw first exception hit finally = e; } indexStream.reset(); } finally.throwException(); } } void FieldsWriter::writeField(FieldInfoPtr fi, FieldablePtr field) { fieldsStream->writeVInt(fi->number); uint8_t bits = 0; if (field->isTokenized()) bits |= FIELD_IS_TOKENIZED; if (field->isBinary()) bits |= FIELD_IS_BINARY; fieldsStream->writeByte(bits); if (field->isBinary()) { ByteArray data(field->getBinaryValue()); int32_t len = field->getBinaryLength(); int32_t offset = field->getBinaryOffset(); fieldsStream->writeVInt(len); fieldsStream->writeBytes(data.get(), offset, len); } else fieldsStream->writeString(field->stringValue()); } void FieldsWriter::addRawDocuments(IndexInputPtr stream, Collection lengths, int32_t numDocs) { int64_t position = fieldsStream->getFilePointer(); int64_t start = position; for (int32_t i = 0; i < numDocs; 
++i) { indexStream->writeLong(position); position += lengths[i]; } fieldsStream->copyBytes(stream, position - start); BOOST_ASSERT(fieldsStream->getFilePointer() == position); } void FieldsWriter::addDocument(DocumentPtr doc) { indexStream->writeLong(fieldsStream->getFilePointer()); int32_t storedCount = 0; Collection fields(doc->getFields()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->isStored()) ++storedCount; } fieldsStream->writeVInt(storedCount); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->isStored()) writeField(fieldInfos->fieldInfo((*field)->name()), *field); } } } LucenePlusPlus-rel_3.0.4/src/core/index/FilterIndexReader.cpp000066400000000000000000000160301217574114600242050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilterIndexReader.h" #include "FieldCache.h" namespace Lucene { FilterIndexReader::FilterIndexReader(IndexReaderPtr in) { this->in = in; } FilterIndexReader::~FilterIndexReader() { } DirectoryPtr FilterIndexReader::directory() { return in->directory(); } Collection FilterIndexReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); return in->getTermFreqVectors(docNumber); } TermFreqVectorPtr FilterIndexReader::getTermFreqVector(int32_t docNumber, const String& field) { ensureOpen(); return in->getTermFreqVector(docNumber, field); } void FilterIndexReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) { ensureOpen(); in->getTermFreqVector(docNumber, field, mapper); } void FilterIndexReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) { ensureOpen(); in->getTermFreqVector(docNumber, mapper); } int32_t FilterIndexReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) return in->numDocs(); } int32_t FilterIndexReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return in->maxDoc(); } DocumentPtr FilterIndexReader::document(int32_t n, FieldSelectorPtr fieldSelector) { ensureOpen(); return in->document(n, fieldSelector); } bool FilterIndexReader::isDeleted(int32_t n) { // Don't call ensureOpen() here (it could affect performance) return in->isDeleted(n); } bool FilterIndexReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return in->hasDeletions(); } void FilterIndexReader::doUndeleteAll() { in->undeleteAll(); } bool FilterIndexReader::hasNorms(const String& field) { ensureOpen(); return in->hasNorms(field); } ByteArray FilterIndexReader::norms(const String& field) { ensureOpen(); return in->norms(field); } void FilterIndexReader::norms(const String& field, ByteArray norms, int32_t offset) { ensureOpen(); 
in->norms(field, norms, offset); } void FilterIndexReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { in->setNorm(doc, field, value); } TermEnumPtr FilterIndexReader::terms() { ensureOpen(); return in->terms(); } TermEnumPtr FilterIndexReader::terms(TermPtr t) { ensureOpen(); return in->terms(t); } int32_t FilterIndexReader::docFreq(TermPtr t) { ensureOpen(); return in->docFreq(t); } TermDocsPtr FilterIndexReader::termDocs() { ensureOpen(); return in->termDocs(); } TermDocsPtr FilterIndexReader::termDocs(TermPtr term) { ensureOpen(); return in->termDocs(term); } TermPositionsPtr FilterIndexReader::termPositions() { ensureOpen(); return in->termPositions(); } void FilterIndexReader::doDelete(int32_t docNum) { in->deleteDocument(docNum); } void FilterIndexReader::doCommit(MapStringString commitUserData) { in->commit(commitUserData); } void FilterIndexReader::doClose() { in->close(); // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is // generally not a good idea) FieldCache::DEFAULT()->purge(shared_from_this()); } HashSet FilterIndexReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); return in->getFieldNames(fieldOption); } int64_t FilterIndexReader::getVersion() { ensureOpen(); return in->getVersion(); } bool FilterIndexReader::isCurrent() { ensureOpen(); return in->isCurrent(); } bool FilterIndexReader::isOptimized() { ensureOpen(); return in->isOptimized(); } Collection FilterIndexReader::getSequentialSubReaders() { return in->getSequentialSubReaders(); } LuceneObjectPtr FilterIndexReader::getFieldCacheKey() { return in->getFieldCacheKey(); } LuceneObjectPtr FilterIndexReader::getDeletesCacheKey() { return in->getDeletesCacheKey(); } FilterTermDocs::FilterTermDocs(TermDocsPtr in) { this->in = in; } FilterTermDocs::~FilterTermDocs() { } void FilterTermDocs::seek(TermPtr term) { in->seek(term); } void FilterTermDocs::seek(TermEnumPtr termEnum) { in->seek(termEnum); } int32_t FilterTermDocs::doc() 
{ return in->doc(); } int32_t FilterTermDocs::freq() { return in->freq(); } bool FilterTermDocs::next() { return in->next(); } int32_t FilterTermDocs::read(Collection docs, Collection freqs) { return in->read(docs, freqs); } bool FilterTermDocs::skipTo(int32_t target) { return in->skipTo(target); } void FilterTermDocs::close() { in->close(); } FilterTermPositions::FilterTermPositions(TermPositionsPtr in) : FilterTermDocs(in) { } FilterTermPositions::~FilterTermPositions() { } int32_t FilterTermPositions::nextPosition() { return boost::static_pointer_cast(in)->nextPosition(); } int32_t FilterTermPositions::getPayloadLength() { return boost::static_pointer_cast(in)->getPayloadLength(); } ByteArray FilterTermPositions::getPayload(ByteArray data, int32_t offset) { return boost::static_pointer_cast(in)->getPayload(data, offset); } bool FilterTermPositions::isPayloadAvailable() { return boost::static_pointer_cast(in)->isPayloadAvailable(); } FilterTermEnum::FilterTermEnum(TermEnumPtr in) { this->in = in; } FilterTermEnum::~FilterTermEnum() { } bool FilterTermEnum::next() { return in->next(); } TermPtr FilterTermEnum::term() { return in->term(); } int32_t FilterTermEnum::docFreq() { return in->docFreq(); } void FilterTermEnum::close() { in->close(); } } LucenePlusPlus-rel_3.0.4/src/core/index/FormatPostingsDocsConsumer.cpp000066400000000000000000000007621217574114600261560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsDocsConsumer.h" namespace Lucene { FormatPostingsDocsConsumer::~FormatPostingsDocsConsumer() { } } LucenePlusPlus-rel_3.0.4/src/core/index/FormatPostingsDocsWriter.cpp000066400000000000000000000076471217574114600256500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsDocsWriter.h" #include "FormatPostingsTermsWriter.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsPositionsWriter.h" #include "IndexFileNames.h" #include "SegmentWriteState.h" #include "Directory.h" #include "TermInfosWriter.h" #include "DefaultSkipListWriter.h" #include "FieldInfo.h" #include "IndexOutput.h" #include "TermInfo.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { FormatPostingsDocsWriter::FormatPostingsDocsWriter(SegmentWriteStatePtr state, FormatPostingsTermsWriterPtr parent) { this->lastDocID = 0; this->df = 0; this->omitTermFreqAndPositions = false; this->storePayloads = false; this->freqStart = 0; FormatPostingsFieldsWriterPtr parentPostings(parent->_parent); this->_parent = parent; this->state = state; String fileName(IndexFileNames::segmentFileName(parentPostings->segment, IndexFileNames::FREQ_EXTENSION())); state->flushedFiles.add(fileName); out = parentPostings->dir->createOutput(fileName); totalNumDocs = parentPostings->totalNumDocs; skipInterval = parentPostings->termsOut->skipInterval; skipListWriter = parentPostings->skipListWriter; skipListWriter->setFreqOutput(out); termInfo = newLucene(); utf8 = newLucene(); } 
FormatPostingsDocsWriter::~FormatPostingsDocsWriter() { } void FormatPostingsDocsWriter::initialize() { posWriter = newLucene(state, shared_from_this()); } void FormatPostingsDocsWriter::setField(FieldInfoPtr fieldInfo) { this->fieldInfo = fieldInfo; omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; storePayloads = fieldInfo->storePayloads; posWriter->setField(fieldInfo); } FormatPostingsPositionsConsumerPtr FormatPostingsDocsWriter::addDoc(int32_t docID, int32_t termDocFreq) { int32_t delta = docID - lastDocID; if (docID < 0 || (df > 0 && delta <= 0)) boost::throw_exception(CorruptIndexException(L"docs out of order (" + StringUtils::toString(docID) + L" <= " + StringUtils::toString(lastDocID) + L" )")); if ((++df % skipInterval) == 0) { skipListWriter->setSkipData(lastDocID, storePayloads, posWriter->lastPayloadLength); skipListWriter->bufferSkip(df); } BOOST_ASSERT(docID < totalNumDocs); lastDocID = docID; if (omitTermFreqAndPositions) out->writeVInt(delta); else if (termDocFreq == 1) out->writeVInt((delta << 1) | 1); else { out->writeVInt(delta << 1); out->writeVInt(termDocFreq); } return posWriter; } void FormatPostingsDocsWriter::finish() { int64_t skipPointer = skipListWriter->writeSkip(out); FormatPostingsTermsWriterPtr parent(_parent); termInfo->set(df, parent->freqStart, parent->proxStart, (int32_t)(skipPointer - parent->freqStart)); StringUtils::toUTF8(parent->currentTerm.get() + parent->currentTermStart, parent->currentTerm.size(), utf8); if (df > 0) parent->termsOut->add(fieldInfo->number, utf8->result, utf8->length, termInfo); lastDocID = 0; df = 0; } void FormatPostingsDocsWriter::close() { out->close(); posWriter->close(); } } LucenePlusPlus-rel_3.0.4/src/core/index/FormatPostingsFieldsConsumer.cpp000066400000000000000000000007701217574114600264730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsFieldsConsumer.h" namespace Lucene { FormatPostingsFieldsConsumer::~FormatPostingsFieldsConsumer() { } } LucenePlusPlus-rel_3.0.4/src/core/index/FormatPostingsFieldsWriter.cpp000066400000000000000000000035111217574114600261500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsTermsWriter.h" #include "SegmentWriteState.h" #include "TermInfosWriter.h" #include "IndexFileNames.h" #include "DefaultSkipListWriter.h" namespace Lucene { FormatPostingsFieldsWriter::FormatPostingsFieldsWriter(SegmentWriteStatePtr state, FieldInfosPtr fieldInfos) { dir = state->directory; segment = state->segmentName; totalNumDocs = state->numDocs; this->state = state; this->fieldInfos = fieldInfos; termsOut = newLucene(dir, segment, fieldInfos, state->termIndexInterval); skipListWriter = newLucene(termsOut->skipInterval, termsOut->maxSkipLevels, totalNumDocs, IndexOutputPtr(), IndexOutputPtr()); state->flushedFiles.add(state->segmentFileName(IndexFileNames::TERMS_EXTENSION())); state->flushedFiles.add(state->segmentFileName(IndexFileNames::TERMS_INDEX_EXTENSION())); } FormatPostingsFieldsWriter::~FormatPostingsFieldsWriter() { } void FormatPostingsFieldsWriter::initialize() { termsWriter = newLucene(state, shared_from_this()); } FormatPostingsTermsConsumerPtr FormatPostingsFieldsWriter::addField(FieldInfoPtr field) { termsWriter->setField(field); return 
termsWriter; } void FormatPostingsFieldsWriter::finish() { termsOut->close(); termsWriter->close(); } } LucenePlusPlus-rel_3.0.4/src/core/index/FormatPostingsPositionsConsumer.cpp000066400000000000000000000010011217574114600272400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsPositionsConsumer.h" namespace Lucene { FormatPostingsPositionsConsumer::~FormatPostingsPositionsConsumer() { } } LucenePlusPlus-rel_3.0.4/src/core/index/FormatPostingsPositionsWriter.cpp000066400000000000000000000060641217574114600267370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsPositionsWriter.h" #include "FormatPostingsDocsWriter.h" #include "FormatPostingsTermsWriter.h" #include "FormatPostingsFieldsWriter.h" #include "IndexFileNames.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "SegmentWriteState.h" #include "Directory.h" #include "DefaultSkipListWriter.h" #include "IndexOutput.h" namespace Lucene { FormatPostingsPositionsWriter::FormatPostingsPositionsWriter(SegmentWriteStatePtr state, FormatPostingsDocsWriterPtr parent) { lastPosition = 0; storePayloads = false; lastPayloadLength = -1; this->_parent = parent; FormatPostingsFieldsWriterPtr parentFieldsWriter(FormatPostingsTermsWriterPtr(parent->_parent)->_parent); omitTermFreqAndPositions = parent->omitTermFreqAndPositions; if (parentFieldsWriter->fieldInfos->hasProx()) { // At least one field does not omit TF, so create the prox file String fileName(IndexFileNames::segmentFileName(parentFieldsWriter->segment, IndexFileNames::PROX_EXTENSION())); state->flushedFiles.add(fileName); out = parentFieldsWriter->dir->createOutput(fileName); parent->skipListWriter->setProxOutput(out); } else { // Every field omits TF so we will write no prox file } } FormatPostingsPositionsWriter::~FormatPostingsPositionsWriter() { } void FormatPostingsPositionsWriter::addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength) { BOOST_ASSERT(!omitTermFreqAndPositions); BOOST_ASSERT(out); int32_t delta = position - lastPosition; lastPosition = position; if (storePayloads) { if (payloadLength != lastPayloadLength) { lastPayloadLength = payloadLength; out->writeVInt((delta << 1) | 1); out->writeVInt(payloadLength); } else out->writeVInt(delta << 1); if (payloadLength > 0) out->writeBytes(payload.get(), payloadLength); } else out->writeVInt(delta); } void FormatPostingsPositionsWriter::setField(FieldInfoPtr fieldInfo) { 
omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; storePayloads = omitTermFreqAndPositions ? false : fieldInfo->storePayloads; } void FormatPostingsPositionsWriter::finish() { lastPosition = 0; lastPayloadLength = -1; } void FormatPostingsPositionsWriter::close() { if (out) out->close(); } } LucenePlusPlus-rel_3.0.4/src/core/index/FormatPostingsTermsConsumer.cpp000066400000000000000000000020611217574114600263520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsTermsConsumer.h" #include "UTF8Stream.h" #include "MiscUtils.h" namespace Lucene { FormatPostingsTermsConsumer::~FormatPostingsTermsConsumer() { } FormatPostingsDocsConsumerPtr FormatPostingsTermsConsumer::addTerm(const String& text) { int32_t len = text.length(); if (!termBuffer) termBuffer = CharArray::newInstance(MiscUtils::getNextSize(len + 1)); if (termBuffer.size() < len + 1) termBuffer.resize(MiscUtils::getNextSize(len + 1)); MiscUtils::arrayCopy(text.begin(), 0, termBuffer.get(), 0, len); termBuffer[len] = UTF8Base::UNICODE_TERMINATOR; return addTerm(termBuffer, 0); } } LucenePlusPlus-rel_3.0.4/src/core/index/FormatPostingsTermsWriter.cpp000066400000000000000000000036251217574114600260420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsTermsWriter.h" #include "FormatPostingsDocsWriter.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsPositionsWriter.h" #include "IndexOutput.h" #include "DefaultSkipListWriter.h" namespace Lucene { FormatPostingsTermsWriter::FormatPostingsTermsWriter(SegmentWriteStatePtr state, FormatPostingsFieldsWriterPtr parent) { currentTermStart = 0; freqStart = 0; proxStart = 0; this->_parent = parent; this->state = state; termsOut = parent->termsOut; } FormatPostingsTermsWriter::~FormatPostingsTermsWriter() { } void FormatPostingsTermsWriter::initialize() { docsWriter = newLucene(state, shared_from_this()); } void FormatPostingsTermsWriter::setField(FieldInfoPtr fieldInfo) { this->fieldInfo = fieldInfo; docsWriter->setField(fieldInfo); } FormatPostingsDocsConsumerPtr FormatPostingsTermsWriter::addTerm(CharArray text, int32_t start) { currentTerm = text; currentTermStart = start; freqStart = docsWriter->out->getFilePointer(); if (docsWriter->posWriter->out) proxStart = docsWriter->posWriter->out->getFilePointer(); FormatPostingsFieldsWriterPtr(_parent)->skipListWriter->resetSkip(); return docsWriter; } void FormatPostingsTermsWriter::finish() { } void FormatPostingsTermsWriter::close() { docsWriter->close(); } } LucenePlusPlus-rel_3.0.4/src/core/index/FreqProxFieldMergeState.cpp000066400000000000000000000063141217574114600253440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FreqProxFieldMergeState.h" #include "FreqProxTermsWriterPerField.h" #include "FreqProxTermsWriterPerThread.h" #include "FreqProxTermsWriter.h" #include "TermsHashPerThread.h" #include "TermsHashPerField.h" #include "ByteSliceReader.h" #include "DocumentsWriter.h" #include "CharBlockPool.h" #include "FieldInfo.h" #include "MiscUtils.h" namespace Lucene { FreqProxFieldMergeState::FreqProxFieldMergeState(FreqProxTermsWriterPerFieldPtr field) { this->numPostings = 0; this->textOffset = 0; this->docID = 0; this->termFreq = 0; this->postingUpto = -1; this->freq = newLucene(); this->prox = newLucene(); this->field = field; this->charPool = TermsHashPerThreadPtr(FreqProxTermsWriterPerThreadPtr(field->_perThread)->_termsHashPerThread)->charPool; TermsHashPerFieldPtr termsHashPerField(field->_termsHashPerField); this->numPostings = termsHashPerField->numPostings; this->postings = termsHashPerField->sortPostings(); } FreqProxFieldMergeState::~FreqProxFieldMergeState() { } bool FreqProxFieldMergeState::nextTerm() { ++postingUpto; if (postingUpto == numPostings) return false; p = boost::static_pointer_cast(postings[postingUpto]); docID = 0; text = charPool->buffers[p->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]; textOffset = (p->textStart & DocumentsWriter::CHAR_BLOCK_MASK); TermsHashPerFieldPtr termsHashPerField(field->_termsHashPerField); termsHashPerField->initReader(freq, p, 0); if (!field->fieldInfo->omitTermFreqAndPositions) termsHashPerField->initReader(prox, p, 1); // Should always be true bool result = nextDoc(); BOOST_ASSERT(result); return true; } bool FreqProxFieldMergeState::nextDoc() { if (freq->eof()) { if (p->lastDocCode != -1) { // Return last doc docID = p->lastDocID; if (!field->omitTermFreqAndPositions) termFreq = p->docFreq; p->lastDocCode = -1; return true; } else { // EOF return false; } } int32_t code = freq->readVInt(); if 
(field->omitTermFreqAndPositions) docID += code; else { docID += MiscUtils::unsignedShift(code, 1); if ((code & 1) != 0) termFreq = 1; else termFreq = freq->readVInt(); } BOOST_ASSERT(docID != p->lastDocID); return true; } } LucenePlusPlus-rel_3.0.4/src/core/index/FreqProxTermsWriter.cpp000066400000000000000000000271721217574114600246340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FreqProxTermsWriter.h" #include "FreqProxTermsWriterPerThread.h" #include "FreqProxTermsWriterPerField.h" #include "FreqProxFieldMergeState.h" #include "TermsHashConsumerPerThread.h" #include "TermsHashConsumerPerField.h" #include "TermsHashPerField.h" #include "TermsHashPerThread.h" #include "FormatPostingsDocsConsumer.h" #include "FormatPostingsFieldsConsumer.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsTermsConsumer.h" #include "FormatPostingsPositionsConsumer.h" #include "FieldInfo.h" #include "ByteSliceReader.h" #include "RawPostingList.h" #include "DocumentsWriter.h" #include "UTF8Stream.h" #include "TestPoint.h" namespace Lucene { FreqProxTermsWriter::~FreqProxTermsWriter() { } TermsHashConsumerPerThreadPtr FreqProxTermsWriter::addThread(TermsHashPerThreadPtr perThread) { return newLucene(perThread); } void FreqProxTermsWriter::createPostings(Collection postings, int32_t start, int32_t count) { int32_t end = start + count; for (int32_t i = start; i < end; ++i) postings[i] = newLucene(); } int32_t FreqProxTermsWriter::compareText(const wchar_t* text1, int32_t pos1, const wchar_t* text2, int32_t pos2) { while (true) { wchar_t c1 = text1[pos1++]; wchar_t c2 = text2[pos2++]; if (c1 != c2) { if (c2 == 
UTF8Base::UNICODE_TERMINATOR) return 1; else if (c1 == UTF8Base::UNICODE_TERMINATOR) return -1; else return (c1 - c2); } else if (c1 == UTF8Base::UNICODE_TERMINATOR) return 0; } } void FreqProxTermsWriter::closeDocStore(SegmentWriteStatePtr state) { } void FreqProxTermsWriter::abort() { } void FreqProxTermsWriter::flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state) { // Gather all FieldData's that have postings, across all ThreadStates Collection allFields(Collection::newInstance()); for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { FreqProxTermsWriterPerFieldPtr freqProxPerField(boost::static_pointer_cast(*perField)); if (TermsHashPerFieldPtr(freqProxPerField->_termsHashPerField)->numPostings > 0) allFields.add(freqProxPerField); } } // Sort by field name std::sort(allFields.begin(), allFields.end(), luceneCompare()); int32_t numAllFields = allFields.size(); FormatPostingsFieldsConsumerPtr consumer(newLucene(state, fieldInfos)); // Current writer chain: // FormatPostingsFieldsConsumer // -> IMPL: FormatPostingsFieldsWriter // -> FormatPostingsTermsConsumer // -> IMPL: FormatPostingsTermsWriter // -> FormatPostingsDocConsumer // -> IMPL: FormatPostingsDocWriter // -> FormatPostingsPositionsConsumer // -> IMPL: FormatPostingsPositionsWriter int32_t start = 0; while (start < numAllFields) { FieldInfoPtr fieldInfo(allFields[start]->fieldInfo); String fieldName(fieldInfo->name); int32_t end = start + 1; while (end < numAllFields && allFields[end]->fieldInfo->name == fieldName) ++end; Collection fields(Collection::newInstance(end - start)); for (int32_t i = start; i < end; ++i) { fields[i - start] = allFields[i]; // Aggregate the storePayload as seen by the same field across multiple threads if 
(fields[i - start]->hasPayloads) fieldInfo->storePayloads = true; } // If this field has postings then add them to the segment appendPostings(fields, consumer); for (int32_t i = 0; i < fields.size(); ++i) { TermsHashPerFieldPtr perField(fields[i]->_termsHashPerField); int32_t numPostings = perField->numPostings; perField->reset(); perField->shrinkHash(numPostings); fields[i]->reset(); } start = end; } for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) TermsHashPerThreadPtr(boost::static_pointer_cast(entry->first)->_termsHashPerThread)->reset(true); consumer->finish(); } void FreqProxTermsWriter::appendPostings(Collection fields, FormatPostingsFieldsConsumerPtr consumer) { TestScope testScope(L"FreqProxTermsWriter", L"appendPostings"); int32_t numFields = fields.size(); Collection mergeStates(Collection::newInstance(numFields)); for (int32_t i = 0; i < numFields; ++i) { FreqProxFieldMergeStatePtr fms(newLucene(fields[i])); mergeStates[i] = fms; BOOST_ASSERT(fms->field->fieldInfo == fields[0]->fieldInfo); // Should always be true bool result = fms->nextTerm(); BOOST_ASSERT(result); } FormatPostingsTermsConsumerPtr termsConsumer(consumer->addField(fields[0]->fieldInfo)); Collection termStates(Collection::newInstance(numFields)); bool currentFieldOmitTermFreqAndPositions = fields[0]->fieldInfo->omitTermFreqAndPositions; while (numFields > 0) { // Get the next term to merge termStates[0] = mergeStates[0]; int32_t numToMerge = 1; for (int32_t i = 1; i < numFields; ++i) { CharArray text = mergeStates[i]->text; int32_t textOffset = mergeStates[i]->textOffset; int32_t cmp = compareText(text.get(), textOffset, termStates[0]->text.get(), termStates[0]->textOffset); if (cmp < 0) { termStates[0] = mergeStates[i]; numToMerge = 1; } else if (cmp == 0) termStates[numToMerge++] = mergeStates[i]; } FormatPostingsDocsConsumerPtr 
docConsumer(termsConsumer->addTerm(termStates[0]->text, termStates[0]->textOffset)); // Now termStates has numToMerge FieldMergeStates which all share the same term. Now we must // interleave the docID streams. while (numToMerge > 0) { FreqProxFieldMergeStatePtr minState(termStates[0]); for (int32_t i = 1; i < numToMerge; ++i) { if (termStates[i]->docID < minState->docID) minState = termStates[i]; } int32_t termDocFreq = minState->termFreq; FormatPostingsPositionsConsumerPtr posConsumer(docConsumer->addDoc(minState->docID, termDocFreq)); ByteSliceReaderPtr prox(minState->prox); // Carefully copy over the prox + payload info, changing the format to match Lucene's segment format. if (!currentFieldOmitTermFreqAndPositions) { // omitTermFreqAndPositions == false so we do write positions & payload int32_t position = 0; for (int32_t j = 0; j < termDocFreq; ++j) { int32_t code = prox->readVInt(); position += (code >> 1); int32_t payloadLength; if ((code & 1) != 0) { // This position has a payload payloadLength = prox->readVInt(); if (!payloadBuffer) payloadBuffer = ByteArray::newInstance(payloadLength); if (payloadBuffer.size() < payloadLength) payloadBuffer.resize(payloadLength); prox->readBytes(payloadBuffer.get(), 0, payloadLength); } else payloadLength = 0; posConsumer->addPosition(position, payloadBuffer, 0, payloadLength); } posConsumer->finish(); } if (!minState->nextDoc()) { // Remove from termStates int32_t upto = 0; for (int32_t i = 0; i < numToMerge; ++i) { if (termStates[i] != minState) termStates[upto++] = termStates[i]; } --numToMerge; BOOST_ASSERT(upto == numToMerge); // Advance this state to the next term if (!minState->nextTerm()) { // OK, no more terms, so remove from mergeStates as well upto = 0; for (int32_t i = 0; i < numFields; ++i) { if (mergeStates[i] != minState) mergeStates[upto++] = mergeStates[i]; } --numFields; BOOST_ASSERT(upto == numFields); } } } docConsumer->finish(); } termsConsumer->finish(); } int32_t 
FreqProxTermsWriter::bytesPerPosting() { return RawPostingList::BYTES_SIZE + 4 * DocumentsWriter::INT_NUM_BYTE; } FreqProxTermsWriterPostingList::FreqProxTermsWriterPostingList() { docFreq = 0; lastDocID = 0; lastDocCode = 0; lastPosition = 0; } FreqProxTermsWriterPostingList::~FreqProxTermsWriterPostingList() { } } LucenePlusPlus-rel_3.0.4/src/core/index/FreqProxTermsWriterPerField.cpp000066400000000000000000000142611217574114600262420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FreqProxTermsWriterPerField.h" #include "FreqProxTermsWriter.h" #include "FieldInfo.h" #include "Fieldable.h" #include "TermsHashPerField.h" #include "FieldInvertState.h" #include "AttributeSource.h" #include "Payload.h" #include "PayloadAttribute.h" #include "DocumentsWriter.h" #include "RawPostingList.h" namespace Lucene { FreqProxTermsWriterPerField::FreqProxTermsWriterPerField(TermsHashPerFieldPtr termsHashPerField, FreqProxTermsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo) { this->hasPayloads = false; this->_termsHashPerField = termsHashPerField; this->_perThread = perThread; this->fieldInfo = fieldInfo; docState = termsHashPerField->docState; fieldState = termsHashPerField->fieldState; omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; } FreqProxTermsWriterPerField::~FreqProxTermsWriterPerField() { } int32_t FreqProxTermsWriterPerField::getStreamCount() { return fieldInfo->omitTermFreqAndPositions ? 
1 : 2; } void FreqProxTermsWriterPerField::finish() { } void FreqProxTermsWriterPerField::skippingLongTerm() { } int32_t FreqProxTermsWriterPerField::compareTo(LuceneObjectPtr other) { return fieldInfo->name.compare(boost::static_pointer_cast(other)->fieldInfo->name); } void FreqProxTermsWriterPerField::reset() { // Record, up front, whether our in-RAM format will be with or without term freqs omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; payloadAttribute.reset(); } bool FreqProxTermsWriterPerField::start(Collection fields, int32_t count) { for (int32_t i = 0; i < count; ++i) { if (fields[i]->isIndexed()) return true; } return false; } void FreqProxTermsWriterPerField::start(FieldablePtr field) { if (fieldState->attributeSource->hasAttribute()) payloadAttribute = fieldState->attributeSource->getAttribute(); else payloadAttribute.reset(); } void FreqProxTermsWriterPerField::writeProx(FreqProxTermsWriterPostingListPtr p, int32_t proxCode) { PayloadPtr payload; if (payloadAttribute) payload = payloadAttribute->getPayload(); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (payload && payload->length() > 0) { termsHashPerField->writeVInt(1, (proxCode << 1) | 1); termsHashPerField->writeVInt(1, payload->length()); termsHashPerField->writeBytes(1, payload->getData().get(), payload->getOffset(), payload->length()); hasPayloads = true; } else termsHashPerField->writeVInt(1, proxCode << 1); p->lastPosition = fieldState->position; } void FreqProxTermsWriterPerField::newTerm(RawPostingListPtr p) { // First time we're seeing this term since the last flush BOOST_ASSERT(docState->testPoint(L"FreqProxTermsWriterPerField.newTerm start")); FreqProxTermsWriterPostingListPtr newPostingList(boost::static_pointer_cast(p)); newPostingList->lastDocID = docState->docID; if (omitTermFreqAndPositions) newPostingList->lastDocCode = docState->docID; else { newPostingList->lastDocCode = docState->docID << 1; newPostingList->docFreq = 1; writeProx(newPostingList, 
fieldState->position); } } void FreqProxTermsWriterPerField::addTerm(RawPostingListPtr p) { BOOST_ASSERT(docState->testPoint(L"FreqProxTermsWriterPerField.addTerm start")); FreqProxTermsWriterPostingListPtr addPostingList(boost::static_pointer_cast(p)); BOOST_ASSERT(omitTermFreqAndPositions || addPostingList->docFreq > 0); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (omitTermFreqAndPositions) { if (docState->docID != addPostingList->lastDocID) { BOOST_ASSERT(docState->docID > addPostingList->lastDocID); termsHashPerField->writeVInt(0, addPostingList->lastDocCode); addPostingList->lastDocCode = docState->docID - addPostingList->lastDocID; addPostingList->lastDocID = docState->docID; } } else { if (docState->docID != addPostingList->lastDocID) { BOOST_ASSERT(docState->docID > addPostingList->lastDocID); // Term not yet seen in the current doc but previously seen in other doc(s) since // the last flush // Now that we know doc freq for previous doc, write it & lastDocCode if (addPostingList->docFreq == 1) termsHashPerField->writeVInt(0, addPostingList->lastDocCode | 1); else { termsHashPerField->writeVInt(0, addPostingList->lastDocCode); termsHashPerField->writeVInt(0, addPostingList->docFreq); } addPostingList->docFreq = 1; addPostingList->lastDocCode = (docState->docID - addPostingList->lastDocID) << 1; addPostingList->lastDocID = docState->docID; writeProx(addPostingList, fieldState->position); } else { ++addPostingList->docFreq; writeProx(addPostingList, fieldState->position - addPostingList->lastPosition); } } } void FreqProxTermsWriterPerField::abort() { } } LucenePlusPlus-rel_3.0.4/src/core/index/FreqProxTermsWriterPerThread.cpp000066400000000000000000000023751217574114600264310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FreqProxTermsWriterPerThread.h" #include "FreqProxTermsWriterPerField.h" #include "TermsHashPerThread.h" namespace Lucene { FreqProxTermsWriterPerThread::FreqProxTermsWriterPerThread(TermsHashPerThreadPtr perThread) { docState = perThread->docState; _termsHashPerThread = perThread; } FreqProxTermsWriterPerThread::~FreqProxTermsWriterPerThread() { } TermsHashConsumerPerFieldPtr FreqProxTermsWriterPerThread::addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo) { return newLucene(termsHashPerField, shared_from_this(), fieldInfo); } void FreqProxTermsWriterPerThread::startDocument() { } DocWriterPtr FreqProxTermsWriterPerThread::finishDocument() { return DocWriterPtr(); } void FreqProxTermsWriterPerThread::abort() { } } LucenePlusPlus-rel_3.0.4/src/core/index/IndexCommit.cpp000066400000000000000000000021261217574114600230660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexCommit.h" #include "Directory.h" namespace Lucene { IndexCommit::~IndexCommit() { } bool IndexCommit::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; IndexCommitPtr otherCommit(boost::dynamic_pointer_cast(other)); if (!otherCommit) return false; return (otherCommit->getDirectory()->equals(getDirectory()) && otherCommit->getVersion() == getVersion()); } int32_t IndexCommit::hashCode() { return (getDirectory()->hashCode() + (int32_t)getVersion()); } int64_t IndexCommit::getTimestamp() { return getDirectory()->fileModified(getSegmentsFileName()); } } LucenePlusPlus-rel_3.0.4/src/core/index/IndexDeletionPolicy.cpp000066400000000000000000000010311217574114600245530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexDeletionPolicy.h" namespace Lucene { IndexDeletionPolicy::IndexDeletionPolicy() { } IndexDeletionPolicy::~IndexDeletionPolicy() { } } LucenePlusPlus-rel_3.0.4/src/core/index/IndexFileDeleter.cpp000066400000000000000000000453731217574114600240350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "IndexFileDeleter.h" #include "IndexFileNameFilter.h" #include "IndexFileNames.h" #include "IndexDeletionPolicy.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "Directory.h" #include "DocumentsWriter.h" #include "InfoStream.h" #include "DateTools.h" #include "LuceneThread.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// Change to true to see details of reference counts when infoStream != null bool IndexFileDeleter::VERBOSE_REF_COUNTS = false; IndexFileDeleter::IndexFileDeleter(DirectoryPtr directory, IndexDeletionPolicyPtr policy, SegmentInfosPtr segmentInfos, InfoStreamPtr infoStream, DocumentsWriterPtr docWriter, HashSet synced) { this->lastFiles = Collection< HashSet >::newInstance(); this->commits = Collection::newInstance(); this->commitsToDelete = Collection::newInstance(); this->refCounts = MapStringRefCount::newInstance(); this->docWriter = docWriter; this->infoStream = infoStream; this->synced = synced; if (infoStream) message(L"init: current segments file is \"" + segmentInfos->getCurrentSegmentFileName()); this->policy = policy; this->directory = directory; // First pass: walk the files and initialize our ref counts int64_t currentGen = segmentInfos->getGeneration(); IndexFileNameFilterPtr filter(IndexFileNameFilter::getFilter()); HashSet files(directory->listAll()); CommitPointPtr currentCommitPoint; for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (filter->accept(L"", *fileName) && *fileName != IndexFileNames::SEGMENTS_GEN()) { // Add this file to refCounts with initial count 0 getRefCount(*fileName); if (boost::starts_with(*fileName, IndexFileNames::SEGMENTS())) { // This is a commit (segments or segments_N), and it's valid (<= the max gen). 
// Load it, then incref all files it refers to if (infoStream) message(L"init: load commit \"" + *fileName + L"\""); SegmentInfosPtr sis(newLucene()); try { sis->read(directory, *fileName); } catch (IOException& e) { if (SegmentInfos::generationFromSegmentsFileName(*fileName) <= currentGen) boost::throw_exception(e); else { // Most likely we are opening an index that has an aborted "future" commit, // so suppress exc in this case sis.reset(); } } catch (...) { if (infoStream) message(L"init: hit exception when loading commit \"" + *fileName + L"\"; skipping this commit point"); sis.reset(); } if (sis) { CommitPointPtr commitPoint(newLucene(commitsToDelete, directory, sis)); if (sis->getGeneration() == segmentInfos->getGeneration()) currentCommitPoint = commitPoint; commits.add(commitPoint); incRef(sis, true); if (!lastSegmentInfos || sis->getGeneration() > lastSegmentInfos->getGeneration()) lastSegmentInfos = sis; } } } } if (!currentCommitPoint) { // We did not in fact see the segments_N file corresponding to the segmentInfos that was passed // in. Yet, it must exist, because our caller holds the write lock. This can happen when the // directory listing was stale (eg when index accessed via NFS client with stale directory listing // cache). So we try now to explicitly open this commit point. SegmentInfosPtr sis(newLucene()); try { sis->read(directory, segmentInfos->getCurrentSegmentFileName()); } catch (LuceneException&) { boost::throw_exception(CorruptIndexException(L"failed to locate current segments_N file")); } if (infoStream) message(L"forced open of current segments file " + segmentInfos->getCurrentSegmentFileName()); currentCommitPoint = newLucene(commitsToDelete, directory, sis); commits.add(currentCommitPoint); incRef(sis, true); } // We keep commits list in sorted order (oldest to newest) std::sort(commits.begin(), commits.end(), luceneCompare()); // Now delete anything with ref count at 0. 
These are presumably abandoned files eg due to crash of IndexWriter. for (MapStringRefCount::iterator entry = refCounts.begin(); entry != refCounts.end(); ++entry) { if (entry->second->count == 0) { if (infoStream) message(L"init: removing unreferenced file \"" + entry->first + L"\""); deleteFile(entry->first); } } // Finally, give policy a chance to remove things on startup policy->onInit(commits); // Always protect the incoming segmentInfos since sometime it may not be the most recent commit checkpoint(segmentInfos, false); startingCommitDeleted = currentCommitPoint->isDeleted(); deleteCommits(); } IndexFileDeleter::~IndexFileDeleter() { } void IndexFileDeleter::setInfoStream(InfoStreamPtr infoStream) { this->infoStream = infoStream; } void IndexFileDeleter::message(const String& message) { if (infoStream) { *infoStream << L"IFD [" << DateTools::timeToString(MiscUtils::currentTimeMillis(), DateTools::RESOLUTION_SECOND); *infoStream << L"; " << StringUtils::toString(LuceneThread::currentId()) << L"]: " << message << L"\n"; } } SegmentInfosPtr IndexFileDeleter::getLastSegmentInfos() { return lastSegmentInfos; } void IndexFileDeleter::deleteCommits() { if (!commitsToDelete.empty()) { // First decref all files that had been referred to by the now-deleted commits for (Collection::iterator commit = commitsToDelete.begin(); commit != commitsToDelete.end(); ++commit) { if (infoStream) message(L"deleteCommits: now decRef commit \"" + (*commit)->getSegmentsFileName() + L"\""); for (HashSet::iterator file = (*commit)->files.begin(); file != (*commit)->files.end(); ++file) decRef(*file); } commitsToDelete.clear(); // Now compact commits to remove deleted ones (preserving the sort) int32_t size = commits.size(); int32_t readFrom = 0; int32_t writeTo = 0; while (readFrom < size) { CommitPointPtr commit(boost::dynamic_pointer_cast(commits[readFrom])); if (!commit->deleted) { if (writeTo != readFrom) commits[writeTo] = commits[readFrom]; ++writeTo; } ++readFrom; } while (size > 
writeTo) { commits.removeLast(); --size; } } } void IndexFileDeleter::refresh(const String& segmentName) { HashSet files(directory->listAll()); IndexFileNameFilterPtr filter(IndexFileNameFilter::getFilter()); String segmentPrefix1(segmentName + L"."); String segmentPrefix2(segmentName + L"_"); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (filter->accept(L"", *fileName) && (segmentName.empty() || boost::starts_with(*fileName, segmentPrefix1) || boost::starts_with(*fileName, segmentPrefix2)) && !refCounts.contains(*fileName) && *fileName != IndexFileNames::SEGMENTS_GEN()) { // Unreferenced file, so remove it if (infoStream) message(L"refresh [prefix=" + segmentName + L"]: removing newly created unreferenced file \"" + *fileName + L"\""); deleteFile(*fileName); } } } void IndexFileDeleter::refresh() { refresh(L""); } void IndexFileDeleter::close() { // DecRef old files from the last checkpoint, if any for (Collection< HashSet >::iterator file = lastFiles.begin(); file != lastFiles.end(); ++file) decRef(*file); lastFiles.clear(); deletePendingFiles(); } void IndexFileDeleter::deletePendingFiles() { if (deletable) { HashSet oldDeletable(deletable); deletable.reset(); for (HashSet::iterator fileName = oldDeletable.begin(); fileName != oldDeletable.end(); ++fileName) { if (infoStream) message(L"delete pending file " + *fileName); deleteFile(*fileName); } } } void IndexFileDeleter::checkpoint(SegmentInfosPtr segmentInfos, bool isCommit) { if (infoStream) message(L"now checkpoint \"" + segmentInfos->getCurrentSegmentFileName() + L"\" [" + StringUtils::toString(segmentInfos->size()) + L" segments; isCommit = " + StringUtils::toString(isCommit) + L"]"); // Try again now to delete any previously un-deletable files (because they were in use, on Windows) deletePendingFiles(); // Incref the files incRef(segmentInfos, isCommit); if (isCommit) { // Append to our commits list commits.add(newLucene(commitsToDelete, directory, 
segmentInfos)); // Tell policy so it can remove commits policy->onCommit(commits); // Decref files for commits that were deleted by the policy deleteCommits(); } else { HashSet docWriterFiles; if (docWriter) { docWriterFiles = docWriter->openFiles(); if (docWriterFiles) { // We must incRef these files before decRef'ing last files to make sure we // don't accidentally delete them incRef(docWriterFiles); } } // DecRef old files from the last checkpoint, if any for (Collection< HashSet >::iterator file = lastFiles.begin(); file != lastFiles.end(); ++file) decRef(*file); lastFiles.clear(); // Save files so we can decr on next checkpoint/commit lastFiles.add(segmentInfos->files(directory, false)); if (docWriterFiles) lastFiles.add(docWriterFiles); } } void IndexFileDeleter::incRef(SegmentInfosPtr segmentInfos, bool isCommit) { // If this is a commit point, also incRef the segments_N file HashSet files(segmentInfos->files(directory, isCommit)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) incRef(*fileName); } void IndexFileDeleter::incRef(HashSet files) { for (HashSet::iterator file = files.begin(); file != files.end(); ++file) incRef(*file); } void IndexFileDeleter::incRef(const String& fileName) { RefCountPtr rc(getRefCount(fileName)); if (infoStream && VERBOSE_REF_COUNTS) message(L" IncRef \"" + fileName + L"\": pre-incr count is " + StringUtils::toString(rc->count)); rc->IncRef(); } void IndexFileDeleter::decRef(HashSet files) { for (HashSet::iterator file = files.begin(); file != files.end(); ++file) decRef(*file); } void IndexFileDeleter::decRef(const String& fileName) { RefCountPtr rc(getRefCount(fileName)); if (infoStream && VERBOSE_REF_COUNTS) message(L" DecRef \"" + fileName + L"\": pre-decr count is " + StringUtils::toString(rc->count)); if (rc->DecRef() == 0) { // This file is no longer referenced by any past commit points nor by the in-memory SegmentInfos deleteFile(fileName); refCounts.remove(fileName); if (synced) { 
SyncLock syncLock(&synced); synced.remove(fileName); } } } void IndexFileDeleter::decRef(SegmentInfosPtr segmentInfos) { decRef(segmentInfos->files(directory, false)); } bool IndexFileDeleter::exists(const String& fileName) { return refCounts.contains(fileName) ? getRefCount(fileName)->count > 0 : false; } RefCountPtr IndexFileDeleter::getRefCount(const String& fileName) { RefCountPtr rc; MapStringRefCount::iterator ref = refCounts.find(fileName); if (ref == refCounts.end()) { rc = newLucene(fileName); refCounts.put(fileName, rc); } else rc = ref->second; return rc; } void IndexFileDeleter::deleteFiles(HashSet files) { for (HashSet::iterator file = files.begin(); file != files.end(); ++file) deleteFile(*file); } void IndexFileDeleter::deleteNewFiles(HashSet files) { for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (!refCounts.contains(*fileName)) { if (infoStream) message(L"delete new file \"" + *fileName + L"\""); deleteFile(*fileName); } } } void IndexFileDeleter::deleteFile(const String& fileName) { try { if (infoStream) message(L"delete \"" + fileName + L"\""); directory->deleteFile(fileName); } catch (IOException& e) // if delete fails { if (directory->fileExists(fileName)) // if delete fails { // Some operating systems (eg. Windows) don't permit a file to be deleted while it is opened // for read (eg. by another process or thread). So we assume that when a delete fails it is // because the file is open in another process, and queue the file for subsequent deletion. 
if (infoStream) message(L"IndexFileDeleter: unable to remove file \"" + fileName + L"\": " + e.getError() + L"; Will re-try later."); if (!deletable) deletable = HashSet::newInstance(); deletable.add(fileName); // add to deletable } } } RefCount::RefCount(const String& fileName) { initDone = false; count = 0; this->fileName = fileName; } RefCount::~RefCount() { } int32_t RefCount::IncRef() { if (!initDone) initDone = true; else BOOST_ASSERT(count > 0); return ++count; } int32_t RefCount::DecRef() { BOOST_ASSERT(count > 0); return --count; } CommitPoint::CommitPoint(Collection commitsToDelete, DirectoryPtr directory, SegmentInfosPtr segmentInfos) { deleted = false; this->directory = directory; this->commitsToDelete = commitsToDelete; userData = segmentInfos->getUserData(); segmentsFileName = segmentInfos->getCurrentSegmentFileName(); version = segmentInfos->getVersion(); generation = segmentInfos->getGeneration(); HashSet files(segmentInfos->files(directory, true)); this->files = HashSet::newInstance(files.begin(), files.end()); gen = segmentInfos->getGeneration(); _isOptimized = (segmentInfos->size() == 1 && !segmentInfos->info(0)->hasDeletions()); BOOST_ASSERT(!segmentInfos->hasExternalSegments(directory)); } CommitPoint::~CommitPoint() { } String CommitPoint::toString() { return L"IndexFileDeleter::CommitPoint(" + segmentsFileName + L")"; } bool CommitPoint::isOptimized() { return _isOptimized; } String CommitPoint::getSegmentsFileName() { return segmentsFileName; } HashSet CommitPoint::getFileNames() { return files; } DirectoryPtr CommitPoint::getDirectory() { return directory; } int64_t CommitPoint::getVersion() { return version; } int64_t CommitPoint::getGeneration() { return generation; } MapStringString CommitPoint::getUserData() { return userData; } void CommitPoint::deleteCommit() { if (!deleted) { deleted = true; commitsToDelete.add(shared_from_this()); } } bool CommitPoint::isDeleted() { return deleted; } int32_t CommitPoint::compareTo(LuceneObjectPtr 
other) { CommitPointPtr otherCommit(boost::static_pointer_cast(other)); if (gen < otherCommit->gen) return -1; if (gen > otherCommit->gen) return 1; return 0; } } LucenePlusPlus-rel_3.0.4/src/core/index/IndexFileNameFilter.cpp000066400000000000000000000043201217574114600244620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "IndexFileNameFilter.h" #include "IndexFileNames.h" namespace Lucene { bool IndexFileNameFilter::accept(const String& directory, const String& name) { String::size_type i = name.find_last_of(L'.'); if (i != String::npos) { String extension(name.substr(i+1)); if (IndexFileNames::INDEX_EXTENSIONS().contains(extension)) return true; else if (!extension.empty()) { if (extension[0] == L'f' && boost::regex_search(extension, boost::wregex(L"f\\d+"))) return true; if (extension[0] == L's' && boost::regex_search(extension, boost::wregex(L"s\\d+"))) return true; } } else { if (name == IndexFileNames::DELETABLE()) return true; if (boost::starts_with(name, IndexFileNames::SEGMENTS())) return true; } return false; } bool IndexFileNameFilter::isCFSFile(const String& name) { String::size_type i = name.find_last_of(L'.'); if (i != String::npos) { String extension(name.substr(i+1)); if (IndexFileNames::INDEX_EXTENSIONS_IN_COMPOUND_FILE().contains(extension)) return true; else if (!extension.empty() && extension[0] == L'f' && boost::regex_search(extension, boost::wregex(L"f\\d+"))) return true; } return false; } IndexFileNameFilterPtr IndexFileNameFilter::getFilter() { static IndexFileNameFilterPtr singleton; if (!singleton) { singleton = newLucene(); CycleCheck::addStatic(singleton); } return 
singleton; } } LucenePlusPlus-rel_3.0.4/src/core/index/IndexFileNames.cpp000066400000000000000000000226621217574114600235100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "IndexFileNames.h" #include "SegmentInfo.h" #include "StringUtils.h" namespace Lucene { IndexFileNames::~IndexFileNames() { } const String& IndexFileNames::SEGMENTS() { static String _SEGMENTS(L"segments"); return _SEGMENTS; } const String& IndexFileNames::SEGMENTS_GEN() { static String _SEGMENTS_GEN(L"segments.gen"); return _SEGMENTS_GEN; } const String& IndexFileNames::DELETABLE() { static String _DELETABLE(L"deletable"); return _DELETABLE; } const String& IndexFileNames::NORMS_EXTENSION() { static String _NORMS_EXTENSION(L"nrm"); return _NORMS_EXTENSION; } const String& IndexFileNames::FREQ_EXTENSION() { static String _FREQ_EXTENSION(L"frq"); return _FREQ_EXTENSION; } const String& IndexFileNames::PROX_EXTENSION() { static String _PROX_EXTENSION(L"prx"); return _PROX_EXTENSION; } const String& IndexFileNames::TERMS_EXTENSION() { static String _TERMS_EXTENSION(L"tis"); return _TERMS_EXTENSION; } const String& IndexFileNames::TERMS_INDEX_EXTENSION() { static String _TERMS_INDEX_EXTENSION(L"tii"); return _TERMS_INDEX_EXTENSION; } const String& IndexFileNames::FIELDS_INDEX_EXTENSION() { static String _FIELDS_INDEX_EXTENSION(L"fdx"); return _FIELDS_INDEX_EXTENSION; } const String& IndexFileNames::FIELDS_EXTENSION() { static String _FIELDS_EXTENSION(L"fdt"); return _FIELDS_EXTENSION; } const String& IndexFileNames::VECTORS_FIELDS_EXTENSION() { static String _VECTORS_FIELDS_EXTENSION(L"tvf"); return _VECTORS_FIELDS_EXTENSION; } const String& 
IndexFileNames::VECTORS_DOCUMENTS_EXTENSION() { static String _VECTORS_DOCUMENTS_EXTENSION(L"tvd"); return _VECTORS_DOCUMENTS_EXTENSION; } const String& IndexFileNames::VECTORS_INDEX_EXTENSION() { static String _VECTORS_INDEX_EXTENSION(L"tvx"); return _VECTORS_INDEX_EXTENSION; } const String& IndexFileNames::COMPOUND_FILE_EXTENSION() { static String _COMPOUND_FILE_EXTENSION(L"cfs"); return _COMPOUND_FILE_EXTENSION; } const String& IndexFileNames::COMPOUND_FILE_STORE_EXTENSION() { static String _COMPOUND_FILE_STORE_EXTENSION(L"cfx"); return _COMPOUND_FILE_STORE_EXTENSION; } const String& IndexFileNames::DELETES_EXTENSION() { static String _DELETES_EXTENSION(L"del"); return _DELETES_EXTENSION; } const String& IndexFileNames::FIELD_INFOS_EXTENSION() { static String _FIELD_INFOS_EXTENSION(L"fnm"); return _FIELD_INFOS_EXTENSION; } const String& IndexFileNames::PLAIN_NORMS_EXTENSION() { static String _PLAIN_NORMS_EXTENSION(L"f"); return _PLAIN_NORMS_EXTENSION; } const String& IndexFileNames::SEPARATE_NORMS_EXTENSION() { static String _SEPARATE_NORMS_EXTENSION(L"s"); return _SEPARATE_NORMS_EXTENSION; } const String& IndexFileNames::GEN_EXTENSION() { static String _GEN_EXTENSION(L"gen"); return _GEN_EXTENSION; } const HashSet IndexFileNames::INDEX_EXTENSIONS() { static HashSet _INDEX_EXTENSIONS; if (!_INDEX_EXTENSIONS) { _INDEX_EXTENSIONS = HashSet::newInstance(); _INDEX_EXTENSIONS.add(COMPOUND_FILE_EXTENSION()); _INDEX_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); _INDEX_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); _INDEX_EXTENSIONS.add(FIELDS_EXTENSION()); _INDEX_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); _INDEX_EXTENSIONS.add(TERMS_EXTENSION()); _INDEX_EXTENSIONS.add(FREQ_EXTENSION()); _INDEX_EXTENSIONS.add(PROX_EXTENSION()); _INDEX_EXTENSIONS.add(DELETES_EXTENSION()); _INDEX_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); _INDEX_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); _INDEX_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); _INDEX_EXTENSIONS.add(GEN_EXTENSION()); 
_INDEX_EXTENSIONS.add(NORMS_EXTENSION()); _INDEX_EXTENSIONS.add(COMPOUND_FILE_STORE_EXTENSION()); } return _INDEX_EXTENSIONS; }; const HashSet IndexFileNames::INDEX_EXTENSIONS_IN_COMPOUND_FILE() { static HashSet _INDEX_EXTENSIONS_IN_COMPOUND_FILE; if (!_INDEX_EXTENSIONS_IN_COMPOUND_FILE) { _INDEX_EXTENSIONS_IN_COMPOUND_FILE = HashSet::newInstance(); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELD_INFOS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELDS_INDEX_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELDS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(TERMS_INDEX_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(TERMS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FREQ_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(PROX_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_INDEX_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_DOCUMENTS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_FIELDS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(NORMS_EXTENSION()); } return _INDEX_EXTENSIONS_IN_COMPOUND_FILE; }; const HashSet IndexFileNames::STORE_INDEX_EXTENSIONS() { static HashSet _STORE_INDEX_EXTENSIONS; if (!_STORE_INDEX_EXTENSIONS) { _STORE_INDEX_EXTENSIONS = HashSet::newInstance(); _STORE_INDEX_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); _STORE_INDEX_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); _STORE_INDEX_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); _STORE_INDEX_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); _STORE_INDEX_EXTENSIONS.add(FIELDS_EXTENSION()); } return _STORE_INDEX_EXTENSIONS; }; const HashSet IndexFileNames::NON_STORE_INDEX_EXTENSIONS() { static HashSet _NON_STORE_INDEX_EXTENSIONS; if (!_NON_STORE_INDEX_EXTENSIONS) { _NON_STORE_INDEX_EXTENSIONS = HashSet::newInstance(); _NON_STORE_INDEX_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); _NON_STORE_INDEX_EXTENSIONS.add(FREQ_EXTENSION()); _NON_STORE_INDEX_EXTENSIONS.add(PROX_EXTENSION()); _NON_STORE_INDEX_EXTENSIONS.add(TERMS_EXTENSION()); 
_NON_STORE_INDEX_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); _NON_STORE_INDEX_EXTENSIONS.add(NORMS_EXTENSION()); } return _NON_STORE_INDEX_EXTENSIONS; }; const HashSet IndexFileNames::COMPOUND_EXTENSIONS() { static HashSet _COMPOUND_EXTENSIONS; if (!_COMPOUND_EXTENSIONS) { _COMPOUND_EXTENSIONS = HashSet::newInstance(); _COMPOUND_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); _COMPOUND_EXTENSIONS.add(FREQ_EXTENSION()); _COMPOUND_EXTENSIONS.add(PROX_EXTENSION()); _COMPOUND_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); _COMPOUND_EXTENSIONS.add(FIELDS_EXTENSION()); _COMPOUND_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); _COMPOUND_EXTENSIONS.add(TERMS_EXTENSION()); } return _COMPOUND_EXTENSIONS; }; const HashSet IndexFileNames::VECTOR_EXTENSIONS() { static HashSet _VECTOR_EXTENSIONS; if (!_VECTOR_EXTENSIONS) { _VECTOR_EXTENSIONS = HashSet::newInstance(); _VECTOR_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); _VECTOR_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); _VECTOR_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); } return _VECTOR_EXTENSIONS; }; String IndexFileNames::fileNameFromGeneration(const String& base, const String& extension, int64_t gen) { if (gen == SegmentInfo::NO) return L""; else if (gen == SegmentInfo::WITHOUT_GEN) return base + extension; else return base + L"_" + StringUtils::toString(gen, StringUtils::CHARACTER_MAX_RADIX) + extension; } bool IndexFileNames::isDocStoreFile(const String& fileName) { if (boost::ends_with(fileName, COMPOUND_FILE_STORE_EXTENSION())) return true; for (HashSet::iterator index = STORE_INDEX_EXTENSIONS().begin(); index != STORE_INDEX_EXTENSIONS().end(); ++index) { if (boost::ends_with(fileName, *index)) return true; } return false; } String IndexFileNames::segmentFileName(const String& segmentName, const String& ext) { return segmentName + L"." 
+ ext; } } LucenePlusPlus-rel_3.0.4/src/core/index/IndexReader.cpp000066400000000000000000000327031217574114600230440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "IndexReader.h" #include "_IndexReader.h" #include "DirectoryReader.h" #include "IndexDeletionPolicy.h" #include "FSDirectory.h" #include "FieldSelector.h" #include "Similarity.h" #include "CompoundFileReader.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t IndexReader::DEFAULT_TERMS_INDEX_DIVISOR = 1; IndexReader::IndexReader() { refCount = 1; closed = false; _hasChanges = false; } IndexReader::~IndexReader() { } int32_t IndexReader::getRefCount() { SyncLock syncLock(this); return refCount; } void IndexReader::incRef() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0); ensureOpen(); ++refCount; } void IndexReader::decRef() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0); ensureOpen(); if (refCount == 1) { commit(); doClose(); } --refCount; } void IndexReader::ensureOpen() { if (refCount <= 0) boost::throw_exception(AlreadyClosedException(L"this IndexReader is closed")); } IndexReaderPtr IndexReader::open(DirectoryPtr directory) { return open(directory, IndexDeletionPolicyPtr(), IndexCommitPtr(), true, DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(DirectoryPtr directory, bool readOnly) { return open(directory, IndexDeletionPolicyPtr(), IndexCommitPtr(), readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(IndexCommitPtr commit, bool readOnly) { return open(commit->getDirectory(), IndexDeletionPolicyPtr(), commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } 
IndexReaderPtr IndexReader::open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, bool readOnly) { return open(directory, deletionPolicy, IndexCommitPtr(), readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { return open(directory, deletionPolicy, IndexCommitPtr(), readOnly, termInfosIndexDivisor); } IndexReaderPtr IndexReader::open(IndexCommitPtr commit, IndexDeletionPolicyPtr deletionPolicy, bool readOnly) { return open(commit->getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(IndexCommitPtr commit, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { return open(commit->getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); } IndexReaderPtr IndexReader::open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, IndexCommitPtr commit, bool readOnly, int32_t termInfosIndexDivisor) { return DirectoryReader::open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); } IndexReaderPtr IndexReader::reopen() { SyncLock syncLock(this); boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen().")); return IndexReaderPtr(); } IndexReaderPtr IndexReader::reopen(bool openReadOnly) { SyncLock syncLock(this); boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen().")); return IndexReaderPtr(); } IndexReaderPtr IndexReader::reopen(IndexCommitPtr commit) { SyncLock syncLock(this); boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen(IndexCommit).")); return IndexReaderPtr(); } LuceneObjectPtr IndexReader::clone(LuceneObjectPtr other) { SyncLock syncLock(this); if (!other) boost::throw_exception(UnsupportedOperationException(L"This reader does not implement clone().")); return other; } 
LuceneObjectPtr IndexReader::clone(bool openReadOnly, LuceneObjectPtr other) { SyncLock syncLock(this); if (!other) boost::throw_exception(UnsupportedOperationException(L"This reader does not implement clone(bool).")); return other; } DirectoryPtr IndexReader::directory() { ensureOpen(); boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return DirectoryPtr(); } int64_t IndexReader::lastModified(DirectoryPtr directory2) { return newLucene(newLucene(), directory2)->run(); } int64_t IndexReader::getCurrentVersion(DirectoryPtr directory) { return SegmentInfos::readCurrentVersion(directory); } MapStringString IndexReader::getCommitUserData(DirectoryPtr directory) { return SegmentInfos::readCurrentUserData(directory); } int64_t IndexReader::getVersion() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return 0; } MapStringString IndexReader::getCommitUserData() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return MapStringString(); } bool IndexReader::isCurrent() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return false; } bool IndexReader::isOptimized() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return false; } bool IndexReader::indexExists(DirectoryPtr directory) { return (SegmentInfos::getCurrentSegmentGeneration(directory) != -1); } int32_t IndexReader::numDeletedDocs() { return (maxDoc() - numDocs()); } DocumentPtr IndexReader::document(int32_t n) { ensureOpen(); return document(n, FieldSelectorPtr()); } bool IndexReader::hasChanges() { return _hasChanges; } bool IndexReader::hasNorms(const String& field) { // backward compatible implementation. // SegmentReader has an efficient implementation. 
ensureOpen(); return norms(field); } void IndexReader::setNorm(int32_t doc, const String& field, uint8_t value) { SyncLock syncLock(this); ensureOpen(); acquireWriteLock(); _hasChanges = true; doSetNorm(doc, field, value); } void IndexReader::setNorm(int32_t doc, const String& field, double value) { ensureOpen(); setNorm(doc, field, Similarity::encodeNorm(value)); } TermDocsPtr IndexReader::termDocs(TermPtr term) { ensureOpen(); TermDocsPtr _termDocs(termDocs()); _termDocs->seek(term); return _termDocs; } TermPositionsPtr IndexReader::termPositions(TermPtr term) { ensureOpen(); TermPositionsPtr _termPositions(termPositions()); _termPositions->seek(term); return _termPositions; } void IndexReader::deleteDocument(int32_t docNum) { SyncLock syncLock(this); ensureOpen(); acquireWriteLock(); _hasChanges = true; doDelete(docNum); } int32_t IndexReader::deleteDocuments(TermPtr term) { ensureOpen(); TermDocsPtr docs(termDocs(term)); if (!docs) return 0; int32_t n = 0; LuceneException finally; try { while (docs->next()) { deleteDocument(docs->doc()); ++n; } } catch (LuceneException& e) { finally = e; } docs->close(); finally.throwException(); return n; } void IndexReader::undeleteAll() { SyncLock syncLock(this); ensureOpen(); acquireWriteLock(); _hasChanges = true; doUndeleteAll(); } void IndexReader::acquireWriteLock() { SyncLock syncLock(this); // NOOP } void IndexReader::flush() { SyncLock syncLock(this); ensureOpen(); commit(); } void IndexReader::flush(MapStringString commitUserData) { SyncLock syncLock(this); ensureOpen(); commit(commitUserData); } void IndexReader::commit() { commit(MapStringString()); } void IndexReader::commit(MapStringString commitUserData) { SyncLock syncLock(this); if (_hasChanges) doCommit(commitUserData); _hasChanges = false; } void IndexReader::close() { SyncLock syncLock(this); if (!closed) { decRef(); closed = true; } } IndexCommitPtr IndexReader::getIndexCommit() { boost::throw_exception(UnsupportedOperationException(L"This reader does not 
support this method.")); return IndexCommitPtr(); } void IndexReader::main(Collection args) { String filename; bool extract = false; for (Collection::iterator arg = args.begin(); arg != args.end(); ++arg) { if (*arg == L"-extract") extract = true; else if (filename.empty()) filename = *arg; } if (filename.empty()) { std::wcout << L"Usage: IndexReader [-extract] "; return; } DirectoryPtr dir; CompoundFileReaderPtr cfr; LuceneException finally; try { String dirname(FileUtils::extractPath(filename)); filename = FileUtils::extractPath(filename); dir = FSDirectory::open(dirname); cfr = newLucene(dir, filename); HashSet _files(cfr->listAll()); Collection files(Collection::newInstance(_files.begin(), _files.end())); std::sort(files.begin(), files.end()); // sort the array of filename so that the output is more readable for (Collection::iterator file = files.begin(); file != files.end(); ++file) { int64_t len = cfr->fileLength(*file); if (extract) { std::wcout << L"extract " << *file << L" with " << len << L" bytes to local directory..."; IndexInputPtr ii(cfr->openInput(*file)); std::ofstream f(StringUtils::toUTF8(*file).c_str(), std::ios::binary | std::ios::out); // read and write with a small buffer, which is more effective than reading byte by byte ByteArray buffer(ByteArray::newInstance(1024)); int32_t chunk = buffer.size(); while (len > 0) { int32_t bufLen = std::min(chunk, (int32_t)len); ii->readBytes(buffer.get(), 0, bufLen); f.write((char*)buffer.get(), bufLen); len -= bufLen; } ii->close(); } else std::wcout << *file << L": " << len << " bytes\n"; } } catch (LuceneException& e) { finally = e; } if (dir) dir->close(); if (cfr) cfr->close(); finally.throwException(); } Collection IndexReader::listCommits(DirectoryPtr dir) { return DirectoryReader::listCommits(dir); } Collection IndexReader::getSequentialSubReaders() { return Collection(); // override } LuceneObjectPtr IndexReader::getFieldCacheKey() { return shared_from_this(); } LuceneObjectPtr 
IndexReader::getDeletesCacheKey() { return shared_from_this(); } int64_t IndexReader::getUniqueTermCount() { boost::throw_exception(UnsupportedOperationException(L"This reader does not implement getUniqueTermCount()")); return 0; } int32_t IndexReader::getTermInfosIndexDivisor() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return 0; } FindSegmentsModified::FindSegmentsModified(SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFileT(infos, directory) { result = 0; } FindSegmentsModified::~FindSegmentsModified() { } uint64_t FindSegmentsModified::doBody(const String& segmentFileName) { return directory->fileModified(segmentFileName); } } LucenePlusPlus-rel_3.0.4/src/core/index/IndexWriter.cpp000066400000000000000000004102571217574114600231220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexWriter.h" #include "_IndexWriter.h" #include "Directory.h" #include "Analyzer.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "DocumentsWriter.h" #include "IndexFileDeleter.h" #include "IndexFileNames.h" #include "Lock.h" #include "SegmentInfo.h" #include "SegmentReader.h" #include "ReadOnlyDirectoryReader.h" #include "BufferedIndexInput.h" #include "LogByteSizeMergePolicy.h" #include "LogDocMergePolicy.h" #include "Similarity.h" #include "ConcurrentMergeScheduler.h" #include "CompoundFileWriter.h" #include "SegmentMerger.h" #include "DateTools.h" #include "Constants.h" #include "InfoStream.h" #include "TestPoint.h" #include "StringUtils.h" namespace Lucene { /// The normal read buffer size defaults to 1024, but increasing this during merging seems to /// yield performance gains. However we don't want to increase it too much because there are /// quite a few BufferedIndexInputs created during merging. const int32_t IndexWriter::MERGE_READ_BUFFER_SIZE = 4096; int32_t IndexWriter::MESSAGE_ID = 0; InfoStreamPtr IndexWriter::defaultInfoStream; /// Default value for the write lock timeout (1,000). int64_t IndexWriter::WRITE_LOCK_TIMEOUT = 1000; const String IndexWriter::WRITE_LOCK_NAME = L"write.lock"; /// Value to denote a flush trigger is disabled. const int32_t IndexWriter::DISABLE_AUTO_FLUSH = -1; /// Disabled by default (because IndexWriter flushes by RAM usage by default). const int32_t IndexWriter::DEFAULT_MAX_BUFFERED_DOCS = IndexWriter::DISABLE_AUTO_FLUSH; /// Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). const double IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB = 16.0; /// Disabled by default (because IndexWriter flushes by RAM usage by default). const int32_t IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriter::DISABLE_AUTO_FLUSH; /// Default value is 10000. 
const int32_t IndexWriter::DEFAULT_MAX_FIELD_LENGTH = 10000; /// Default value is 128. const int32_t IndexWriter::DEFAULT_TERM_INDEX_INTERVAL = 128; /// Sets the maximum field length to INT_MAX const int32_t IndexWriter::MaxFieldLengthUNLIMITED = INT_MAX; /// Sets the maximum field length to {@link #DEFAULT_MAX_FIELD_LENGTH} const int32_t IndexWriter::MaxFieldLengthLIMITED = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) { this->directory = d; this->analyzer = a; this->create = create; this->maxFieldLength = mfl; } IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, int32_t mfl) { this->directory = d; this->analyzer = a; this->create = !IndexReader::indexExists(d); this->maxFieldLength = mfl; } IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl) { this->directory = d; this->analyzer = a; this->deletionPolicy = deletionPolicy; this->create = !IndexReader::indexExists(d); this->maxFieldLength = mfl; } IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl) { this->directory = d; this->analyzer = a; this->create = create; this->deletionPolicy = deletionPolicy; this->maxFieldLength = mfl; } IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl, IndexingChainPtr indexingChain, IndexCommitPtr commit) { this->directory = d; this->analyzer = a; this->create = create; this->deletionPolicy = deletionPolicy; this->maxFieldLength = mfl; this->indexingChain = indexingChain; this->indexCommit = commit; } IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl, IndexCommitPtr commit) { this->directory = d; this->analyzer = a; this->create = false; this->deletionPolicy = deletionPolicy; this->maxFieldLength = mfl; this->indexCommit = commit; } IndexWriter::~IndexWriter() 
{ } void IndexWriter::initialize() { messageID = -1; messageIDLock = newInstance(); setMessageID(defaultInfoStream); this->writeLockTimeout = WRITE_LOCK_TIMEOUT; this->segmentInfos = newLucene(); pendingMerges = Collection::newInstance(); mergeExceptions = Collection::newInstance(); segmentsToOptimize = SetSegmentInfo::newInstance(); optimizeMaxNumSegments = 0; mergingSegments = SetSegmentInfo::newInstance(); runningMerges = SetOneMerge::newInstance(); synced = HashSet::newInstance(); syncing = HashSet::newInstance(); changeCount = 0; lastCommitChangeCount = 0; poolReaders = false; readCount = 0; writeThread = 0; upgradeCount = 0; readerTermsIndexDivisor = IndexReader::DEFAULT_TERMS_INDEX_DIVISOR; readerPool = newLucene(shared_from_this()); closed = false; closing = false; hitOOM = false; stopMerges = false; mergeGen = 0; flushCount = 0; flushDeletesCount = 0; localFlushedDocCount = 0; pendingCommitChangeCount = 0; mergePolicy = newLucene(shared_from_this()); mergeScheduler = newLucene(); similarity = Similarity::getDefault(); termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; commitLock = newInstance(); if (!indexingChain) indexingChain = DocumentsWriter::getDefaultIndexingChain(); if (create) directory->clearLock(WRITE_LOCK_NAME); // clear the write lock in case it's leftover LockPtr writeLock(directory->makeLock(WRITE_LOCK_NAME)); if (!writeLock->obtain((int32_t)writeLockTimeout)) // obtain write lock boost::throw_exception(LockObtainFailedException(L"Index locked for write: " + writeLock->toString())); this->writeLock = writeLock; bool success = false; LuceneException finally; try { if (create) { // Try to read first. This is to allow create against an index that's currently open for // searching. 
In this case we write the next segments_N file with no segments bool doCommit; try { segmentInfos->read(directory); segmentInfos->clear(); doCommit = false; } catch (LuceneException&) { // Likely this means it's a fresh directory doCommit = true; } if (doCommit) { // Only commit if there is no segments file in this dir already. segmentInfos->commit(directory); HashSet files(segmentInfos->files(directory, true)); synced.addAll(files.begin(), files.end()); } else { // Record that we have a change (zero out all segments) pending ++changeCount; } } else { segmentInfos->read(directory); if (indexCommit) { // Swap out all segments, but, keep metadata in SegmentInfos, like version & generation, to // preserve write-once. This is important if readers are open against the future commit points. if (indexCommit->getDirectory() != directory) boost::throw_exception(IllegalArgumentException(L"IndexCommit's directory doesn't match my directory")); SegmentInfosPtr oldInfos(newLucene()); oldInfos->read(directory, indexCommit->getSegmentsFileName()); segmentInfos->replace(oldInfos); ++changeCount; if (infoStream) message(L"init: loaded commit \"" + indexCommit->getSegmentsFileName() + L"\""); } // We assume that this segments_N was previously properly sync'd HashSet files(segmentInfos->files(directory, true)); synced.addAll(files.begin(), files.end()); } setRollbackSegmentInfos(segmentInfos); docWriter = newLucene(directory, shared_from_this(), indexingChain); docWriter->setInfoStream(infoStream); docWriter->setMaxFieldLength(maxFieldLength); // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter deleter = newLucene(directory, deletionPolicy ? deletionPolicy : newLucene(), segmentInfos, infoStream, docWriter, synced); if (deleter->startingCommitDeleted) { // Deletion policy deleted the "head" commit point. We have to mark ourself as changed so that if we // are closed without any further changes we write a new segments_N file. 
++changeCount; } pushMaxBufferedDocs(); if (infoStream) message(L"init: create=" + StringUtils::toString(create)); messageState(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) message(L"init: hit exception on init; releasing write lock"); try { this->writeLock->release(); } catch (...) { // don't mask the original exception } this->writeLock.reset(); } finally.throwException(); } int32_t IndexWriter::MAX_TERM_LENGTH() { static int32_t _MAX_TERM_LENGTH = 0; if (_MAX_TERM_LENGTH == 0) _MAX_TERM_LENGTH = DocumentsWriter::MAX_TERM_LENGTH; return _MAX_TERM_LENGTH; } IndexReaderPtr IndexWriter::getReader() { return getReader(readerTermsIndexDivisor); } IndexReaderPtr IndexWriter::getReader(int32_t termInfosIndexDivisor) { ensureOpen(); if (infoStream) message(L"flush at getReader"); // Do this up front before flushing so that the readers obtained during this flush are pooled, the first time // this method is called poolReaders = true; // Prevent segmentInfos from changing while opening the reader; in theory we could do similar retry logic, // just like we do when loading segments_N IndexReaderPtr r; { SyncLock syncLock(this); flush(false, true, true); r = newLucene(shared_from_this(), segmentInfos, termInfosIndexDivisor); } maybeMerge(); return r; } int32_t IndexWriter::numDeletedDocs(SegmentInfoPtr info) { SegmentReaderPtr reader(readerPool->getIfExists(info)); int32_t deletedDocs = 0; LuceneException finally; try { deletedDocs = reader ? 
reader->numDeletedDocs() : info->getDelCount(); } catch (LuceneException& e) { finally = e; } if (reader) readerPool->release(reader); finally.throwException(); return deletedDocs; } void IndexWriter::acquireWrite() { SyncLock syncLock(this); BOOST_ASSERT(writeThread != LuceneThread::currentId()); while (writeThread != 0 || readCount > 0) doWait(); // we could have been closed while we were waiting ensureOpen(); writeThread = LuceneThread::currentId(); } void IndexWriter::releaseWrite() { SyncLock syncLock(this); BOOST_ASSERT(writeThread == LuceneThread::currentId()); writeThread = 0; notifyAll(); } void IndexWriter::acquireRead() { SyncLock syncLock(this); int64_t current = LuceneThread::currentId(); while (writeThread != 0 && writeThread != current) doWait(); ++readCount; } void IndexWriter::upgradeReadToWrite() { SyncLock syncLock(this); BOOST_ASSERT(readCount > 0); ++upgradeCount; while (readCount > upgradeCount || writeThread != 0) doWait(); writeThread = LuceneThread::currentId(); --readCount; --upgradeCount; } void IndexWriter::releaseRead() { SyncLock syncLock(this); --readCount; BOOST_ASSERT(readCount >= 0); notifyAll(); } bool IndexWriter::isOpen(bool includePendingClose) { SyncLock syncLock(this); return !(closed || (includePendingClose && closing)); } void IndexWriter::ensureOpen(bool includePendingClose) { SyncLock syncLock(this); if (!isOpen(includePendingClose)) boost::throw_exception(AlreadyClosedException(L"This IndexWriter is closed")); } void IndexWriter::ensureOpen() { ensureOpen(true); } void IndexWriter::message(const String& message) { if (infoStream) { *infoStream << L"IW " << StringUtils::toString(messageID); *infoStream << L" [" << DateTools::timeToString(MiscUtils::currentTimeMillis(), DateTools::RESOLUTION_SECOND); *infoStream << L"; " << StringUtils::toString(LuceneThread::currentId()) << L"]: " << message << L"\n"; } } void IndexWriter::setMessageID(InfoStreamPtr infoStream) { SyncLock syncLock(this); if (infoStream && messageID == -1) 
{ SyncLock messageLock(messageIDLock); messageID = MESSAGE_ID++; } this->infoStream = infoStream; } LogMergePolicyPtr IndexWriter::getLogMergePolicy() { LogMergePolicyPtr logMergePolicy(boost::dynamic_pointer_cast(mergePolicy)); if (logMergePolicy) return logMergePolicy; boost::throw_exception(IllegalArgumentException(L"This method can only be called when the merge policy is the default LogMergePolicy")); return LogMergePolicyPtr(); } bool IndexWriter::getUseCompoundFile() { return getLogMergePolicy()->getUseCompoundFile(); } void IndexWriter::setUseCompoundFile(bool value) { getLogMergePolicy()->setUseCompoundFile(value); getLogMergePolicy()->setUseCompoundDocStore(value); } void IndexWriter::setSimilarity(SimilarityPtr similarity) { ensureOpen(); this->similarity = similarity; docWriter->setSimilarity(similarity); } SimilarityPtr IndexWriter::getSimilarity() { ensureOpen(); return this->similarity; } void IndexWriter::setTermIndexInterval(int32_t interval) { ensureOpen(); this->termIndexInterval = interval; } int32_t IndexWriter::getTermIndexInterval() { // We pass false because this method is called by SegmentMerger while we are in the process of closing ensureOpen(false); return termIndexInterval; } void IndexWriter::setRollbackSegmentInfos(SegmentInfosPtr infos) { SyncLock syncLock(this); rollbackSegmentInfos = boost::dynamic_pointer_cast(infos->clone()); BOOST_ASSERT(!rollbackSegmentInfos->hasExternalSegments(directory)); rollbackSegments = MapSegmentInfoInt::newInstance(); int32_t size = rollbackSegmentInfos->size(); for (int32_t i = 0; i < size; ++i) rollbackSegments.put(rollbackSegmentInfos->info(i), i); } void IndexWriter::setMergePolicy(MergePolicyPtr mp) { ensureOpen(); if (!mp) boost::throw_exception(NullPointerException(L"MergePolicy must be non-null")); if (mergePolicy != mp) mergePolicy->close(); mergePolicy = mp; pushMaxBufferedDocs(); if (infoStream) message(L"setMergePolicy"); } MergePolicyPtr IndexWriter::getMergePolicy() { ensureOpen(); return 
mergePolicy; } void IndexWriter::setMergeScheduler(MergeSchedulerPtr mergeScheduler) { SyncLock syncLock(this); ensureOpen(); if (!mergeScheduler) boost::throw_exception(NullPointerException(L"MergeScheduler must be non-null")); if (this->mergeScheduler != mergeScheduler) { finishMerges(true); this->mergeScheduler->close(); } this->mergeScheduler = mergeScheduler; if (infoStream) message(L"setMergeScheduler"); } MergeSchedulerPtr IndexWriter::getMergeScheduler() { ensureOpen(); return mergeScheduler; } void IndexWriter::setMaxMergeDocs(int32_t maxMergeDocs) { getLogMergePolicy()->setMaxMergeDocs(maxMergeDocs); } int32_t IndexWriter::getMaxMergeDocs() { return getLogMergePolicy()->getMaxMergeDocs(); } void IndexWriter::setMaxFieldLength(int32_t maxFieldLength) { ensureOpen(); this->maxFieldLength = maxFieldLength; docWriter->setMaxFieldLength(maxFieldLength); if (infoStream) message(L"setMaxFieldLength " + StringUtils::toString(maxFieldLength)); } int32_t IndexWriter::getMaxFieldLength() { ensureOpen(); return maxFieldLength; } void IndexWriter::setReaderTermsIndexDivisor(int32_t divisor) { ensureOpen(); if (divisor <= 0) boost::throw_exception(IllegalArgumentException(L"divisor must be >= 1 (got " + StringUtils::toString(divisor) + L")")); readerTermsIndexDivisor = divisor; if (infoStream) message(L"setReaderTermsIndexDivisor " + StringUtils::toString(readerTermsIndexDivisor)); } int32_t IndexWriter::getReaderTermsIndexDivisor() { ensureOpen(); return readerTermsIndexDivisor; } void IndexWriter::setMaxBufferedDocs(int32_t maxBufferedDocs) { ensureOpen(); if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) boost::throw_exception(IllegalArgumentException(L"maxBufferedDocs must at least be 2 when enabled")); if (maxBufferedDocs == DISABLE_AUTO_FLUSH && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH) boost::throw_exception(IllegalArgumentException(L"at least one of ramBufferSize and maxBufferedDocs must be enabled")); 
docWriter->setMaxBufferedDocs(maxBufferedDocs); pushMaxBufferedDocs(); if (infoStream) message(L"setMaxBufferedDocs " + StringUtils::toString(maxBufferedDocs)); } void IndexWriter::pushMaxBufferedDocs() { if (docWriter->getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) { LogDocMergePolicyPtr lmp(boost::dynamic_pointer_cast(mergePolicy)); if (lmp) { int32_t maxBufferedDocs = docWriter->getMaxBufferedDocs(); if (lmp->getMinMergeDocs() != maxBufferedDocs) { if (infoStream) message(L"now push maxBufferedDocs " + StringUtils::toString(maxBufferedDocs) + L" to LogDocMergePolicy"); lmp->setMinMergeDocs(maxBufferedDocs); } } } } int32_t IndexWriter::getMaxBufferedDocs() { ensureOpen(); return docWriter->getMaxBufferedDocs(); } void IndexWriter::setRAMBufferSizeMB(double mb) { if (mb > 2048.0) boost::throw_exception(IllegalArgumentException(L"ramBufferSize " + StringUtils::toString(mb) + L" is too large; should be comfortably less than 2048")); if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0) boost::throw_exception(IllegalArgumentException(L"ramBufferSize should be > 0.0 MB when enabled")); if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH) boost::throw_exception(IllegalArgumentException(L"at least one of ramBufferSize and maxBufferedDocs must be enabled")); docWriter->setRAMBufferSizeMB(mb); if (infoStream) message(L"setRAMBufferSizeMB " + StringUtils::toString(mb)); } double IndexWriter::getRAMBufferSizeMB() { return docWriter->getRAMBufferSizeMB(); } void IndexWriter::setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms) { ensureOpen(); if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) boost::throw_exception(IllegalArgumentException(L"maxBufferedDeleteTerms must at least be 1 when enabled")); docWriter->setMaxBufferedDeleteTerms(maxBufferedDeleteTerms); if (infoStream) message(L"setMaxBufferedDeleteTerms " + StringUtils::toString(maxBufferedDeleteTerms)); } int32_t IndexWriter::getMaxBufferedDeleteTerms() { ensureOpen(); 
return docWriter->getMaxBufferedDeleteTerms(); } void IndexWriter::setMergeFactor(int32_t mergeFactor) { getLogMergePolicy()->setMergeFactor(mergeFactor); } int32_t IndexWriter::getMergeFactor() { return getLogMergePolicy()->getMergeFactor(); } void IndexWriter::setDefaultInfoStream(InfoStreamPtr infoStream) { IndexWriter::defaultInfoStream = infoStream; } InfoStreamPtr IndexWriter::getDefaultInfoStream() { return IndexWriter::defaultInfoStream; } void IndexWriter::setInfoStream(InfoStreamPtr infoStream) { ensureOpen(); setMessageID(infoStream); docWriter->setInfoStream(infoStream); deleter->setInfoStream(infoStream); messageState(); } void IndexWriter::messageState() { if (infoStream) { message(L"ramBufferSizeMB=" + StringUtils::toString(docWriter->getRAMBufferSizeMB()) + L" maxBufferedDocs=" + StringUtils::toString(docWriter->getMaxBufferedDocs()) + L" maxBuffereDeleteTerms=" + StringUtils::toString(docWriter->getMaxBufferedDeleteTerms()) + L" maxFieldLength=" + StringUtils::toString(maxFieldLength) + L" index=" + segString()); } } InfoStreamPtr IndexWriter::getInfoStream() { ensureOpen(); return infoStream; } bool IndexWriter::verbose() { return infoStream; } void IndexWriter::setWriteLockTimeout(int64_t writeLockTimeout) { ensureOpen(); this->writeLockTimeout = writeLockTimeout; } int64_t IndexWriter::getWriteLockTimeout() { ensureOpen(); return writeLockTimeout; } void IndexWriter::setDefaultWriteLockTimeout(int64_t writeLockTimeout) { IndexWriter::WRITE_LOCK_TIMEOUT = writeLockTimeout; } int64_t IndexWriter::getDefaultWriteLockTimeout() { return IndexWriter::WRITE_LOCK_TIMEOUT; } void IndexWriter::close() { close(true); } void IndexWriter::close(bool waitForMerges) { // Ensure that only one thread actually gets to do the closing if (shouldClose()) { // If any methods have hit std::bad_alloc, then abort on close, in case the internal state of IndexWriter // or DocumentsWriter is corrupt if (hitOOM) rollbackInternal(); else closeInternal(waitForMerges); } } 
bool IndexWriter::shouldClose() { SyncLock syncLock(this); while (true) { if (!closed) { if (!closing) { closing = true; return true; } else { // Another thread is presently trying to close; wait until it finishes one way (closes // successfully) or another (fails to close) doWait(); } } else return false; } } void IndexWriter::closeInternal(bool waitForMerges) { docWriter->pauseAllThreads(); LuceneException finally; try { if (infoStream) message(L"now flush at close"); docWriter->close(); // Only allow a new merge to be triggered if we are going to wait for merges if (!hitOOM) flush(waitForMerges, true, true); // Give merge scheduler last chance to run, in case any pending merges are waiting if (waitForMerges) mergeScheduler->merge(shared_from_this()); mergePolicy->close(); finishMerges(waitForMerges); stopMerges = true; mergeScheduler->close(); if (infoStream) message(L"now call final commit()"); if (!hitOOM) commit(0); if (infoStream) message(L"at close: " + segString()); { SyncLock syncLock(this); readerPool->close(); docWriter.reset(); deleter->close(); } if (writeLock) { writeLock->release(); // release write lock writeLock.reset(); } { SyncLock syncLock(this); closed = true; } } catch (std::bad_alloc& oom) { finally = handleOOM(oom, L"closeInternal"); } catch (LuceneException& e) { finally = e; } { SyncLock syncLock(this); closing = false; notifyAll(); if (!closed) { if (docWriter) docWriter->resumeAllThreads(); if (infoStream) message(L"hit exception while closing"); } } finally.throwException(); } bool IndexWriter::flushDocStores() { SyncLock syncLock(this); if (infoStream) message(L"flushDocStores segment=" + docWriter->getDocStoreSegment()); bool useCompoundDocStore = false; if (infoStream) message(L"closeDocStores segment=" + docWriter->getDocStoreSegment()); String docStoreSegment; bool success = false; LuceneException finally; try { docStoreSegment = docWriter->closeDocStore(); success = true; } catch (LuceneException& e) { finally = e; } if (!success 
&& infoStream) message(L"hit exception closing doc store segment"); finally.throwException(); if (infoStream) message(L"flushDocStores files=" + docWriter->closedFiles()); useCompoundDocStore = mergePolicy->useCompoundDocStore(segmentInfos); HashSet closedFiles(docWriter->closedFiles()); if (useCompoundDocStore && !docStoreSegment.empty() && !closedFiles.empty()) { // Now build compound doc store file if (infoStream) message(L"create compound file " + docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); success = false; int32_t numSegments = segmentInfos->size(); String compoundFileName(docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); try { CompoundFileWriterPtr cfsWriter(newLucene(directory, compoundFileName)); for (HashSet::iterator file = closedFiles.begin(); file != closedFiles.end(); ++file) cfsWriter->addFile(*file); // Perform the merge cfsWriter->close(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) message(L"hit exception building compound file doc store for segment " + docStoreSegment); deleter->deleteFile(compoundFileName); docWriter->abort(); } finally.throwException(); for (int32_t i = 0; i < numSegments; ++i) { SegmentInfoPtr si(segmentInfos->info(i)); if (si->getDocStoreOffset() != -1 && si->getDocStoreSegment() == docStoreSegment) si->setDocStoreIsCompoundFile(true); } checkpoint(); // In case the files we just merged into a CFS were not previously checkpointed deleter->deleteNewFiles(docWriter->closedFiles()); } return useCompoundDocStore; } DirectoryPtr IndexWriter::getDirectory() { ensureOpen(false); // Pass false because the flush during closing calls getDirectory return directory; } AnalyzerPtr IndexWriter::getAnalyzer() { ensureOpen(); return analyzer; } int32_t IndexWriter::maxDoc() { SyncLock syncLock(this); int32_t count = docWriter ? 
docWriter->getNumDocsInRAM() : 0; for (int32_t i = 0; i < segmentInfos->size(); ++i) count += segmentInfos->info(i)->docCount; return count; } int32_t IndexWriter::numDocs() { SyncLock syncLock(this); int32_t count = docWriter ? docWriter->getNumDocsInRAM() : 0; for (int32_t i = 0; i < segmentInfos->size(); ++i) { SegmentInfoPtr info(segmentInfos->info(i)); count += info->docCount - info->getDelCount(); } return count; } bool IndexWriter::hasDeletions() { SyncLock syncLock(this); ensureOpen(); if (docWriter->hasDeletes()) return true; for (int32_t i = 0; i < segmentInfos->size(); ++i) { if (segmentInfos->info(i)->hasDeletions()) return true; } return false; } void IndexWriter::addDocument(DocumentPtr doc) { addDocument(doc, analyzer); } void IndexWriter::addDocument(DocumentPtr doc, AnalyzerPtr analyzer) { ensureOpen(); bool doFlush = false; bool success = false; try { LuceneException finally; try { doFlush = docWriter->addDocument(doc, analyzer); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) message(L"hit exception adding document"); { SyncLock syncLock(this); // If docWriter has some aborted files that were never incref'd, then we clean them up here if (docWriter) { HashSet files(docWriter->abortedFiles()); if (files) deleter->deleteNewFiles(files); } } } finally.throwException(); if (doFlush) flush(true, false, false); } catch (std::bad_alloc& oom) { boost::throw_exception(handleOOM(oom, L"addDocument")); } } void IndexWriter::deleteDocuments(TermPtr term) { ensureOpen(); try { bool doFlush = docWriter->bufferDeleteTerm(term); if (doFlush) flush(true, false, false); } catch (std::bad_alloc& oom) { boost::throw_exception(handleOOM(oom, L"deleteDocuments(Term)")); } } void IndexWriter::deleteDocuments(Collection terms) { ensureOpen(); try { bool doFlush = docWriter->bufferDeleteTerms(terms); if (doFlush) flush(true, false, false); } catch (std::bad_alloc& oom) { boost::throw_exception(handleOOM(oom, 
L"deleteDocuments(VectorTerm)")); } }
// NOTE(review): template arguments appear stripped by archive extraction throughout this file
// (e.g. "Collection" should read "Collection<...>") -- verify against upstream before compiling.
// Buffer a delete-by-query; DocumentsWriter tells us whether the buffer is full enough to flush.
void IndexWriter::deleteDocuments(QueryPtr query) { ensureOpen(); bool doFlush = docWriter->bufferDeleteQuery(query); if (doFlush) flush(true, false, false); }
// Buffer a batch of delete queries; flush only when DocumentsWriter signals it.
void IndexWriter::deleteDocuments(Collection queries) { ensureOpen(); bool doFlush = docWriter->bufferDeleteQueries(queries); if (doFlush) flush(true, false, false); }
// Atomically delete documents matching term and add doc, using the writer's default analyzer.
void IndexWriter::updateDocument(TermPtr term, DocumentPtr doc) { ensureOpen(); updateDocument(term, doc, getAnalyzer()); }
// Atomically delete documents matching term and add doc analyzed with the given analyzer.
// On failure, cleans up any files the aborted docWriter never incref'd; std::bad_alloc is routed through handleOOM.
void IndexWriter::updateDocument(TermPtr term, DocumentPtr doc, AnalyzerPtr analyzer) { ensureOpen(); try { bool doFlush = false; bool success = false; LuceneException finally; try { doFlush = docWriter->updateDocument(term, doc, analyzer); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) message(L"hit exception updating document"); { SyncLock syncLock(this);
// If docWriter has some aborted files that were never incref'd, then we clean them up here
if (docWriter) { HashSet files(docWriter->abortedFiles()); if (files) deleter->deleteNewFiles(files); } } } finally.throwException(); if (doFlush) flush(true, false, false); } catch (std::bad_alloc& oom) { boost::throw_exception(handleOOM(oom, L"updateDocument")); } }
// Number of segments currently in the index (test/diagnostic helper).
int32_t IndexWriter::getSegmentCount() { SyncLock syncLock(this); return segmentInfos->size(); }
// Number of documents buffered in RAM by DocumentsWriter, not yet flushed to a segment.
int32_t IndexWriter::getNumBufferedDocuments() { SyncLock syncLock(this); return docWriter->getNumDocsInRAM(); }
// Doc count of the i-th segment, or -1 when i is out of range.
int32_t IndexWriter::getDocCount(int32_t i) { SyncLock syncLock(this); return (i >= 0 && i < segmentInfos->size()) ? segmentInfos->info(i)->docCount : -1; }
// Diagnostic counters for how many flushes / delete-applying flushes have run.
int32_t IndexWriter::getFlushCount() { SyncLock syncLock(this); return flushCount; }
int32_t IndexWriter::getFlushDeletesCount() { SyncLock syncLock(this); return flushDeletesCount; }
// Produce the next unique segment name ("_" + counter in max radix).
String IndexWriter::newSegmentName() {
// Cannot synchronize on IndexWriter because that causes deadlock
SyncLock segmentLock(segmentInfos);
// Important to increment changeCount so that the segmentInfos is written on close.
// Otherwise we could close, re-open and re-return the same segment name that was
// previously returned which can cause problems at least with ConcurrentMergeScheduler.
++changeCount; return L"_" + StringUtils::toString(segmentInfos->counter++, StringUtils::CHARACTER_MAX_RADIX); }
// Convenience overloads: optimize down to one segment, waiting by default.
void IndexWriter::optimize() { optimize(true); }
void IndexWriter::optimize(int32_t maxNumSegments) { optimize(maxNumSegments, true); }
void IndexWriter::optimize(bool doWait) { optimize(1, doWait); }
// Merge the index down to at most maxNumSegments segments; when doWait is true, block until
// all optimize merges (including those running in background threads) complete or fail.
void IndexWriter::optimize(int32_t maxNumSegments, bool doWait) { ensureOpen(); if (maxNumSegments < 1) boost::throw_exception(IllegalArgumentException(L"maxNumSegments must be >= 1; got " + StringUtils::toString(maxNumSegments))); if (infoStream) message(L"optimize: index now " + segString()); flush(true, false, true); { SyncLock syncLock(this); resetMergeExceptions(); segmentsToOptimize.clear(); optimizeMaxNumSegments = maxNumSegments; int32_t numSegments = segmentInfos->size(); for (int32_t i = 0; i < numSegments; ++i) segmentsToOptimize.add(segmentInfos->info(i));
// Now mark all pending & running merges as optimize merge
for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) { (*merge)->optimize = true; (*merge)->maxNumSegmentsOptimize = maxNumSegments; } for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) { (*merge)->optimize = true; (*merge)->maxNumSegmentsOptimize = maxNumSegments; } } maybeMerge(maxNumSegments, true); if (doWait) { { SyncLock syncLock(this); while (true) { if (hitOOM) boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete optimize")); if (!mergeExceptions.empty()) {
// Forward any exceptions in background merge threads to the current thread
for (Collection::iterator merge = mergeExceptions.begin(); merge != mergeExceptions.end(); ++merge) { if ((*merge)->optimize) { LuceneException err = (*merge)->getException(); if (!err.isNull()) boost::throw_exception(IOException(L"background merge hit exception: " + (*merge)->segString(directory))); } } } if (optimizeMergesPending()) IndexWriter::doWait(); else break; } }
// If close is called while we are still running, throw an exception so the calling thread will know the
// optimize did not complete
ensureOpen(); }
// NOTE: in the ConcurrentMergeScheduler case, when doWait is false, we can return immediately while background
// threads accomplish the optimization
}
// True when any pending or running merge was flagged as an optimize merge.
bool IndexWriter::optimizeMergesPending() { SyncLock syncLock(this); for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) { if ((*merge)->optimize) return true; } for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) { if ((*merge)->optimize) return true; } return false; }
// Ask the merge policy for merges that reclaim deleted docs, register them, run the scheduler,
// and (when doWait) block until every requested merge finishes or fails.
void IndexWriter::expungeDeletes(bool doWait) { ensureOpen(); if (infoStream) message(L"expungeDeletes: index now " + segString()); MergeSpecificationPtr spec; { SyncLock syncLock(this); spec = mergePolicy->findMergesToExpungeDeletes(segmentInfos); for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) registerMerge(*merge); } mergeScheduler->merge(shared_from_this()); if (doWait) { { SyncLock syncLock(this); bool running = true; while (running) { if (hitOOM) boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete expungeDeletes"));
// Check each merge that MergePolicy asked us to do, to see if any of them
// are still running and if any of them have hit an exception.
running = false; for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) { if (pendingMerges.contains(*merge) || runningMerges.contains(*merge)) running = true; LuceneException err = (*merge)->getException(); if (!err.isNull()) boost::throw_exception(IOException(L"background merge hit exception: " + (*merge)->segString(directory))); }
// If any of our merges are still running, wait
if (running) IndexWriter::doWait(); } } }
// NOTE: in the ConcurrentMergeScheduler case, when doWait is false, we can return immediately while background
// threads accomplish the optimization
}
void IndexWriter::expungeDeletes() { expungeDeletes(true); }
// Non-optimize merge entry points: compute pending merges then kick the scheduler.
void IndexWriter::maybeMerge() { maybeMerge(false); }
void IndexWriter::maybeMerge(bool optimize) { maybeMerge(1, optimize); }
void IndexWriter::maybeMerge(int32_t maxNumSegmentsOptimize, bool optimize) { updatePendingMerges(maxNumSegmentsOptimize, optimize); mergeScheduler->merge(shared_from_this()); }
// Consult the merge policy (optimize or normal path) and register any selected merges.
// No-ops while merges are stopped or after an OOM was hit.
void IndexWriter::updatePendingMerges(int32_t maxNumSegmentsOptimize, bool optimize) { SyncLock syncLock(this); BOOST_ASSERT(!optimize || maxNumSegmentsOptimize > 0); if (stopMerges) return;
// Do not start new merges if we've hit std::bad_alloc
if (hitOOM) return; MergeSpecificationPtr spec; if (optimize) { spec = mergePolicy->findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize); if (spec) { for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) { (*merge)->optimize = true; (*merge)->maxNumSegmentsOptimize = maxNumSegmentsOptimize; } } } else spec = mergePolicy->findMerges(segmentInfos); if (spec) { for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) registerMerge(*merge); } }
// Pop the next pending merge (moving it to runningMerges), or a null pointer when none is pending.
OneMergePtr IndexWriter::getNextMerge() { SyncLock syncLock(this); if (pendingMerges.empty()) return OneMergePtr(); else { // Advance the merge from pending
to running OneMergePtr merge(pendingMerges.removeFirst()); runningMerges.add(merge); return merge; } }
// Like getNextMerge, but only returns a pending merge that involves external (foreign-directory)
// segments; used by resolveExternalSegments/addIndexes to drain cross-directory merges.
OneMergePtr IndexWriter::getNextExternalMerge() { SyncLock syncLock(this); if (pendingMerges.empty()) return OneMergePtr(); else { for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) { if ((*merge)->isExternal) {
// Advance the merge from pending to running
OneMergePtr running(*merge); runningMerges.add(*merge); pendingMerges.remove(merge); return running; } } }
// All existing merges do not involve external segments
return OneMergePtr(); }
// Begin an addIndexes-style transaction: waits for any rollback in progress, takes the write lock
// (upgrading from a read lock when haveReadLock), snapshots segmentInfos for rollback, and incRefs
// the current files so they survive a rollback. Must be called with no buffered docs or deletes.
void IndexWriter::startTransaction(bool haveReadLock) { SyncLock syncLock(this); bool success = false; LuceneException finally; try { if (infoStream) message(L"now start transaction"); BOOST_ASSERT(docWriter->getNumBufferedDeleteTerms() == 0); // calling startTransaction with buffered delete terms not supported
BOOST_ASSERT(docWriter->getNumDocsInRAM() == 0); // calling startTransaction with buffered documents not supported
ensureOpen();
// If a transaction is trying to roll back (because addIndexes hit an exception) then wait here until that's done
while (stopMerges) doWait(); success = true; } catch (LuceneException& e) { finally = e; }
// Release the write lock if our caller held it, on hitting an exception
if (!success && haveReadLock) releaseRead(); finally.throwException(); if (haveReadLock) upgradeReadToWrite(); else acquireWrite(); success = false; try { localRollbackSegmentInfos = boost::dynamic_pointer_cast(segmentInfos->clone()); BOOST_ASSERT(!hasExternalSegments()); localFlushedDocCount = docWriter->getFlushedDocCount();
// We must "protect" our files at this point from deletion in case we need to rollback
deleter->incRef(segmentInfos, false); success = true; } catch (LuceneException& e) { finally = e; } if (!success) finishAddIndexes(); finally.throwException(); }
// Abort the current transaction: restore the segmentInfos snapshot, undo the startTransaction
// incRef, and ask the deleter to remove any files created since the transaction began.
void IndexWriter::rollbackTransaction() { SyncLock syncLock(this); if (infoStream) message(L"now rollback transaction"); if (docWriter) docWriter->setFlushedDocCount(localFlushedDocCount);
// Must finish merges before rolling back segmentInfos so merges don't hit exceptions on trying to commit
// themselves, don't get files deleted out from under them, etc.
finishMerges(false);
// Keep the same segmentInfos instance but replace all of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter will always write to a new generation ("write once").
segmentInfos->clear(); segmentInfos->addAll(localRollbackSegmentInfos); localRollbackSegmentInfos.reset();
// This must come after we rollback segmentInfos, so that if a commit() kicks off it does not see the
// segmentInfos with external segments.
finishAddIndexes();
// Ask deleter to locate unreferenced files we had created & remove them
deleter->checkpoint(segmentInfos, false);
// Remove the incRef we did in startTransaction
deleter->decRef(segmentInfos);
// Also ask deleter to remove any newly created files that were never incref'd; this "garbage" is created
// when a merge kicks off but aborts part way through before it had a chance to incRef the files it had
// partially created
deleter->refresh(); notifyAll(); BOOST_ASSERT(!hasExternalSegments()); }
// Commit the current transaction: checkpoint the new state and drop the rollback snapshot's incRef.
void IndexWriter::commitTransaction() { SyncLock syncLock(this); if (infoStream) message(L"now commit transaction");
// Give deleter a chance to remove files now
checkpoint();
// Remove the incRef we did in startTransaction.
deleter->decRef(localRollbackSegmentInfos); localRollbackSegmentInfos.reset(); BOOST_ASSERT(!hasExternalSegments()); finishAddIndexes(); }
// Public rollback: discard all changes since the last commit; only one thread performs the close.
void IndexWriter::rollback() { ensureOpen();
// Ensure that only one thread actually gets to do the closing
if (shouldClose()) rollbackInternal(); }
// Core rollback: abort merges, roll back any pending (two-phase) commit, restore the
// last-committed segmentInfos, abort buffered docs, then close without committing.
void IndexWriter::rollbackInternal() { bool success = false; if (infoStream) message(L"rollback"); docWriter->pauseAllThreads(); LuceneException finally; try { finishMerges(false);
// Must pre-close these two, in case they increment changeCount so that we can then set it to false before
// calling closeInternal
mergePolicy->close(); mergeScheduler->close(); { SyncLock syncLock(this); if (pendingCommit) { pendingCommit->rollbackCommit(directory); deleter->decRef(pendingCommit); pendingCommit.reset(); notifyAll(); }
// Keep the same segmentInfos instance but replace all of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter will always write to a new generation ("write once").
segmentInfos->clear(); segmentInfos->addAll(rollbackSegmentInfos); BOOST_ASSERT(!hasExternalSegments()); docWriter->abort(); bool test = testPoint(L"rollback before checkpoint"); BOOST_ASSERT(test);
// Ask deleter to locate unreferenced files & remove them
deleter->checkpoint(segmentInfos, false); deleter->refresh(); }
// Don't bother saving any changes in our segmentInfos
readerPool->clear(SegmentInfosPtr()); lastCommitChangeCount = changeCount; success = true; } catch (std::bad_alloc& oom) { finally = handleOOM(oom, L"rollbackInternal"); } catch (LuceneException& e) { finally = e; } { SyncLock syncLock(this); if (!success) { docWriter->resumeAllThreads(); closing = false; notifyAll(); if (infoStream) message(L"hit exception during rollback"); } } finally.throwException(); closeInternal(false); }
// Delete every document in the index: abort merges and buffered docs, clear all segments,
// and let the deleter reclaim the now-unreferenced files. Faster than deleting by query.
void IndexWriter::deleteAll() { SyncLock syncLock(this); bool success = false; docWriter->pauseAllThreads(); LuceneException finally; try {
// Abort any running merges
finishMerges(false);
// Remove any buffered docs
docWriter->abort(); docWriter->setFlushedDocCount(0);
// Remove all segments
segmentInfos->clear();
// Ask deleter to locate unreferenced files & remove them
deleter->checkpoint(segmentInfos, false); deleter->refresh();
// Don't bother saving any changes in our segmentInfos
readerPool->clear(SegmentInfosPtr());
// Mark that the index has changed
++changeCount; success = true; } catch (std::bad_alloc& oom) { finally = handleOOM(oom, L"deleteAll"); } catch (LuceneException& e) { finally = e; } docWriter->resumeAllThreads(); if (!success && infoStream) message(L"hit exception during deleteAll"); finally.throwException(); }
// Either abort (waitForMerges == false) or wait out all pending and running merges.
void IndexWriter::finishMerges(bool waitForMerges) { SyncLock syncLock(this); if (!waitForMerges) { stopMerges = true;
// Abort all pending and running merges
for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) { if (infoStream) message(L"now abort pending merge " + (*merge)->segString(directory));
(*merge)->abort(); mergeFinish(*merge); } pendingMerges.clear(); for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) { if (infoStream) message(L"now abort running merge " + (*merge)->segString(directory)); (*merge)->abort(); }
// Ensure any running addIndexes finishes. It's fine if a new one attempts to start because its merges
// will quickly see the stopMerges == true and abort.
acquireRead(); releaseRead();
// These merges periodically check whether they have been aborted, and stop if so. We wait here to make
// sure they all stop. It should not take very long because the merge threads periodically check if they
// are aborted.
while (!runningMerges.empty()) { if (infoStream) message(L"now wait for " + StringUtils::toString(runningMerges.size()) + L" running merge to abort"); doWait(); } stopMerges = false; notifyAll(); BOOST_ASSERT(mergingSegments.empty()); if (infoStream) message(L"all running merges have aborted"); } else {
// waitForMerges() will ensure any running addIndexes finishes. It's fine if a new one attempts to start
// because from our caller above the call will see that we are in the process of closing, and will throw
// an AlreadyClosed exception.
IndexWriter::waitForMerges(); } }
// Block until every pending and running merge has completed.
void IndexWriter::waitForMerges() { SyncLock syncLock(this); // Ensure any running addIndexes finishes.
acquireRead(); releaseRead(); while (!pendingMerges.empty() || !runningMerges.empty()) doWait();
// sanity check
BOOST_ASSERT(mergingSegments.empty()); }
// Record an index change and let the deleter prune files no longer referenced.
void IndexWriter::checkpoint() { SyncLock syncLock(this); ++changeCount; deleter->checkpoint(segmentInfos, false); }
// addIndexes read/write-lock helpers: finish releases the write lock; block takes (and on failure
// releases) the read lock after re-checking the writer is still open; resume releases it.
void IndexWriter::finishAddIndexes() { releaseWrite(); }
void IndexWriter::blockAddIndexes(bool includePendingClose) { acquireRead(); bool success = false; LuceneException finally; try {
// Make sure we are still open since we could have waited quite a while for last addIndexes to finish
ensureOpen(includePendingClose); success = true; } catch (LuceneException& e) { finally = e; } if (!success) releaseRead(); finally.throwException(); }
void IndexWriter::resumeAddIndexes() { releaseRead(); }
// Clear recorded merge exceptions and bump the generation so stale merges don't re-report.
void IndexWriter::resetMergeExceptions() { SyncLock syncLock(this); mergeExceptions.clear(); ++mergeGen; }
// Validate an addIndexes argument list: reject duplicate directories and the writer's own directory.
void IndexWriter::noDupDirs(Collection dirs) { Collection dups(Collection::newInstance()); for (Collection::iterator dir = dirs.begin(); dir != dirs.end(); ++dir) { for (Collection::iterator dup = dups.begin(); dup != dups.end(); ++dup) { if (*dup == *dir) boost::throw_exception(IllegalArgumentException(L"Directory " + (*dir)->getLockID() + L" appears more than once")); } if (*dir == directory) boost::throw_exception(IllegalArgumentException(L"Cannot add directory to itself")); dups.add(*dir); } }
// Merge the segments of the given directories into this index without optimizing, inside a
// transaction: on any failure the segmentInfos snapshot is rolled back and temp files removed.
void IndexWriter::addIndexesNoOptimize(Collection dirs) { ensureOpen(); noDupDirs(dirs);
// Do not allow add docs or deletes while we are running
docWriter->pauseAllThreads(); LuceneException finally; try { if (infoStream) message(L"flush at addIndexesNoOptimize"); flush(true, false, true); bool success = false; startTransaction(false); try { int32_t docCount = 0; { SyncLock syncLock(this); ensureOpen(); for (Collection::iterator dir = dirs.begin(); dir != dirs.end(); ++dir) { if (directory == *dir) {
// cannot add this index: segments may be deleted in merge before added
boost::throw_exception(IllegalArgumentException(L"Cannot add this index to itself")); } SegmentInfosPtr sis(newLucene());
// read infos from dir
sis->read(*dir); for (int32_t j = 0; j < sis->size(); ++j) { SegmentInfoPtr info(sis->info(j)); BOOST_ASSERT(!segmentInfos->contains(info)); docCount += info->docCount; segmentInfos->add(info); // add each info
} } }
// Notify DocumentsWriter that the flushed count just increased
docWriter->updateFlushedDocCount(docCount); maybeMerge(); ensureOpen();
// If after merging there remain segments in the index that are in a different directory, just copy these
// over into our index. This is necessary (before finishing the transaction) to avoid leaving the index
// in an unusable (inconsistent) state.
resolveExternalSegments(); ensureOpen(); success = true; } catch (LuceneException& e) { finally = e; } if (success) commitTransaction(); else rollbackTransaction(); } catch (std::bad_alloc& oom) { finally = handleOOM(oom, L"addIndexesNoOptimize"); } catch (LuceneException& e) { finally = e; } if (docWriter) docWriter->resumeAllThreads(); finally.throwException(); }
// True when segmentInfos still references a segment living in a foreign directory.
bool IndexWriter::hasExternalSegments() { return segmentInfos->hasExternalSegments(directory); }
// Copy every foreign-directory segment into this directory by running (or waiting on) merges
// until segmentInfos references only local segments; throws MergeAborted if rollback intervenes.
void IndexWriter::resolveExternalSegments() { bool any = false; bool done = false; while (!done) { SegmentInfoPtr info; OneMergePtr merge; { SyncLock syncLock(this); if (stopMerges) boost::throw_exception(MergeAbortedException(L"rollback() was called or addIndexes* hit an unhandled exception")); int32_t numSegments = segmentInfos->size(); done = true; for (int32_t i = 0; i < numSegments; ++i) { info = segmentInfos->info(i); if (info->dir != directory) { done = false; OneMergePtr newMerge(newLucene(segmentInfos->range(i, i + 1), boost::dynamic_pointer_cast(mergePolicy) && getUseCompoundFile()));
// Returns true if no running merge conflicts with this one (and, records this merge as
// pending), ie, this segment is not currently being merged
if (registerMerge(newMerge)) { merge
= newMerge;
// If this segment is not currently being merged, then advance it to running & run
// the merge ourself (below)
pendingMerges.remove(merge); runningMerges.add(merge); break; } } } if (!done && !merge) {
// We are not yet done (external segments still exist in segmentInfos), yet, all such segments
// are currently "covered" by a pending or running merge. We now try to grab any pending merge
// that involves external segments
merge = getNextExternalMerge(); } if (!done && !merge) {
// We are not yet done, and, all external segments fall under merges that the merge scheduler is
// currently running. So, we now wait and check back to see if the merge has completed.
doWait(); } } if (merge) { any = true; IndexWriter::merge(merge); } } if (any) {
// Sometimes, on copying an external segment over, more merges may become necessary
mergeScheduler->merge(shared_from_this()); } }
// Merge the given readers into this index as a single new segment, inside a transaction.
void IndexWriter::addIndexes(Collection readers) { ensureOpen();
// Do not allow add docs or deletes while we are running
docWriter->pauseAllThreads();
// We must pre-acquire a read lock here (and upgrade to write lock in startTransaction below) so that no
// other addIndexes is allowed to start up after we have flushed & optimized but before we then start our
// transaction.
This is because the merging below requires that only one segment is present in the index acquireRead(); LuceneException finally; try { SegmentInfoPtr info; String mergedName; SegmentMergerPtr merger; bool success = false; try { flush(true, false, true); optimize(); // start with zero or 1 seg
success = true; } catch (LuceneException& e) { finally = e; }
// Take care to release the read lock if we hit an exception before starting the transaction
if (!success) releaseRead(); finally.throwException();
// true means we already have a read lock; if this call hits an exception it will release the write lock
startTransaction(true); try { mergedName = newSegmentName(); merger = newLucene(shared_from_this(), mergedName, OneMergePtr()); SegmentReaderPtr sReader; { SyncLock syncLock(this); if (segmentInfos->size() == 1) // add existing index, if any
sReader = readerPool->get(segmentInfos->info(0), true, BufferedIndexInput::BUFFER_SIZE, -1); } success = false; try { if (sReader) merger->add(sReader); for (Collection::iterator i = readers.begin(); i != readers.end(); ++i) merger->add(*i); int32_t docCount = merger->merge(); // merge 'em
{ SyncLock syncLock(this); segmentInfos->clear(); // pop old infos & add new
info = newLucene(mergedName, docCount, directory, false, true, -1, L"", false, merger->hasProx()); setDiagnostics(info, L"addIndexes(Collection)"); segmentInfos->add(info); }
// Notify DocumentsWriter that the flushed count just increased
docWriter->updateFlushedDocCount(docCount); success = true; } catch (LuceneException& e) { finally = e; } if (sReader) readerPool->release(sReader); } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) message(L"hit exception in addIndexes during merge"); rollbackTransaction(); } else commitTransaction(); finally.throwException(); if (boost::dynamic_pointer_cast(mergePolicy) && getUseCompoundFile()) { HashSet files; { SyncLock syncLock(this);
// Must incRef our files so that if another thread is running merge/optimize, it doesn't delete our
// segment's files before we have a chance to finish making the compound file.
if (segmentInfos->contains(info)) { files = info->files(); deleter->incRef(files); } } if (files) { success = false; startTransaction(false); try { merger->createCompoundFile(mergedName + L".cfs"); { SyncLock syncLock(this); info->setUseCompoundFile(true); } success = true; } catch (LuceneException& e) { finally = e; } { SyncLock syncLock(this); deleter->decRef(files); } if (!success) { if (infoStream) message(L"hit exception building compound file in addIndexes during merge"); rollbackTransaction(); } else commitTransaction(); } } } catch (std::bad_alloc& oom) { finally = handleOOM(oom, L"addIndexes(Collection)"); } catch (LuceneException& e) { finally = e; } if (docWriter) docWriter->resumeAllThreads(); finally.throwException(); }
// Subclass hooks invoked around each flush; the base implementations do nothing.
void IndexWriter::doAfterFlush() {
// override
}
void IndexWriter::doBeforeFlush() {
// override
}
// Phase one of a two-phase commit: flush everything and write (but do not yet fsync-publish)
// the new segments file. Must later be paired with commit() or rollback().
void IndexWriter::prepareCommit() { ensureOpen(); prepareCommit(MapStringString()); }
void IndexWriter::prepareCommit(MapStringString commitUserData) { if (hitOOM) boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot commit")); if (pendingCommit) boost::throw_exception(IllegalStateException(L"prepareCommit was already called with no corresponding call to commit")); if (infoStream) message(L"prepareCommit: flush"); flush(true, true, true); startCommit(0, commitUserData); }
// Internal commit used when a flush must free disk space: start & finish under the commit lock.
void IndexWriter::commit(int64_t sizeInBytes) { SyncLock messageLock(commitLock); startCommit(sizeInBytes, MapStringString()); finishCommit(); }
// Public commit: prepare (unless prepareCommit was already called) then finish, serialized
// by commitLock so only one commit runs at a time.
void IndexWriter::commit() { commit(MapStringString()); }
void IndexWriter::commit(MapStringString commitUserData) { ensureOpen(); if (infoStream) message(L"commit: start"); { SyncLock messageLock(commitLock); if (infoStream) message(L"commit: enter lock"); if (!pendingCommit) { if (infoStream) message(L"commit: now prepare"); prepareCommit(commitUserData); } else if (infoStream) message(L"commit: already prepared"); finishCommit(); } }
// Phase two of the two-phase commit: publish the pending segments file, record the new
// generation/user data, checkpoint the deleter, and drop the pendingCommit reference.
void IndexWriter::finishCommit() { SyncLock syncLock(this); if (pendingCommit) { LuceneException finally; try { if (infoStream) message(L"commit: pendingCommit != null"); pendingCommit->finishCommit(directory); if (infoStream) message(L"commit: wrote segments file \"" + pendingCommit->getCurrentSegmentFileName() + L"\""); lastCommitChangeCount = pendingCommitChangeCount; segmentInfos->updateGeneration(pendingCommit); segmentInfos->setUserData(pendingCommit->getUserData()); setRollbackSegmentInfos(pendingCommit); deleter->checkpoint(pendingCommit, true); } catch (LuceneException& e) { finally = e; } deleter->decRef(pendingCommit); pendingCommit.reset(); notifyAll(); finally.throwException(); } else if (infoStream) message(L"commit: pendingCommit == null; skip"); if (infoStream) message(L"commit: done"); }
// Flush buffered docs/deletes (and optionally doc stores); optionally trigger follow-up merges.
void IndexWriter::flush(bool triggerMerge, bool flushDocStores, bool flushDeletes) {
// We can be called during close, when closing = true, so we must pass false to ensureOpen
ensureOpen(false); if (doFlush(flushDocStores, flushDeletes) && triggerMerge) maybeMerge(); }
// Synchronized flush wrapper: runs doFlushInternal, rebalances DocumentsWriter RAM afterwards,
// and always clears the flush-pending flag even on exception.
bool IndexWriter::doFlush(bool flushDocStores, bool flushDeletes) { TestScope testScope(L"IndexWriter", L"doFlush"); SyncLock syncLock(this); bool success = false; LuceneException finally; try { try { success = doFlushInternal(flushDocStores, flushDeletes); } catch (LuceneException& e) { finally = e; } if (docWriter->doBalanceRAM()) docWriter->balanceRAM(); finally.throwException(); } catch (LuceneException& e) { finally = e; } docWriter->clearFlushPending(); finally.throwException(); return success; }
// Core flush: writes buffered docs to a new segment, handles shared vs private doc stores,
// optionally builds a compound file and applies deletes. Returns true if docs were flushed.
bool IndexWriter::doFlushInternal(bool flushDocStores, bool flushDeletes) { SyncLock syncLock(this); if (hitOOM) boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot flush")); ensureOpen(false); BOOST_ASSERT(testPoint(L"startDoFlush")); doBeforeFlush(); ++flushCount;
// If we are flushing because too many deletes accumulated, then we should apply the deletes to free RAM
if (docWriter->doApplyDeletes()) flushDeletes = true;
// Make sure no threads are actively adding a document. Returns true if docWriter is currently aborting, in
// which case we skip flushing this segment
if (infoStream) message(L"flush: now pause all indexing threads"); if (docWriter->pauseAllThreads()) { docWriter->resumeAllThreads(); return false; } bool flushDocs = false; LuceneException finally; try { SegmentInfoPtr newSegment; int32_t numDocs = docWriter->getNumDocsInRAM();
// Always flush docs if there are any
flushDocs = (numDocs > 0); String docStoreSegment(docWriter->getDocStoreSegment()); BOOST_ASSERT(!docStoreSegment.empty() || numDocs == 0); if (docStoreSegment.empty()) flushDocStores = false; int32_t docStoreOffset = docWriter->getDocStoreOffset(); bool docStoreIsCompoundFile = false; if (infoStream) { message(L" flush: segment=" + docWriter->getSegment() + L" docStoreSegment=" + StringUtils::toString(docWriter->getDocStoreSegment()) + L" docStoreOffset=" + StringUtils::toString(docStoreOffset) + L" flushDocs=" + StringUtils::toString(flushDocs) + L" flushDeletes=" + StringUtils::toString(flushDeletes) + L" flushDocStores=" + StringUtils::toString(flushDocStores) + L" numDocs=" + StringUtils::toString(numDocs) + L" numBufDelTerms=" + StringUtils::toString(docWriter->getNumBufferedDeleteTerms())); message(L" index before flush " + segString()); }
// Check if the doc stores must be separately flushed because other segments, besides the one we are
// about to flush, reference it
if (flushDocStores && (!flushDocs || docWriter->getSegment() != docWriter->getDocStoreSegment())) {
// We must separately flush the doc store
if (infoStream) message(L" flush shared docStore segment " + docStoreSegment); docStoreIsCompoundFile = IndexWriter::flushDocStores(); flushDocStores = false; } String segment(docWriter->getSegment());
// If we are flushing docs, segment must not be null
BOOST_ASSERT(!segment.empty() || !flushDocs); if (flushDocs) { bool success = false; int32_t flushedDocCount; try { flushedDocCount = docWriter->flush(flushDocStores); if (infoStream) message(L"flushedFiles=" + docWriter->getFlushedFiles()); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) message(L"hit exception flushing segment " + segment); deleter->refresh(segment); } finally.throwException(); if (docStoreOffset == 0 && flushDocStores) {
// This means we are flushing private doc stores with this segment, so it will not be shared
// with other segments
BOOST_ASSERT(!docStoreSegment.empty()); BOOST_ASSERT(docStoreSegment == segment); docStoreOffset = -1; docStoreIsCompoundFile = false; docStoreSegment.clear(); }
// Create new SegmentInfo, but do not add to our segmentInfos until deletes are flushed successfully.
newSegment = newLucene(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter->hasProx()); setDiagnostics(newSegment, L"flush"); } docWriter->pushDeletes(); if (flushDocs) { segmentInfos->add(newSegment); checkpoint(); } if (flushDocs && mergePolicy->useCompoundFile(segmentInfos, newSegment)) {
// Now build compound file
bool success = false; try { docWriter->createCompoundFile(segment); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) message(L"hit exception creating compound file for newly flushed segment " + segment); deleter->deleteFile(segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION()); } finally.throwException(); newSegment->setUseCompoundFile(true); checkpoint(); } if (flushDeletes) applyDeletes(); if (flushDocs) checkpoint(); doAfterFlush(); } catch (std::bad_alloc& oom) { finally = handleOOM(oom, L"doFlush"); flushDocs = false; } catch (LuceneException& e) { finally = e; } docWriter->resumeAllThreads(); finally.throwException(); return flushDocs; }
// RAM currently consumed by buffered (unflushed) documents.
int64_t IndexWriter::ramSizeInBytes() { ensureOpen(); return docWriter->getRAMUsed(); }
// Number of documents currently buffered in RAM.
int32_t IndexWriter::numRamDocs() { SyncLock syncLock(this); ensureOpen(); return docWriter->getNumDocsInRAM(); }
// Verify the merge's segments appear contiguously in segmentInfos; returns the index of the
// first one, throwing MergeException for missing or non-contiguous selections.
int32_t IndexWriter::ensureContiguousMerge(OneMergePtr merge) { int32_t first = segmentInfos->find(merge->segments->info(0)); if (first == -1) boost::throw_exception(MergeException(L"Could not find segment " + merge->segments->info(0)->name + L" in current index " + segString())); int32_t numSegments = segmentInfos->size(); int32_t numSegmentsToMerge = merge->segments->size(); for (int32_t i = 0; i < numSegmentsToMerge; ++i) { SegmentInfoPtr info(merge->segments->info(i)); if (first + i >= numSegments || !segmentInfos->info(first + i)->equals(info)) { if (!segmentInfos->contains(info)) boost::throw_exception(MergeException(L"MergePolicy selected a segment (" + info->name + L") that is not in the current index " + segString())); else boost::throw_exception(MergeException(L"MergePolicy selected non-contiguous segments to merge (" + merge->segString(directory) + L" vs " + segString() + L"), which IndexWriter (currently) cannot handle")); } } return first; }
// Re-apply deletes that were committed against the source segments after the merge started,
// remapping their docIDs into the merged reader; asserts the remapped delete count matches.
void IndexWriter::commitMergedDeletes(OneMergePtr merge, SegmentReaderPtr mergeReader) { SyncLock syncLock(this); BOOST_ASSERT(testPoint(L"startCommitMergeDeletes")); SegmentInfosPtr sourceSegments(merge->segments); if (infoStream) message(L"commitMergeDeletes " + merge->segString(directory));
// Carefully merge deletes that occurred after we started merging
int32_t docUpto = 0; int32_t delCount = 0; for (int32_t i = 0; i < sourceSegments->size(); ++i) { SegmentInfoPtr info(sourceSegments->info(i)); int32_t docCount = info->docCount; SegmentReaderPtr previousReader(merge->readersClone[i]); SegmentReaderPtr currentReader(merge->readers[i]); if (previousReader->hasDeletions()) {
// There were deletes on this segment when the merge started. The merge has collapsed away those deletes,
// but if new deletes were flushed since the merge started, we must now carefully keep any newly flushed
// deletes but mapping them to the new docIDs.
if (currentReader->numDeletedDocs() > previousReader->numDeletedDocs()) {
// This means this segment has had new deletes committed since we started the merge, so we must merge them
for (int32_t j = 0; j < docCount; ++j) { if (previousReader->isDeleted(j)) BOOST_ASSERT(currentReader->isDeleted(j)); else { if (currentReader->isDeleted(j)) { mergeReader->doDelete(docUpto); ++delCount; } ++docUpto; } } } else docUpto += docCount - previousReader->numDeletedDocs(); } else if (currentReader->hasDeletions()) {
// This segment had no deletes before but now it does
for (int32_t j = 0; j < docCount; ++j) { if (currentReader->isDeleted(j)) { mergeReader->doDelete(docUpto); ++delCount; } ++docUpto; } } else {
// No deletes before or after
docUpto += info->docCount; } } BOOST_ASSERT(mergeReader->numDeletedDocs() == delCount); mergeReader->_hasChanges = (delCount > 0); }
// Install a completed merge into segmentInfos: re-apply concurrent deletes, remap buffered
// deletes, swap the source segments for the merged one, and checkpoint. Returns false when
// the merge was aborted and nothing was committed.
bool IndexWriter::commitMerge(OneMergePtr merge, SegmentMergerPtr merger, int32_t mergedDocCount, SegmentReaderPtr mergedReader) { SyncLock syncLock(this); BOOST_ASSERT(testPoint(L"startCommitMerge")); if (hitOOM) boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete merge")); if (infoStream) message(L"commitMerge: " + merge->segString(directory) + L" index=" + segString()); BOOST_ASSERT(merge->registerDone);
// If merge was explicitly aborted, or, if rollback() or rollbackTransaction() had been called since our merge
// started (which results in an unqualified deleter.refresh() call that will remove any index file that current
// segments does not reference), we abort this merge
if (merge->isAborted()) { if (infoStream) message(L"commitMerge: skipping merge " + merge->segString(directory) + L": it was aborted"); return false; } int32_t start = ensureContiguousMerge(merge); commitMergedDeletes(merge, mergedReader); docWriter->remapDeletes(segmentInfos, merger->getDocMaps(), merger->getDelCounts(), merge, mergedDocCount);
// If the doc store we are using has been closed and is in now compound format (but wasn't when we started),
// then we will switch to the compound format as well
setMergeDocStoreIsCompoundFile(merge); merge->info->setHasProx(merger->hasProx()); segmentInfos->remove(start, start + merge->segments->size()); BOOST_ASSERT(!segmentInfos->contains(merge->info)); segmentInfos->add(start, merge->info); closeMergeReaders(merge, false);
// Must note the change to segmentInfos so any commits in-flight don't lose it
checkpoint();
// If the merged segments had pending changes, clear them so that they don't bother writing
// them to disk, updating SegmentInfo, etc.
readerPool->clear(merge->segments); if (merge->optimize) {
// cascade the optimize
segmentsToOptimize.add(merge->info); } return true; }
// Record a merge failure on the OneMerge and decide whether to propagate it: MergeAborted is
// swallowed unless the merge involves external segments; IO/Runtime errors always propagate.
LuceneException IndexWriter::handleMergeException(const LuceneException& exc, OneMergePtr merge) { if (infoStream) message(L"handleMergeException: merge=" + merge->segString(directory) + L" exc=" + exc.getError());
// Set the exception on the merge, so if optimize() is waiting on us it sees the root cause exception
merge->setException(exc); addMergeException(merge); switch (exc.getType()) { case LuceneException::MergeAborted:
// We can ignore this exception (it happens when close(false) or rollback is called), unless the
// merge involves segments from external directories, in which case we must throw it so, for
// example, the rollbackTransaction code in addIndexes* is executed.
if (merge->isExternal) return exc; break; case LuceneException::IO: case LuceneException::Runtime: return exc; default: return RuntimeException(); // Should not get here } return LuceneException(); } void IndexWriter::merge(OneMergePtr merge) { bool success = false; try { LuceneException finally; try { try { mergeInit(merge); if (infoStream) message(L"now merge\n merge=" + merge->segString(directory) + L"\n index=" + segString()); mergeMiddle(merge); mergeSuccess(merge); success = true; } catch (LuceneException& e) { finally = handleMergeException(e, merge); } { SyncLock syncLock(this); mergeFinish(merge); if (!success) { if (infoStream) message(L"hit exception during merge"); if (merge->info && !segmentInfos->contains(merge->info)) deleter->refresh(merge->info->name); } // This merge (and, generally, any change to the segments) may now enable // new merges, so we call merge policy & update pending merges. if (success && !merge->isAborted() && !closed && !closing) updatePendingMerges(merge->maxNumSegmentsOptimize, merge->optimize); } } catch (LuceneException& e) { finally = e; } finally.throwException(); } catch (std::bad_alloc& oom) { boost::throw_exception(handleOOM(oom, L"merge")); } } void IndexWriter::mergeSuccess(OneMergePtr merge) { // override } bool IndexWriter::registerMerge(OneMergePtr merge) { SyncLock syncLock(this); if (merge->registerDone) return true; if (stopMerges) { merge->abort(); boost::throw_exception(MergeAbortedException(L"merge is aborted: " + merge->segString(directory))); } int32_t count = merge->segments->size(); bool isExternal = false; for (int32_t i = 0; i < count; ++i) { SegmentInfoPtr info(merge->segments->info(i)); if (mergingSegments.contains(info)) return false; if (!segmentInfos->contains(info)) return false; if (info->dir != directory) isExternal = true; if (segmentsToOptimize.contains(info)) { merge->optimize = true; merge->maxNumSegmentsOptimize = optimizeMaxNumSegments; } } ensureContiguousMerge(merge); 
pendingMerges.add(merge); if (infoStream) message(L"add merge to pendingMerges: " + merge->segString(directory) + L" [total " + StringUtils::toString(pendingMerges.size()) + L" pending]"); merge->mergeGen = mergeGen; merge->isExternal = isExternal; // OK it does not conflict; now record that this merge is running (while synchronized) // to avoid race condition where two conflicting merges from different threads, start for (int32_t i = 0; i < count; ++i) mergingSegments.add(merge->segments->info(i)); // Merge is now registered merge->registerDone = true; return true; } void IndexWriter::mergeInit(OneMergePtr merge) { SyncLock syncLock(this); bool success = false; LuceneException finally; try { _mergeInit(merge); success = true; } catch (LuceneException& e) { finally = e; } if (!success) mergeFinish(merge); finally.throwException(); } void IndexWriter::_mergeInit(OneMergePtr merge) { SyncLock syncLock(this); bool test = testPoint(L"startMergeInit"); BOOST_ASSERT(test); BOOST_ASSERT(merge->registerDone); BOOST_ASSERT(!merge->optimize || merge->maxNumSegmentsOptimize > 0); if (hitOOM) boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot merge")); if (merge->info) { // mergeInit already done return; } if (merge->isAborted()) return; applyDeletes(); SegmentInfosPtr sourceSegments(merge->segments); int32_t end = sourceSegments->size(); // Check whether this merge will allow us to skip merging the doc stores (stored field & vectors). // This is a very substantial optimization (saves tons of IO). 
DirectoryPtr lastDir(directory); String lastDocStoreSegment; int32_t next = -1; bool mergeDocStores = false; bool doFlushDocStore = false; String currentDocStoreSegment(docWriter->getDocStoreSegment()); // Test each segment to be merged: check if we need to flush/merge doc stores for (int32_t i = 0; i < end; ++i) { SegmentInfoPtr si(sourceSegments->info(i)); // If it has deletions we must merge the doc stores if (si->hasDeletions()) mergeDocStores = true; // If it has its own (private) doc stores we must merge the doc stores if (si->getDocStoreOffset() == -1) mergeDocStores = true; // If it has a different doc store segment than previous segments, we must merge the doc stores String docStoreSegment(si->getDocStoreSegment()); if (docStoreSegment.empty()) mergeDocStores = true; else if (lastDocStoreSegment.empty()) lastDocStoreSegment = docStoreSegment; else if (lastDocStoreSegment != docStoreSegment) mergeDocStores = true; // Segments' docScoreOffsets must be in-order, contiguous. For the default merge policy now // this will always be the case but for an arbitrary merge policy this may not be the case if (next == -1) next = si->getDocStoreOffset() + si->docCount; else if (next != si->getDocStoreOffset()) mergeDocStores = true; else next = si->getDocStoreOffset() + si->docCount; // If the segment comes from a different directory we must merge if (lastDir != si->dir) mergeDocStores = true; // If the segment is referencing the current "live" doc store outputs then we must merge if (si->getDocStoreOffset() != -1 && !currentDocStoreSegment.empty() && si->getDocStoreSegment() == currentDocStoreSegment) doFlushDocStore = true; } // if a mergedSegmentWarmer is installed, we must merge the doc stores because we will open a full // SegmentReader on the merged segment if (!mergeDocStores && mergedSegmentWarmer && !currentDocStoreSegment.empty() && !lastDocStoreSegment.empty() && lastDocStoreSegment == currentDocStoreSegment) mergeDocStores = true; int32_t docStoreOffset; 
String docStoreSegment; bool docStoreIsCompoundFile; if (mergeDocStores) { docStoreOffset = -1; docStoreSegment.clear(); docStoreIsCompoundFile = false; } else { SegmentInfoPtr si(sourceSegments->info(0)); docStoreOffset = si->getDocStoreOffset(); docStoreSegment = si->getDocStoreSegment(); docStoreIsCompoundFile = si->getDocStoreIsCompoundFile(); } if (mergeDocStores && doFlushDocStore) { // SegmentMerger intends to merge the doc stores (stored fields, vectors), and at // least one of the segments to be merged refers to the currently live doc stores. if (infoStream) message(L"now flush at merge"); doFlush(true, false); } merge->mergeDocStores = mergeDocStores; // Bind a new segment name here so even with ConcurrentMergePolicy we keep deterministic segment names. merge->info = newLucene(newSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, false); MapStringString details(MapStringString::newInstance()); details.put(L"optimize", StringUtils::toString(merge->optimize)); details.put(L"mergeFactor", StringUtils::toString(end)); details.put(L"mergeDocStores", StringUtils::toString(mergeDocStores)); setDiagnostics(merge->info, L"merge", details); // Also enroll the merged segment into mergingSegments; this prevents it from getting // selected for a merge after our merge is done but while we are building the CFS mergingSegments.add(merge->info); } void IndexWriter::setDiagnostics(SegmentInfoPtr info, const String& source) { setDiagnostics(info, source, MapStringString()); } void IndexWriter::setDiagnostics(SegmentInfoPtr info, const String& source, MapStringString details) { MapStringString diagnostics(MapStringString::newInstance()); diagnostics.put(L"source", source); diagnostics.put(L"lucene.version", Constants::LUCENE_VERSION); diagnostics.put(L"os", Constants::OS_NAME); if (details) diagnostics.putAll(details.begin(), details.end()); info->setDiagnostics(diagnostics); } void IndexWriter::mergeFinish(OneMergePtr merge) { 
SyncLock syncLock(this); // Optimize, addIndexes or finishMerges may be waiting on merges to finish. notifyAll(); // It's possible we are called twice, eg if there was an exception inside mergeInit if (merge->registerDone) { SegmentInfosPtr sourceSegments(merge->segments); int32_t end = sourceSegments->size(); for (int32_t i = 0; i < end; ++i) mergingSegments.remove(sourceSegments->info(i)); mergingSegments.remove(merge->info); merge->registerDone = false; } runningMerges.remove(merge); } void IndexWriter::setMergeDocStoreIsCompoundFile(OneMergePtr merge) { SyncLock syncLock(this); String mergeDocStoreSegment(merge->info->getDocStoreSegment()); if (!mergeDocStoreSegment.empty() && !merge->info->getDocStoreIsCompoundFile()) { int32_t size = segmentInfos->size(); for (int32_t i = 0; i < size; ++i) { SegmentInfoPtr info(segmentInfos->info(i)); String docStoreSegment(info->getDocStoreSegment()); if (!docStoreSegment.empty() && docStoreSegment == mergeDocStoreSegment && info->getDocStoreIsCompoundFile()) { merge->info->setDocStoreIsCompoundFile(true); break; } } } } void IndexWriter::closeMergeReaders(OneMergePtr merge, bool suppressExceptions) { SyncLock syncLock(this); int32_t numSegments = merge->segments->size(); if (suppressExceptions) { // Suppress any new exceptions so we throw the original cause for (int32_t i = 0; i < numSegments; ++i) { if (merge->readers[i]) { try { readerPool->release(merge->readers[i], false); } catch (...) { } merge->readers[i].reset(); } if (merge->readersClone[i]) { try { merge->readersClone[i]->close(); } catch (...) 
{ } // This was a private clone and we had the only reference BOOST_ASSERT(merge->readersClone[i]->getRefCount() == 0); merge->readersClone[i].reset(); } } } else { for (int32_t i = 0; i < numSegments; ++i) { if (merge->readers[i]) { readerPool->release(merge->readers[i], true); merge->readers[i].reset(); } if (merge->readersClone[i]) { merge->readersClone[i]->close(); // This was a private clone and we had the only reference BOOST_ASSERT(merge->readersClone[i]->getRefCount() == 0); merge->readersClone[i].reset(); } } } } int32_t IndexWriter::mergeMiddle(OneMergePtr merge) { merge->checkAborted(directory); String mergedName(merge->info->name); int32_t mergedDocCount = 0; SegmentInfosPtr sourceSegments(merge->segments); int32_t numSegments = sourceSegments->size(); if (infoStream) message(L"merging " + merge->segString(directory)); SegmentMergerPtr merger(newLucene(shared_from_this(), mergedName, merge)); merge->readers = Collection::newInstance(numSegments); merge->readersClone = Collection::newInstance(numSegments); bool mergeDocStores = false; String currentDocStoreSegment; { SyncLock syncLock(this); currentDocStoreSegment = docWriter->getDocStoreSegment(); } bool currentDSSMerged = false; LuceneException finally; // This is try/finally to make sure merger's readers are closed bool success = false; try { int32_t totDocCount = 0; for (int32_t i = 0; i < numSegments; ++i) { SegmentInfoPtr info(sourceSegments->info(i)); // Hold onto the "live" reader; we will use this to commit merged deletes merge->readers[i] = readerPool->get(info, merge->mergeDocStores, MERGE_READ_BUFFER_SIZE, -1); SegmentReaderPtr reader(merge->readers[i]); // We clone the segment readers because other deletes may come in while we're merging so we need readers that will not change merge->readersClone[i] = boost::dynamic_pointer_cast(reader->clone(true)); SegmentReaderPtr clone(merge->readersClone[i]); merger->add(clone); if (clone->hasDeletions()) mergeDocStores = true; if 
(info->getDocStoreOffset() != -1 && !currentDocStoreSegment.empty()) currentDSSMerged = currentDSSMerged || (currentDocStoreSegment == info->getDocStoreSegment()); totDocCount += clone->numDocs(); } if (infoStream) message(L"merge: total " + StringUtils::toString(totDocCount) + L" docs"); merge->checkAborted(directory); // If deletions have arrived and it has now become necessary to merge doc stores, go and open them if (mergeDocStores && !merge->mergeDocStores) { merge->mergeDocStores = true; { SyncLock syncLock(this); if (currentDSSMerged) { if (infoStream) message(L"now flush at mergeMiddle"); doFlush(true, false); } } for (Collection::iterator reader = merge->readersClone.begin(); reader != merge->readersClone.end(); ++reader) (*reader)->openDocStores(); // Clear DSS merge->info->setDocStore(-1, L"", false); } // This is where all the work happens merge->info->docCount = merger->merge(merge->mergeDocStores); mergedDocCount = merge->info->docCount; BOOST_ASSERT(mergedDocCount == totDocCount); if (merge->useCompoundFile) { success = false; String compoundFileName(IndexFileNames::segmentFileName(mergedName, IndexFileNames::COMPOUND_FILE_EXTENSION())); try { if (infoStream) message(L"create compound file " + compoundFileName); merger->createCompoundFile(compoundFileName); success = true; } catch (IOException& ioe) { SyncLock syncLock(this); if (merge->isAborted()) { // This can happen if rollback or close(false) is called - fall through to logic // below to remove the partially created CFS } else finally = handleMergeException(ioe, merge); } catch (LuceneException& e) { finally = handleMergeException(e, merge); } if (!success) { if (infoStream) message(L"hit exception creating compound file during merge"); { SyncLock syncLock(this); deleter->deleteFile(compoundFileName); deleter->deleteNewFiles(merger->getMergedFiles()); } } finally.throwException(); success = false; { SyncLock syncLock(this); // delete new non cfs files directly: they were never registered with 
IFD deleter->deleteNewFiles(merger->getMergedFiles()); if (merge->isAborted()) { if (infoStream) message(L"abort merge after building CFS"); deleter->deleteFile(compoundFileName); boost::throw_exception(TemporaryException()); } } merge->info->setUseCompoundFile(true); } int32_t termsIndexDivisor = -1; bool loadDocStores = false; // if the merged segment warmer was not installed when this merge was started, causing us // to not force the docStores to close, we can't warm it now bool canWarm = (merge->info->getDocStoreSegment().empty() || currentDocStoreSegment.empty() || merge->info->getDocStoreSegment() == currentDocStoreSegment); if (poolReaders && mergedSegmentWarmer && canWarm) { // Load terms index & doc stores so the segment warmer can run searches, load documents/term vectors termsIndexDivisor = readerTermsIndexDivisor; loadDocStores = true; } SegmentReaderPtr mergedReader(readerPool->get(merge->info, loadDocStores, BufferedIndexInput::BUFFER_SIZE, termsIndexDivisor)); try { if (poolReaders && mergedSegmentWarmer) mergedSegmentWarmer->warm(mergedReader); if (!commitMerge(merge, merger, mergedDocCount, mergedReader)) { // commitMerge will return false if this merge was aborted boost::throw_exception(TemporaryException()); } } catch (LuceneException& e) { finally = e; } { SyncLock syncLock(this); readerPool->release(mergedReader); } finally.throwException(); success = true; } catch (LuceneException& e) { finally = e; } // Readers are already closed in commitMerge if we didn't hit an exc if (!success) closeMergeReaders(merge, true); // has this merge been aborted? 
if (finally.getType() == LuceneException::Temporary) return 0; finally.throwException(); return mergedDocCount; } void IndexWriter::addMergeException(OneMergePtr merge) { SyncLock syncLock(this); BOOST_ASSERT(!merge->getException().isNull()); if (!mergeExceptions.contains(merge) && mergeGen == merge->mergeGen) mergeExceptions.add(merge); } bool IndexWriter::applyDeletes() { TestScope testScope(L"IndexWriter", L"applyDeletes"); SyncLock syncLock(this); BOOST_ASSERT(testPoint(L"startApplyDeletes")); ++flushDeletesCount; bool success = false; bool changed = false; LuceneException finally; try { changed = docWriter->applyDeletes(segmentInfos); success = true; } catch (LuceneException& e) { finally = e; } if (!success && infoStream) message(L"hit exception flushing deletes"); finally.throwException(); if (changed) checkpoint(); return changed; } int32_t IndexWriter::getBufferedDeleteTermsSize() { SyncLock syncLock(this); return docWriter->getBufferedDeleteTerms().size(); } int32_t IndexWriter::getNumBufferedDeleteTerms() { SyncLock syncLock(this); return docWriter->getNumBufferedDeleteTerms(); } SegmentInfoPtr IndexWriter::newestSegment() { return !segmentInfos->empty() ? 
segmentInfos->info(segmentInfos->size() - 1) : SegmentInfoPtr(); } String IndexWriter::segString() { return segString(segmentInfos); } String IndexWriter::segString(SegmentInfosPtr infos) { SyncLock syncLock(this); StringStream buffer; int32_t count = infos->size(); for (int32_t i = 0; i < count; ++i) { if (i > 0) buffer << L" "; SegmentInfoPtr info(infos->info(i)); buffer << info->segString(directory); if (info->dir != directory) buffer << L"**"; } return buffer.str(); } bool IndexWriter::startSync(const String& fileName, HashSet pending) { SyncLock syncedLock(&synced); if (!synced.contains(fileName)) { if (!syncing.contains(fileName)) { syncing.add(fileName); return true; } else { pending.add(fileName); return false; } } else return false; } void IndexWriter::finishSync(const String& fileName, bool success) { SyncLock syncedLock(&synced); BOOST_ASSERT(syncing.contains(fileName)); syncing.remove(fileName); if (success) synced.add(fileName); synced.notifyAll(); } bool IndexWriter::waitForAllSynced(HashSet syncing) { SyncLock syncedLock(&synced); for (HashSet::iterator fileName = syncing.begin(); fileName != syncing.end(); ++fileName) { while (!synced.contains(*fileName)) { if (!syncing.contains(*fileName)) { // There was an error because a file that was previously syncing failed to appear in synced return false; } else synced.wait(); } } return true; } void IndexWriter::doWait() { SyncLock syncLock(this); // NOTE: the callers of this method should in theory be able to do simply wait(), but, as a defense against // thread timing hazards where notifyAll() fails to be called, we wait for at most 1 second and then return // so caller can check if wait conditions are satisfied wait(1000); } void IndexWriter::startCommit(int64_t sizeInBytes, MapStringString commitUserData) { BOOST_ASSERT(testPoint(L"startStartCommit")); if (hitOOM) boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot commit")); try { if (infoStream) 
message(L"startCommit(): start sizeInBytes=" + StringUtils::toString(sizeInBytes)); SegmentInfosPtr toSync; int64_t myChangeCount = 0; LuceneException finally; { SyncLock syncLock(this); // Wait for any running addIndexes to complete first, then block any from running // until we've copied the segmentInfos we intend to sync blockAddIndexes(false); // On commit the segmentInfos must never reference a segment in another directory BOOST_ASSERT(!hasExternalSegments()); try { BOOST_ASSERT(lastCommitChangeCount <= changeCount); myChangeCount = changeCount; if (changeCount == lastCommitChangeCount) { if (infoStream) message(L" skip startCommit(): no changes pending"); boost::throw_exception(TemporaryException()); } // First, we clone & incref the segmentInfos we intend to sync, then, without locking, we sync() each // file referenced by toSync, in the background. Multiple threads can be doing this at once, if say // a large merge and a small merge finish at the same time if (infoStream) message(L"startCommit index=" + segString(segmentInfos) + L" changeCount=" + StringUtils::toString(changeCount)); readerPool->commit(); // It's possible another flush (that did not close the open do stores) snook in after the flush we // just did, so we remove any tail segments referencing the open doc store from the SegmentInfos // we are about to sync (the main SegmentInfos will keep them) toSync = boost::dynamic_pointer_cast(segmentInfos->clone()); String dss(docWriter->getDocStoreSegment()); if (!dss.empty()) { while(true) { String dss2(toSync->info(toSync->size() - 1)->getDocStoreSegment()); if (dss2.empty() || dss2 != dss) break; toSync->remove(toSync->size() - 1); ++changeCount; } } if (commitUserData) toSync->setUserData(commitUserData); deleter->incRef(toSync, false); HashSet files(toSync->files(directory, false)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { BOOST_ASSERT(directory->fileExists(*fileName)); // If this trips it means we are 
missing a call to .checkpoint somewhere, because by the // time we are called, deleter should know about every file referenced by the current head // segmentInfos BOOST_ASSERT(deleter->exists(*fileName)); } } catch (LuceneException& e) { finally = e; } resumeAddIndexes(); // no changes pending? if (finally.getType() == LuceneException::Temporary) return; finally.throwException(); } BOOST_ASSERT(testPoint(L"midStartCommit")); bool setPending = false; try { // Loop until all files toSync references are sync'd while (true) { HashSet pending(HashSet::newInstance()); HashSet files(toSync->files(directory, false)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (startSync(*fileName, pending)) { bool success = false; try { // Because we incRef'd this commit point above, the file had better exist BOOST_ASSERT(directory->fileExists(*fileName)); if (infoStream) message(L"now sync " + *fileName); directory->sync(*fileName); success = true; } catch (LuceneException& e) { finally = e; } finishSync(*fileName, success); finally.throwException(); } } // All files that I require are either synced or being synced by other threads. If they are being // synced, we must at this point block until they are done. 
If this returns false, that means an // error in another thread resulted in failing to actually sync one of our files, so we repeat if (waitForAllSynced(pending)) break; } BOOST_ASSERT(testPoint(L"midStartCommit2")); { SyncLock syncLock(this); // If someone saved a newer version of segments file since I first started syncing // my version, I can safely skip saving myself since I've been superseded while (true) { if (myChangeCount <= lastCommitChangeCount) { if (infoStream) message(L"sync superseded by newer infos"); break; } else if (!pendingCommit) { // My turn to commit if (segmentInfos->getGeneration() > toSync->getGeneration()) toSync->updateGeneration(segmentInfos); bool success = false; try { // Exception here means nothing is prepared (this method unwinds // everything it did on an exception) try { toSync->prepareCommit(directory); } catch (LuceneException& e) { finally = e; } // Have our master segmentInfos record the generations we just prepared. We do this on // error or success so we don't double-write a segments_N file. 
segmentInfos->updateGeneration(toSync); finally.throwException(); BOOST_ASSERT(!pendingCommit); setPending = true; pendingCommit = toSync; pendingCommitChangeCount = myChangeCount; success = true; } catch (LuceneException& e) { finally = e; } if (!success && infoStream) message(L"hit exception committing segments file"); finally.throwException(); break; } else { // Must wait for other commit to complete doWait(); } } } if (infoStream) message(L"done all syncs"); BOOST_ASSERT(testPoint(L"midStartCommitSuccess")); } catch (LuceneException& e) { finally = e; } { SyncLock syncLock(this); if (!setPending) deleter->decRef(toSync); } finally.throwException(); } catch (std::bad_alloc& oom) { boost::throw_exception(handleOOM(oom, L"startCommit")); } BOOST_ASSERT(testPoint(L"finishStartCommit")); } bool IndexWriter::isLocked(DirectoryPtr directory) { return directory->makeLock(WRITE_LOCK_NAME)->isLocked(); } void IndexWriter::unlock(DirectoryPtr directory) { directory->makeLock(IndexWriter::WRITE_LOCK_NAME)->release(); } void IndexWriter::setMergedSegmentWarmer(IndexReaderWarmerPtr warmer) { mergedSegmentWarmer = warmer; } IndexReaderWarmerPtr IndexWriter::getMergedSegmentWarmer() { return mergedSegmentWarmer; } LuceneException IndexWriter::handleOOM(const std::bad_alloc& oom, const String& location) { if (infoStream) message(L"hit OutOfMemoryError inside " + location); hitOOM = true; return OutOfMemoryError(); } bool IndexWriter::testPoint(const String& name) { return true; } bool IndexWriter::nrtIsCurrent(SegmentInfosPtr infos) { SyncLock syncLock(this); if (!infos->equals(segmentInfos)) { // if any structural changes (new segments), we are stale return false; } else if (infos->getGeneration() != segmentInfos->getGeneration()) { // if any commit took place since we were opened, we are stale return false; } else return !docWriter->anyChanges(); } bool IndexWriter::isClosed() { SyncLock syncLock(this); return closed; } ReaderPool::ReaderPool(IndexWriterPtr writer) { 
readerMap = MapSegmentInfoSegmentReader::newInstance(); _indexWriter = writer; } ReaderPool::~ReaderPool() { } void ReaderPool::clear(SegmentInfosPtr infos) { SyncLock syncLock(this); if (!infos) { for (MapSegmentInfoSegmentReader::iterator ent = readerMap.begin(); ent != readerMap.end(); ++ent) ent->second->_hasChanges = false; } else { for (int32_t i = 0; i < infos->size(); ++i) { MapSegmentInfoSegmentReader::iterator ent = readerMap.find(infos->info(i)); if (ent != readerMap.end()) ent->second->_hasChanges = false; } } } bool ReaderPool::infoIsLive(SegmentInfoPtr info) { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); int32_t idx = indexWriter->segmentInfos->find(info); BOOST_ASSERT(idx != -1); BOOST_ASSERT(indexWriter->segmentInfos->info(idx) == info); return true; } SegmentInfoPtr ReaderPool::mapToLive(SegmentInfoPtr info) { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); int32_t idx = indexWriter->segmentInfos->find(info); if (idx != -1) info = indexWriter->segmentInfos->info(idx); return info; } void ReaderPool::release(SegmentReaderPtr sr) { release(sr, false); } void ReaderPool::release(SegmentReaderPtr sr, bool drop) { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); bool pooled = readerMap.contains(sr->getSegmentInfo()); BOOST_ASSERT(!pooled || readerMap.get(sr->getSegmentInfo()) == sr); // Drop caller's ref; for an external reader (not pooled), this decRef will close it sr->decRef(); if (pooled && (drop || (!indexWriter->poolReaders && sr->getRefCount() == 1))) { // We invoke deleter.checkpoint below, so we must be sync'd on IW if there are changes BOOST_ASSERT(!sr->_hasChanges || holdsLock()); // Discard (don't save) changes when we are dropping the reader; this is used only on the // sub-readers after a successful merge. 
sr->_hasChanges = sr->_hasChanges && !drop; bool hasChanges = sr->_hasChanges; // Drop our ref - this will commit any pending changes to the dir sr->close(); // We are the last ref to this reader; since we're not pooling readers, we release it readerMap.remove(sr->getSegmentInfo()); if (hasChanges) { // Must checkpoint with deleter, because this segment reader will have created new // _X_N.del file. indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } } } void ReaderPool::close() { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); // We invoke deleter.checkpoint below, so we must be sync'd on IW BOOST_ASSERT(holdsLock()); for (MapSegmentInfoSegmentReader::iterator iter = readerMap.begin(); iter != readerMap.end(); ++iter) { if (iter->second->_hasChanges) { BOOST_ASSERT(infoIsLive(iter->second->getSegmentInfo())); iter->second->doCommit(MapStringString()); // Must checkpoint with deleter, because this segment reader will have created // new _X_N.del file. indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } // NOTE: it is allowed that this decRef does not actually close the SR; this can happen when a // near real-time reader is kept open after the IndexWriter instance is closed iter->second->decRef(); } readerMap.clear(); } void ReaderPool::commit() { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); // We invoke deleter.checkpoint below, so we must be sync'd on IW BOOST_ASSERT(holdsLock()); for (MapSegmentInfoSegmentReader::iterator ent = readerMap.begin(); ent != readerMap.end(); ++ent) { if (ent->second->_hasChanges) { BOOST_ASSERT(infoIsLive(ent->second->getSegmentInfo())); ent->second->doCommit(MapStringString()); // Must checkpoint with deleter, because this segment reader will have created // new _X_N.del file. 
indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } } } IndexReaderPtr ReaderPool::getReadOnlyClone(SegmentInfoPtr info, bool doOpenStores, int32_t termInfosIndexDivisor) { SyncLock syncLock(this); SegmentReaderPtr sr(get(info, doOpenStores, BufferedIndexInput::BUFFER_SIZE, termInfosIndexDivisor)); IndexReaderPtr clone; LuceneException finally; try { clone = boost::dynamic_pointer_cast(sr->clone(true)); } catch (LuceneException& e) { finally = e; } sr->decRef(); finally.throwException(); return clone; } SegmentReaderPtr ReaderPool::get(SegmentInfoPtr info, bool doOpenStores) { return get(info, doOpenStores, BufferedIndexInput::BUFFER_SIZE, IndexWriterPtr(_indexWriter)->readerTermsIndexDivisor); } SegmentReaderPtr ReaderPool::get(SegmentInfoPtr info, bool doOpenStores, int32_t readBufferSize, int32_t termsIndexDivisor) { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); if (indexWriter->poolReaders) readBufferSize = BufferedIndexInput::BUFFER_SIZE; SegmentReaderPtr sr(readerMap.get(info)); if (!sr) { // Returns a ref, which we xfer to readerMap sr = SegmentReader::get(false, info->dir, info, readBufferSize, doOpenStores, termsIndexDivisor); if (info->dir == indexWriter->directory) { // Only pool if reader is not external readerMap.put(info, sr); } } else { if (doOpenStores) sr->openDocStores(); if (termsIndexDivisor != -1 && !sr->termsIndexLoaded()) { // If this reader was originally opened because we needed to merge it, we didn't load the terms // index. But now, if the caller wants the terms index (eg because it's doing deletes, or an NRT // reader is being opened) we ask the reader to load its terms index. 
sr->loadTermsIndex(termsIndexDivisor); } } // Return a ref to our caller if (info->dir == indexWriter->directory) { // Only incRef if we pooled (reader is not external) sr->incRef(); } return sr; } SegmentReaderPtr ReaderPool::getIfExists(SegmentInfoPtr info) { SyncLock syncLock(this); SegmentReaderPtr sr(readerMap.get(info)); if (sr) sr->incRef(); return sr; } IndexReaderWarmer::~IndexReaderWarmer() { } } LucenePlusPlus-rel_3.0.4/src/core/index/IntBlockPool.cpp000066400000000000000000000033221217574114600232040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IntBlockPool.h" #include "DocumentsWriter.h" namespace Lucene { IntBlockPool::IntBlockPool(DocumentsWriterPtr docWriter, bool trackAllocations) { this->buffers = Collection::newInstance(10); this->bufferUpto = -1; this->intUpto = DocumentsWriter::INT_BLOCK_SIZE; this->intOffset = -DocumentsWriter::INT_BLOCK_SIZE; this->_docWriter = docWriter; this->trackAllocations = trackAllocations; } IntBlockPool::~IntBlockPool() { } void IntBlockPool::reset() { if (bufferUpto != -1) { if (bufferUpto > 0) { // Recycle all but the first buffer DocumentsWriterPtr(_docWriter)->recycleIntBlocks(buffers, 1, 1 + bufferUpto); } // Reuse first buffer bufferUpto = 0; intUpto = 0; intOffset = 0; buffer = buffers[0]; } } void IntBlockPool::nextBuffer() { if (bufferUpto + 1 == buffers.size()) buffers.resize((int32_t)((double)buffers.size() * 1.5)); buffer = DocumentsWriterPtr(_docWriter)->getIntBlock(trackAllocations); buffers[1 + bufferUpto] = buffer; ++bufferUpto; intUpto = 0; intOffset += DocumentsWriter::INT_BLOCK_SIZE; } } 
LucenePlusPlus-rel_3.0.4/src/core/index/InvertedDocConsumer.cpp000066400000000000000000000011331217574114600245650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocConsumer.h" namespace Lucene { InvertedDocConsumer::~InvertedDocConsumer() { } void InvertedDocConsumer::setFieldInfos(FieldInfosPtr fieldInfos) { this->fieldInfos = fieldInfos; } } LucenePlusPlus-rel_3.0.4/src/core/index/InvertedDocConsumerPerField.cpp000066400000000000000000000007651217574114600262120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocConsumerPerField.h" namespace Lucene { InvertedDocConsumerPerField::~InvertedDocConsumerPerField() { } } LucenePlusPlus-rel_3.0.4/src/core/index/InvertedDocConsumerPerThread.cpp000066400000000000000000000007701217574114600263720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocConsumerPerThread.h" namespace Lucene { InvertedDocConsumerPerThread::~InvertedDocConsumerPerThread() { } } LucenePlusPlus-rel_3.0.4/src/core/index/InvertedDocEndConsumer.cpp000066400000000000000000000007461217574114600252250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocEndConsumer.h" namespace Lucene { InvertedDocEndConsumer::~InvertedDocEndConsumer() { } } LucenePlusPlus-rel_3.0.4/src/core/index/InvertedDocEndConsumerPerField.cpp000066400000000000000000000007761217574114600266430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocEndConsumerPerField.h" namespace Lucene { InvertedDocEndConsumerPerField::~InvertedDocEndConsumerPerField() { } } LucenePlusPlus-rel_3.0.4/src/core/index/InvertedDocEndConsumerPerThread.cpp000066400000000000000000000010011217574114600270050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocEndConsumerPerThread.h" namespace Lucene { InvertedDocEndConsumerPerThread::~InvertedDocEndConsumerPerThread() { } } LucenePlusPlus-rel_3.0.4/src/core/index/KeepOnlyLastCommitDeletionPolicy.cpp000066400000000000000000000020301217574114600272270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "IndexCommit.h" namespace Lucene { KeepOnlyLastCommitDeletionPolicy::~KeepOnlyLastCommitDeletionPolicy() { } void KeepOnlyLastCommitDeletionPolicy::onInit(Collection commits) { // Note that commits.size() should normally be 1 onCommit(commits); } void KeepOnlyLastCommitDeletionPolicy::onCommit(Collection commits) { // Note that commits.size() should normally be 2 (if not called by onInit above) int32_t size = commits.size(); for (int32_t i = 0; i < size - 1; ++i) commits[i]->deleteCommit(); } } LucenePlusPlus-rel_3.0.4/src/core/index/LogByteSizeMergePolicy.cpp000066400000000000000000000032741217574114600252130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "LogByteSizeMergePolicy.h" namespace Lucene { /// Default minimum segment size. 
const double LogByteSizeMergePolicy::DEFAULT_MIN_MERGE_MB = 1.6; /// Default maximum segment size. A segment of this size or larger will never be merged. const double LogByteSizeMergePolicy::DEFAULT_MAX_MERGE_MB = DBL_MAX; LogByteSizeMergePolicy::LogByteSizeMergePolicy(IndexWriterPtr writer) : LogMergePolicy(writer) { minMergeSize = (int64_t)(DEFAULT_MIN_MERGE_MB * 1024 * 1024); maxMergeSize = DEFAULT_MAX_MERGE_MB == DBL_MAX ? std::numeric_limits::max() : (int64_t)(DEFAULT_MAX_MERGE_MB * 1024 * 1024); } LogByteSizeMergePolicy::~LogByteSizeMergePolicy() { } int64_t LogByteSizeMergePolicy::size(SegmentInfoPtr info) { return sizeBytes(info); } void LogByteSizeMergePolicy::setMaxMergeMB(double mb) { maxMergeSize = (int64_t)(mb * 1024 * 1024); } double LogByteSizeMergePolicy::getMaxMergeMB() { return ((double)maxMergeSize) / 1024 / 1024; } void LogByteSizeMergePolicy::setMinMergeMB(double mb) { minMergeSize = (int64_t)(mb * 1024 * 1024); } double LogByteSizeMergePolicy::getMinMergeMB() { return ((double)minMergeSize) / 1024 / 1024; } } LucenePlusPlus-rel_3.0.4/src/core/index/LogDocMergePolicy.cpp000066400000000000000000000023411217574114600241540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LogDocMergePolicy.h" namespace Lucene { /// Default minimum segment size. 
@see setMinMergeDocs const int32_t LogDocMergePolicy::DEFAULT_MIN_MERGE_DOCS = 1000; LogDocMergePolicy::LogDocMergePolicy(IndexWriterPtr writer) : LogMergePolicy(writer) { minMergeSize = DEFAULT_MIN_MERGE_DOCS; // maxMergeSize is never used by LogDocMergePolicy; set it to LLONG_MAX to disable it maxMergeSize = std::numeric_limits::max(); } LogDocMergePolicy::~LogDocMergePolicy() { } int64_t LogDocMergePolicy::size(SegmentInfoPtr info) { return sizeDocs(info); } void LogDocMergePolicy::setMinMergeDocs(int32_t minMergeDocs) { minMergeSize = minMergeDocs; } int32_t LogDocMergePolicy::getMinMergeDocs() { return (int32_t)minMergeSize; } } LucenePlusPlus-rel_3.0.4/src/core/index/LogMergePolicy.cpp000066400000000000000000000400401217574114600235240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LogMergePolicy.h" #include "IndexWriter.h" #include "SegmentInfo.h" #include "StringUtils.h" namespace Lucene { /// Defines the allowed range of log(size) for each level. A level is computed by taking the max segment /// log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range. const double LogMergePolicy::LEVEL_LOG_SPAN = 0.75; /// Default merge factor, which is how many segments are merged at a time. const int32_t LogMergePolicy::DEFAULT_MERGE_FACTOR = 10; /// Default maximum segment size. A segment of this size or larger will never be merged. const int32_t LogMergePolicy::DEFAULT_MAX_MERGE_DOCS = INT_MAX; /// Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it. 
const double LogMergePolicy::DEFAULT_NO_CFS_RATIO = 0.1; LogMergePolicy::LogMergePolicy(IndexWriterPtr writer) : MergePolicy(writer) { mergeFactor = DEFAULT_MERGE_FACTOR; noCFSRatio = DEFAULT_NO_CFS_RATIO; minMergeSize = 0; maxMergeSize = 0; maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; calibrateSizeByDeletes = false; _useCompoundFile = true; _useCompoundDocStore = true; } LogMergePolicy::~LogMergePolicy() { } double LogMergePolicy::getNoCFSRatio() { return noCFSRatio; } void LogMergePolicy::setNoCFSRatio(double noCFSRatio) { if (noCFSRatio < 0.0 || noCFSRatio > 1.0) boost::throw_exception(IllegalArgumentException(L"noCFSRatio must be 0.0 to 1.0 inclusive; got " + StringUtils::toString(noCFSRatio))); this->noCFSRatio = noCFSRatio; } bool LogMergePolicy::verbose() { return (!_writer.expired() && IndexWriterPtr(_writer)->verbose()); } void LogMergePolicy::message(const String& message) { if (verbose()) IndexWriterPtr(_writer)->message(L"LMP: " + message); } int32_t LogMergePolicy::getMergeFactor() { return mergeFactor; } void LogMergePolicy::setMergeFactor(int32_t mergeFactor) { if (mergeFactor < 2) boost::throw_exception(IllegalArgumentException(L"mergeFactor cannot be less than 2")); this->mergeFactor = mergeFactor; } bool LogMergePolicy::getUseCompoundFile() { return _useCompoundFile; } void LogMergePolicy::setUseCompoundFile(bool useCompoundFile) { _useCompoundFile = useCompoundFile; } bool LogMergePolicy::useCompoundFile(SegmentInfosPtr segments, SegmentInfoPtr newSegment) { return _useCompoundFile; } bool LogMergePolicy::useCompoundDocStore(SegmentInfosPtr segments) { return _useCompoundDocStore; } void LogMergePolicy::setUseCompoundDocStore(bool useCompoundDocStore) { _useCompoundDocStore = useCompoundDocStore; } bool LogMergePolicy::getUseCompoundDocStore() { return _useCompoundDocStore; } void LogMergePolicy::setCalibrateSizeByDeletes(bool calibrateSizeByDeletes) { this->calibrateSizeByDeletes = calibrateSizeByDeletes; } bool 
LogMergePolicy::getCalibrateSizeByDeletes() { return calibrateSizeByDeletes; } void LogMergePolicy::close() { } int64_t LogMergePolicy::sizeDocs(SegmentInfoPtr info) { if (calibrateSizeByDeletes) { int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); return (info->docCount - (int64_t)delCount); } else return info->docCount; } int64_t LogMergePolicy::sizeBytes(SegmentInfoPtr info) { int64_t byteSize = info->sizeInBytes(); if (calibrateSizeByDeletes) { int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); double delRatio = info->docCount <= 0 ? 0.0 : ((double)delCount / (double)info->docCount); return info->docCount <= 0 ? byteSize : (int64_t)(byteSize * (1.0 - delRatio)); } else return byteSize; } bool LogMergePolicy::isOptimized(SegmentInfosPtr infos, int32_t maxNumSegments, SetSegmentInfo segmentsToOptimize) { int32_t numSegments = infos->size(); int32_t numToOptimize = 0; SegmentInfoPtr optimizeInfo; for (int32_t i = 0; i < numSegments && numToOptimize <= maxNumSegments; ++i) { SegmentInfoPtr info(infos->info(i)); if (segmentsToOptimize.contains(info)) { ++numToOptimize; optimizeInfo = info; } } return (numToOptimize <= maxNumSegments && (numToOptimize != 1 || isOptimized(optimizeInfo))); } bool LogMergePolicy::isOptimized(SegmentInfoPtr info) { IndexWriterPtr writer(_writer); bool hasDeletions = (writer->numDeletedDocs(info) > 0); return (!hasDeletions && !info->hasSeparateNorms() && info->dir == writer->getDirectory() && (info->getUseCompoundFile() == _useCompoundFile || noCFSRatio < 1.0)); } MergeSpecificationPtr LogMergePolicy::findMergesForOptimize(SegmentInfosPtr segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize) { MergeSpecificationPtr spec; BOOST_ASSERT(maxSegmentCount > 0); if (!isOptimized(segmentInfos, maxSegmentCount, segmentsToOptimize)) { // Find the newest (rightmost) segment that needs to be optimized (other segments may have been // flushed since optimize started) int32_t last = 
segmentInfos->size(); while (last > 0) { if (segmentsToOptimize.contains(segmentInfos->info(--last))) { ++last; break; } } if (last > 0) { spec = newLucene(); // First, enroll all "full" merges (size mergeFactor) to potentially be run concurrently while (last - maxSegmentCount + 1 >= mergeFactor) { spec->add(makeOneMerge(segmentInfos, segmentInfos->range(last - mergeFactor, last))); last -= mergeFactor; } // Only if there are no full merges pending do we add a final partial (< mergeFactor segments) merge if (spec->merges.empty()) { if (maxSegmentCount == 1) { // Since we must optimize down to 1 segment, the choice is simple if (last > 1 || !isOptimized(segmentInfos->info(0))) spec->add(makeOneMerge(segmentInfos, segmentInfos->range(0, last))); } else if (last > maxSegmentCount) { // Take care to pick a partial merge that is least cost, but does not make the index too // lopsided. If we always just picked the partial tail then we could produce a highly // lopsided index over time // We must merge this many segments to leave maxNumSegments in the index (from when // optimize was first kicked off) int32_t finalMergeSize = last - maxSegmentCount + 1; // Consider all possible starting points int64_t bestSize = 0; int32_t bestStart = 0; for (int32_t i = 0; i < last - finalMergeSize + 1; ++i) { int64_t sumSize = 0; for (int32_t j = 0; j < finalMergeSize; ++j) sumSize += size(segmentInfos->info(j + i)); if (i == 0 || (sumSize < 2 * size(segmentInfos->info(i - 1)) && sumSize < bestSize)) { bestStart = i; bestSize = sumSize; } } spec->add(makeOneMerge(segmentInfos, segmentInfos->range(bestStart, bestStart + finalMergeSize))); } } } else spec.reset(); } else spec.reset(); return spec; } MergeSpecificationPtr LogMergePolicy::findMergesToExpungeDeletes(SegmentInfosPtr segmentInfos) { int32_t numSegments = segmentInfos->size(); message(L"findMergesToExpungeDeletes: " + StringUtils::toString(numSegments) + L" segments"); MergeSpecificationPtr spec(newLucene()); int32_t 
firstSegmentWithDeletions = -1; for (int32_t i = 0; i < numSegments; ++i) { SegmentInfoPtr info(segmentInfos->info(i)); int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); if (delCount > 0) { message(L" segment " + info->name + L" has deletions"); if (firstSegmentWithDeletions == -1) firstSegmentWithDeletions = i; else if (i - firstSegmentWithDeletions == mergeFactor) { // We've seen mergeFactor segments in a row with deletions, so force a merge now message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(i - 1) + L" inclusive"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = i; } } else if (firstSegmentWithDeletions != -1) { // End of a sequence of segments with deletions, so merge those past segments even if // it's fewer than mergeFactor segments message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(i - 1) + L" inclusive"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = -1; } } if (firstSegmentWithDeletions != -1) { message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(numSegments - 1) + L" inclusive"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, numSegments))); } return spec; } MergeSpecificationPtr LogMergePolicy::findMerges(SegmentInfosPtr segmentInfos) { int32_t numSegments = segmentInfos->size(); message(L"findMerges: " + StringUtils::toString(numSegments) + L" segments"); // Compute levels, which is just log (base mergeFactor) of the size of each segment Collection levels(Collection::newInstance(numSegments)); double norm = std::log((double)mergeFactor); for (int32_t i = 0; i < numSegments; ++i) { SegmentInfoPtr info(segmentInfos->info(i)); int64_t _size = size(info); // Floor tiny segments _size = 
std::max(_size, (int64_t)1); levels[i] = std::log((double)_size) / norm; } double levelFloor = minMergeSize <= 0 ? 0 : (std::log((double)minMergeSize) / norm); // Now, we quantize the log values into levels. The first level is any segment whose log // size is within LEVEL_LOG_SPAN of the max size, or, who has such as segment "to the right". // Then, we find the max of all other segments and use that to define the next level segment, etc. MergeSpecificationPtr spec; int32_t start = 0; while (start < numSegments) { // Find max level of all segments not already quantized double maxLevel = levels[start]; for (int32_t i = 1 + start; i < numSegments; ++i) maxLevel = std::max(maxLevel, levels[i]); // Now search backwards for the rightmost segment that falls into this level double levelBottom; if (maxLevel < levelFloor) levelBottom = -1.0; else { levelBottom = (double)(maxLevel - LEVEL_LOG_SPAN); // Force a boundary at the level floor if (levelBottom < levelFloor && maxLevel >= levelFloor) levelBottom = levelFloor; } int32_t upto = numSegments - 1; while (upto >= start) { if (levels[upto] >= levelBottom) break; --upto; } message(L" level " + StringUtils::toString(levelBottom) + L" to " + StringUtils::toString(maxLevel) + L": " + StringUtils::toString(1 + upto - start) + L" segments"); // Finally, record all merges that are viable at this level int32_t end = start + mergeFactor; while (end <= 1 + upto) { bool anyTooLarge = false; for (int32_t i = start; i < end; ++i) { SegmentInfoPtr info(segmentInfos->info(i)); if (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs) { anyTooLarge = true; break; } } if (!anyTooLarge) { if (!spec) spec = newLucene(); message(L" " + StringUtils::toString(start) + L" to " + StringUtils::toString(end) + L": add this merge"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(start, end))); } else message(L" " + StringUtils::toString(start) + L" to " + StringUtils::toString(end) + L": contains segment over maxMergeSize or 
maxMergeDocs; skipping"); start = end; end = start + mergeFactor; } start = 1 + upto; } return spec; } OneMergePtr LogMergePolicy::makeOneMerge(SegmentInfosPtr infos, SegmentInfosPtr infosToMerge) { bool doCFS; if (!_useCompoundFile) doCFS = false; else if (noCFSRatio == 1.0) doCFS = true; else { int64_t totSize = 0; int32_t numInfos = infos->size(); for (int32_t i = 0; i < numInfos; ++i) { SegmentInfoPtr info(infos->info(i)); totSize += size(info); } int64_t mergeSize = 0; int32_t numMerges = infosToMerge->size(); for (int32_t i = 0; i < numMerges; ++i) { SegmentInfoPtr info(infosToMerge->info(i)); mergeSize += size(info); } doCFS = mergeSize <= noCFSRatio * totSize; } return newLucene(infosToMerge, doCFS); } void LogMergePolicy::setMaxMergeDocs(int32_t maxMergeDocs) { this->maxMergeDocs = maxMergeDocs; } int32_t LogMergePolicy::getMaxMergeDocs() { return maxMergeDocs; } } LucenePlusPlus-rel_3.0.4/src/core/index/MergeDocIDRemapper.cpp000066400000000000000000000063711217574114600242520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MergeDocIDRemapper.h" #include "SegmentMerger.h" #include "MergePolicy.h" #include "SegmentInfo.h" namespace Lucene { MergeDocIDRemapper::MergeDocIDRemapper(SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergedDocCount) { this->docMaps = docMaps; SegmentInfoPtr firstSegment(merge->segments->info(0)); int32_t i = 0; this->minDocID = 0; while (true) { SegmentInfoPtr info(infos->info(i)); if (info->equals(firstSegment)) break; minDocID += info->docCount; ++i; } int32_t numDocs = 0; for (int32_t j = 0; j < docMaps.size(); ++i, ++j) { numDocs += infos->info(i)->docCount; BOOST_ASSERT(infos->info(i)->equals(merge->segments->info(j))); } this->maxDocID = minDocID + numDocs; starts = Collection::newInstance(docMaps.size()); newStarts = Collection::newInstance(docMaps.size()); starts[0] = minDocID; newStarts[0] = minDocID; for (i = 1; i < docMaps.size(); ++i) { int32_t lastDocCount = merge->segments->info(i - 1)->docCount; starts[i] = starts[i - 1] + lastDocCount; newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; } this->docShift = numDocs - mergedDocCount; // There are rare cases when docShift is 0. It happens if you try to delete a docID that's // out of bounds, because the SegmentReader still allocates deletedDocs and pretends it has // deletions ... 
so we can't make this assert here: BOOST_ASSERT(docShift > 0); // Make sure it all adds up BOOST_ASSERT(docShift == maxDocID - (newStarts[docMaps.size() - 1] + merge->segments->info(docMaps.size() - 1)->docCount - delCounts[docMaps.size() - 1])); } MergeDocIDRemapper::~MergeDocIDRemapper() { } int32_t MergeDocIDRemapper::remap(int32_t oldDocID) { if (oldDocID < minDocID) { // Unaffected by merge return oldDocID; } else if (oldDocID >= maxDocID) { // This doc was "after" the merge, so simple shift return oldDocID - docShift; } else { // Binary search to locate this document & find its new docID Collection::iterator doc = std::upper_bound(starts.begin(), starts.begin() + docMaps.size(), oldDocID); int32_t docMap = std::distance(starts.begin(), doc) - 1; if (docMaps[docMap]) return newStarts[docMap] + docMaps[docMap][oldDocID - starts[docMap]]; else return newStarts[docMap] + oldDocID - starts[docMap]; } } } LucenePlusPlus-rel_3.0.4/src/core/index/MergePolicy.cpp000066400000000000000000000057521217574114600230750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MergePolicy.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "StringUtils.h" namespace Lucene { MergePolicy::MergePolicy(IndexWriterPtr writer) { this->_writer = writer; } MergePolicy::~MergePolicy() { } OneMerge::OneMerge(SegmentInfosPtr segments, bool useCompoundFile) { mergeDocStores = false; optimize = false; registerDone = false; mergeGen = 0; isExternal = false; maxNumSegmentsOptimize = 0; aborted = false; if (segments->empty()) boost::throw_exception(RuntimeException(L"segments must include at least one segment")); this->segments = segments; this->useCompoundFile = useCompoundFile; } OneMerge::~OneMerge() { } void OneMerge::setException(const LuceneException& error) { SyncLock syncLock(this); this->error = error; } LuceneException OneMerge::getException() { SyncLock syncLock(this); return error; } void OneMerge::abort() { SyncLock syncLock(this); aborted = true; } bool OneMerge::isAborted() { SyncLock syncLock(this); return aborted; } void OneMerge::checkAborted(DirectoryPtr dir) { SyncLock syncLock(this); if (aborted) boost::throw_exception(MergeAbortedException(L"merge is aborted: " + segString(dir))); } String OneMerge::segString(DirectoryPtr dir) { StringStream buffer; int32_t numSegments = segments->size(); for (int32_t i = 0; i < numSegments; ++i) { if (i > 0) buffer << L" "; buffer << segments->info(i)->segString(dir); } if (info) buffer << L" into " + info->name; if (optimize) buffer << L" [optimize]"; if (mergeDocStores) buffer << L" [mergeDocStores]"; return buffer.str(); } MergeSpecification::MergeSpecification() { merges = Collection::newInstance(); } MergeSpecification::~MergeSpecification() { } void MergeSpecification::add(OneMergePtr merge) { merges.add(merge); } String MergeSpecification::segString(DirectoryPtr dir) { String seg(L"MergeSpec:\n"); int32_t i = 1; for (Collection::iterator merge = merges.begin(); merge != 
merges.end(); ++merge) seg += L" " + StringUtils::toString(i++) + L": " + (*merge)->segString(dir); return seg; } } LucenePlusPlus-rel_3.0.4/src/core/index/MergeScheduler.cpp000066400000000000000000000007161217574114600235470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MergeScheduler.h" namespace Lucene { MergeScheduler::~MergeScheduler() { } } LucenePlusPlus-rel_3.0.4/src/core/index/MultiLevelSkipListReader.cpp000066400000000000000000000167641217574114600255530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiLevelSkipListReader.h" #include "BufferedIndexInput.h" #include "MiscUtils.h" namespace Lucene { MultiLevelSkipListReader::MultiLevelSkipListReader(IndexInputPtr skipStream, int32_t maxSkipLevels, int32_t skipInterval) { this->numberOfLevelsToBuffer = 1; this->numberOfSkipLevels = 0; this->docCount = 0; this->haveSkipped = false; this->lastDoc = 0; this->lastChildPointer = 0; this->skipStream = Collection::newInstance(maxSkipLevels); this->skipPointer = Collection::newInstance(maxSkipLevels); this->childPointer = Collection::newInstance(maxSkipLevels); this->numSkipped = Collection::newInstance(maxSkipLevels); this->maxNumberOfSkipLevels = maxSkipLevels; this->skipInterval = Collection::newInstance(maxSkipLevels); this->skipStream[0] = skipStream; this->inputIsBuffered = boost::dynamic_pointer_cast(skipStream); this->skipInterval[0] = skipInterval; this->skipDoc = Collection::newInstance(maxSkipLevels); MiscUtils::arrayFill(this->skipPointer.begin(), 0, this->skipPointer.size(), 0); MiscUtils::arrayFill(this->childPointer.begin(), 0, this->childPointer.size(), 0); MiscUtils::arrayFill(this->numSkipped.begin(), 0, this->numSkipped.size(), 0); MiscUtils::arrayFill(this->skipDoc.begin(), 0, this->skipDoc.size(), 0); for (int32_t i = 1; i < maxSkipLevels; ++i) { // cache skip intervals this->skipInterval[i] = this->skipInterval[i - 1] * skipInterval; } } MultiLevelSkipListReader::~MultiLevelSkipListReader() { } int32_t MultiLevelSkipListReader::getDoc() { return lastDoc; } int32_t MultiLevelSkipListReader::skipTo(int32_t target) { if (!haveSkipped) { // first time, load skip levels loadSkipLevels(); haveSkipped = true; } // walk up the levels until highest level is found that has a skip for this target int32_t level = 0; while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) ++level; while (level >= 0) { if (target > skipDoc[level]) { if 
(!loadNextSkip(level)) continue; } else { // no more skips on this level, go down one level if (level > 0 && lastChildPointer > skipStream[level - 1]->getFilePointer()) seekChild(level - 1); --level; } } return numSkipped[0] - skipInterval[0] - 1; } bool MultiLevelSkipListReader::loadNextSkip(int32_t level) { // we have to skip, the target document is greater than the current skip list entry setLastSkipData(level); numSkipped[level] += skipInterval[level]; if (numSkipped[level] > docCount) { // this skip list is exhausted skipDoc[level] = INT_MAX; if (numberOfSkipLevels > level) numberOfSkipLevels = level; return false; } // read next skip entry skipDoc[level] += readSkipData(level, skipStream[level]); if (level != 0) { // read the child pointer if we are not on the leaf level childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; } return true; } void MultiLevelSkipListReader::seekChild(int32_t level) { skipStream[level]->seek(lastChildPointer); numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1]; skipDoc[level] = lastDoc; if (level > 0) childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; } void MultiLevelSkipListReader::close() { for (int32_t i = 1; i < skipStream.size(); ++i) { if (skipStream[i]) skipStream[i]->close(); } } void MultiLevelSkipListReader::init(int64_t skipPointer, int32_t df) { this->skipPointer[0] = skipPointer; this->docCount = df; MiscUtils::arrayFill(skipDoc.begin(), 0, skipDoc.size(), 0); MiscUtils::arrayFill(numSkipped.begin(), 0, numSkipped.size(), 0); MiscUtils::arrayFill(childPointer.begin(), 0, childPointer.size(), 0); haveSkipped = false; for (int32_t i = 1; i < numberOfSkipLevels; ++i) skipStream[i].reset(); } void MultiLevelSkipListReader::loadSkipLevels() { numberOfSkipLevels = docCount == 0 ? 
0 : (int32_t)std::floor(std::log((double)docCount) / std::log((double)skipInterval[0])); if (numberOfSkipLevels > maxNumberOfSkipLevels) numberOfSkipLevels = maxNumberOfSkipLevels; skipStream[0]->seek(skipPointer[0]); int32_t toBuffer = numberOfLevelsToBuffer; for (int32_t i = numberOfSkipLevels - 1; i > 0; --i) { // the length of the current level int64_t length = skipStream[0]->readVLong(); // the start pointer of the current level skipPointer[i] = skipStream[0]->getFilePointer(); if (toBuffer > 0) { // buffer this level skipStream[i] = newLucene(skipStream[0], (int32_t)length); --toBuffer; } else { // clone this stream, it is already at the start of the current level skipStream[i] = boost::dynamic_pointer_cast(skipStream[0]->clone()); if (inputIsBuffered && length < BufferedIndexInput::BUFFER_SIZE) boost::dynamic_pointer_cast(skipStream[i])->setBufferSize((int32_t)length); // move base stream beyond the current level skipStream[0]->seek(skipStream[0]->getFilePointer() + length); } } // use base stream for the lowest level skipPointer[0] = skipStream[0]->getFilePointer(); } void MultiLevelSkipListReader::setLastSkipData(int32_t level) { lastDoc = skipDoc[level]; lastChildPointer = childPointer[level]; } SkipBuffer::SkipBuffer(IndexInputPtr input, int32_t length) { pos = 0; data = ByteArray::newInstance(length); pointer = input->getFilePointer(); input->readBytes(data.get(), 0, length); } SkipBuffer::~SkipBuffer() { } void SkipBuffer::close() { data.reset(); } int64_t SkipBuffer::getFilePointer() { return (pointer + pos); } int64_t SkipBuffer::length() { return data.size(); } uint8_t SkipBuffer::readByte() { return data[pos++]; } void SkipBuffer::readBytes(uint8_t* b, int32_t offset, int32_t length) { MiscUtils::arrayCopy(data.get(), pos, b, offset, length); pos += length; } void SkipBuffer::seek(int64_t pos) { this->pos = (int32_t)(pos - pointer); } } 
LucenePlusPlus-rel_3.0.4/src/core/index/MultiLevelSkipListWriter.cpp000066400000000000000000000061101217574114600256050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiLevelSkipListWriter.h" #include "RAMOutputStream.h" namespace Lucene { MultiLevelSkipListWriter::MultiLevelSkipListWriter(int32_t skipInterval, int32_t maxSkipLevels, int32_t df) { this->skipInterval = skipInterval; // calculate the maximum number of skip levels for this document frequency numberOfSkipLevels = df == 0 ? 0 : (int32_t)std::floor(std::log((double)df) / std::log((double)skipInterval)); // make sure it does not exceed maxSkipLevels numberOfSkipLevels = std::max(numberOfSkipLevels, maxSkipLevels); } MultiLevelSkipListWriter::~MultiLevelSkipListWriter() { } void MultiLevelSkipListWriter::init() { skipBuffer = Collection::newInstance(numberOfSkipLevels); for (int32_t i = 0; i < numberOfSkipLevels; ++i) skipBuffer[i] = newLucene(); } void MultiLevelSkipListWriter::resetSkip() { // creates new buffers or empties the existing ones if (!skipBuffer) init(); else { for (Collection::iterator buffer = skipBuffer.begin(); buffer != skipBuffer.end(); ++buffer) (*buffer)->reset(); } } void MultiLevelSkipListWriter::bufferSkip(int32_t df) { int32_t numLevels = 0; // determine max level for (; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) ++numLevels; int64_t childPointer = 0; for (int32_t level = 0; level < numLevels; ++level) { writeSkipData(level, skipBuffer[level]); int64_t newChildPointer = skipBuffer[level]->getFilePointer(); if (level != 0) { // store child pointers for all levels except the lowest 
skipBuffer[level]->writeVLong(childPointer); } // remember the childPointer for the next level childPointer = newChildPointer; } } int64_t MultiLevelSkipListWriter::writeSkip(IndexOutputPtr output) { int64_t skipPointer = output->getFilePointer(); if (!skipBuffer || skipBuffer.empty()) return skipPointer; for (int32_t level = numberOfSkipLevels - 1; level > 0; --level) { int64_t length = skipBuffer[level]->getFilePointer(); if (length > 0) { output->writeVLong(length); skipBuffer[level]->writeTo(output); } } skipBuffer[0]->writeTo(output); return skipPointer; } } LucenePlusPlus-rel_3.0.4/src/core/index/MultiReader.cpp000066400000000000000000000274351217574114600230750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiReader.h" #include "DirectoryReader.h" #include "DefaultSimilarity.h" #include "FieldCache.h" #include "MiscUtils.h" namespace Lucene { MultiReader::MultiReader(Collection subReaders, bool closeSubReaders) { this->normsCache = MapStringByteArray::newInstance(); this->_maxDoc = 0; this->_numDocs = -1; this->_hasDeletions = false; this->subReaders = subReaders; starts = Collection::newInstance(subReaders.size() + 1); // build starts array decrefOnClose = Collection::newInstance(subReaders.size()); for (int32_t i = 0; i < subReaders.size(); ++i) { starts[i] = _maxDoc; _maxDoc += subReaders[i]->maxDoc(); // compute maxDocs if (!closeSubReaders) { subReaders[i]->incRef(); decrefOnClose[i] = true; } else decrefOnClose[i] = false; if (subReaders[i]->hasDeletions()) _hasDeletions = true; } starts[subReaders.size()] = _maxDoc; } MultiReader::~MultiReader() { } IndexReaderPtr MultiReader::reopen() { SyncLock 
syncLock(this); return doReopen(false); } LuceneObjectPtr MultiReader::clone(LuceneObjectPtr other) { SyncLock syncLock(this); try { return doReopen(true); } catch (LuceneException& e) { boost::throw_exception(RuntimeException(e.getError())); } return LuceneObjectPtr(); } IndexReaderPtr MultiReader::doReopen(bool doClone) { ensureOpen(); bool reopened = false; Collection newSubReaders(Collection::newInstance(subReaders.size())); bool success = false; LuceneException finally; try { for (int32_t i = 0; i < subReaders.size(); ++i) { if (doClone) newSubReaders[i] = boost::dynamic_pointer_cast(subReaders[i]->clone()); else newSubReaders[i] = subReaders[i]->reopen(); // if at least one of the subreaders was updated we remember that and return a new MultiReader if (newSubReaders[i] != subReaders[i]) reopened = true; } success = true; } catch (LuceneException& e) { finally = e; } if (!success && reopened) { for (int32_t i = 0; i < newSubReaders.size(); ++i) { if (newSubReaders[i] != subReaders[i]) { try { if (newSubReaders[i]) newSubReaders[i]->close(); } catch (...) 
{ // keep going - we want to clean up as much as possible } } } } finally.throwException(); if (reopened) { Collection newDecrefOnClose(Collection::newInstance(subReaders.size())); for (int32_t i = 0; i < subReaders.size(); ++i) { if (newSubReaders[i] == subReaders[i]) { newSubReaders[i]->incRef(); newDecrefOnClose[i] = true; } } MultiReaderPtr mr(newLucene(newSubReaders)); mr->decrefOnClose = newDecrefOnClose; return mr; } else return shared_from_this(); } Collection MultiReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num return subReaders[i]->getTermFreqVectors(docNumber - starts[i]); // dispatch to segment } TermFreqVectorPtr MultiReader::getTermFreqVector(int32_t docNumber, const String& field) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num return subReaders[i]->getTermFreqVector(docNumber - starts[i], field); } void MultiReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num subReaders[i]->getTermFreqVector(docNumber - starts[i], field, mapper); } void MultiReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num subReaders[i]->getTermFreqVector(docNumber - starts[i], mapper); } bool MultiReader::isOptimized() { return false; } int32_t MultiReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) // NOTE: multiple threads may wind up init'ing numDocs... 
but that's harmless if (_numDocs == -1) { // check cache int32_t n = 0; // cache miss - recompute for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) n += (*reader)->numDocs(); // sum from readers _numDocs = n; } return _numDocs; } int32_t MultiReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return _maxDoc; } DocumentPtr MultiReader::document(int32_t n, FieldSelectorPtr fieldSelector) { ensureOpen(); int32_t i = readerIndex(n); // find segment num return subReaders[i]->document(n - starts[i], fieldSelector); // dispatch to segment reader } bool MultiReader::isDeleted(int32_t n) { // Don't call ensureOpen() here (it could affect performance) int32_t i = readerIndex(n); // find segment num return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader } bool MultiReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return _hasDeletions; } void MultiReader::doDelete(int32_t docNum) { _numDocs = -1; // invalidate cache int32_t i = readerIndex(docNum); // find segment num subReaders[i]->deleteDocument(docNum - starts[i]); // dispatch to segment reader _hasDeletions = true; } void MultiReader::doUndeleteAll() { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) (*reader)->undeleteAll(); _hasDeletions = false; _numDocs = -1; // invalidate cache } int32_t MultiReader::readerIndex(int32_t n) { return DirectoryReader::readerIndex(n, this->starts, this->subReaders.size()); } bool MultiReader::hasNorms(const String& field) { ensureOpen(); for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { if ((*reader)->hasNorms(field)) return true; } return false; } ByteArray MultiReader::norms(const String& field) { SyncLock syncLock(this); ensureOpen(); ByteArray bytes(normsCache.get(field)); if (bytes) return bytes; // cache hit if (!hasNorms(field)) return ByteArray(); bytes = 
ByteArray::newInstance(maxDoc()); for (int32_t i = 0; i < subReaders.size(); ++i) subReaders[i]->norms(field, bytes, starts[i]); normsCache.put(field, bytes); // update cache return bytes; } void MultiReader::norms(const String& field, ByteArray norms, int32_t offset) { SyncLock syncLock(this); ensureOpen(); ByteArray bytes(normsCache.get(field)); for (int32_t i = 0; i < subReaders.size(); ++i) // read from segments subReaders[i]->norms(field, norms, offset + starts[i]); if (!bytes && !hasNorms(field)) MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); else if (bytes) // cache hit MiscUtils::arrayCopy(bytes.get(), 0, norms.get(), offset, maxDoc()); else { for (int32_t i = 0; i < subReaders.size(); ++i) subReaders[i]->norms(field, norms, offset + starts[i]); } } void MultiReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { { SyncLock normsLock(&normsCache); normsCache.remove(field); // clear cache } int32_t i = readerIndex(doc); // find segment num subReaders[i]->setNorm(doc - starts[i], field, value); // dispatch } TermEnumPtr MultiReader::terms() { ensureOpen(); return newLucene(shared_from_this(), subReaders, starts, TermPtr()); } TermEnumPtr MultiReader::terms(TermPtr t) { ensureOpen(); return newLucene(shared_from_this(), subReaders, starts, t); } int32_t MultiReader::docFreq(TermPtr t) { ensureOpen(); int32_t total = 0; // sum freqs in segments for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) total += (*reader)->docFreq(t); return total; } TermDocsPtr MultiReader::termDocs() { ensureOpen(); return newLucene(shared_from_this(), subReaders, starts); } TermPositionsPtr MultiReader::termPositions() { ensureOpen(); return newLucene(shared_from_this(), subReaders, starts); } void MultiReader::doCommit(MapStringString commitUserData) { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) (*reader)->commit(commitUserData); } 
void MultiReader::doClose() { SyncLock syncLock(this); for (int32_t i = 0; i < subReaders.size(); ++i) { if (decrefOnClose[i]) subReaders[i]->decRef(); else subReaders[i]->close(); } // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is // generally not a good idea) FieldCache::DEFAULT()->purge(shared_from_this()); } HashSet MultiReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); return DirectoryReader::getFieldNames(fieldOption, this->subReaders); } bool MultiReader::isCurrent() { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { if (!(*reader)->isCurrent()) return false; } // all subreaders are up to date return true; } int64_t MultiReader::getVersion() { boost::throw_exception(UnsupportedOperationException()); return 0; } Collection MultiReader::getSequentialSubReaders() { return subReaders; } } LucenePlusPlus-rel_3.0.4/src/core/index/MultipleTermPositions.cpp000066400000000000000000000114671217574114600252110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultipleTermPositions.h" #include "_MultipleTermPositions.h" #include "IndexReader.h" #include "Term.h" namespace Lucene { MultipleTermPositions::MultipleTermPositions(IndexReaderPtr indexReader, Collection terms) { Collection termPositions(Collection::newInstance()); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) termPositions.add(indexReader->termPositions(*term)); termPositionsQueue = newLucene(termPositions); posList = newLucene(); _doc = 0; _freq = 0; } MultipleTermPositions::~MultipleTermPositions() { } bool MultipleTermPositions::next() { if (termPositionsQueue->empty()) return false; posList->clear(); _doc = termPositionsQueue->top()->doc(); TermPositionsPtr tp; do { tp = termPositionsQueue->top(); for (int32_t i = 0; i < tp->freq(); ++i) posList->add(tp->nextPosition()); if (tp->next()) termPositionsQueue->updateTop(); else { termPositionsQueue->pop(); tp->close(); } } while (!termPositionsQueue->empty() && termPositionsQueue->top()->doc() == _doc); posList->sort(); _freq = posList->size(); return true; } int32_t MultipleTermPositions::nextPosition() { return posList->next(); } bool MultipleTermPositions::skipTo(int32_t target) { while (termPositionsQueue->top() && target > termPositionsQueue->top()->doc()) { TermPositionsPtr tp(termPositionsQueue->top()); termPositionsQueue->pop(); if (tp->skipTo(target)) termPositionsQueue->add(tp); else tp->close(); } return next(); } int32_t MultipleTermPositions::doc() { return _doc; } int32_t MultipleTermPositions::freq() { return _freq; } void MultipleTermPositions::close() { while (!termPositionsQueue->empty()) termPositionsQueue->pop()->close(); } void MultipleTermPositions::seek(TermPtr term) { boost::throw_exception(UnsupportedOperationException()); } void MultipleTermPositions::seek(TermEnumPtr termEnum) { boost::throw_exception(UnsupportedOperationException()); } int32_t 
MultipleTermPositions::read(Collection docs, Collection freqs) { boost::throw_exception(UnsupportedOperationException()); return 0; } ByteArray MultipleTermPositions::getPayload(ByteArray data, int32_t offset) { boost::throw_exception(UnsupportedOperationException()); return ByteArray(); } bool MultipleTermPositions::isPayloadAvailable() { return false; } TermPositionsQueue::TermPositionsQueue(Collection termPositions) : PriorityQueue(termPositions.size()) { this->termPositions = termPositions; } TermPositionsQueue::~TermPositionsQueue() { } void TermPositionsQueue::initialize() { PriorityQueue::initialize(); for (Collection::iterator tp = termPositions.begin(); tp != termPositions.end(); ++tp) { if ((*tp)->next()) add(*tp); } } bool TermPositionsQueue::lessThan(const TermPositionsPtr& first, const TermPositionsPtr& second) { return (first->doc() < second->doc()); } IntQueue::IntQueue() { arraySize = 16; index = 0; lastIndex = 0; array = Collection::newInstance(arraySize); } IntQueue::~IntQueue() { } void IntQueue::add(int32_t i) { if (lastIndex == arraySize) growArray(); array[lastIndex++] = i; } int32_t IntQueue::next() { return array[index++]; } void IntQueue::sort() { std::sort(array.begin() + index, array.begin() + lastIndex); } void IntQueue::clear() { index = 0; lastIndex = 0; } int32_t IntQueue::size() { return (lastIndex - index); } void IntQueue::growArray() { array.resize(arraySize * 2); arraySize *= 2; } } LucenePlusPlus-rel_3.0.4/src/core/index/NormsWriter.cpp000066400000000000000000000160521217574114600231440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NormsWriter.h" #include "NormsWriterPerThread.h" #include "NormsWriterPerField.h" #include "Similarity.h" #include "IndexFileNames.h" #include "IndexOutput.h" #include "SegmentMerger.h" #include "SegmentWriteState.h" #include "InvertedDocEndConsumerPerField.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "Directory.h" namespace Lucene { NormsWriter::NormsWriter() { } NormsWriter::~NormsWriter() { } uint8_t NormsWriter::getDefaultNorm() { static uint8_t defaultNorm = 0; if (defaultNorm == 0) defaultNorm = Similarity::encodeNorm(1.0); return defaultNorm; } InvertedDocEndConsumerPerThreadPtr NormsWriter::addThread(DocInverterPerThreadPtr docInverterPerThread) { return newLucene(docInverterPerThread, shared_from_this()); } void NormsWriter::abort() { } void NormsWriter::files(HashSet files) { } void NormsWriter::setFieldInfos(FieldInfosPtr fieldInfos) { this->fieldInfos = fieldInfos; } void NormsWriter::flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, SegmentWriteStatePtr state) { MapFieldInfoCollectionNormsWriterPerField byField(MapFieldInfoCollectionNormsWriterPerField::newInstance()); // Typically, each thread will have encountered the same field. 
So first we collate by field, ie all // per-thread field instances that correspond to the same FieldInfo for (MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end();) { NormsWriterPerFieldPtr normsPerField(boost::static_pointer_cast(*perField)); if (normsPerField->upto > 0) { // It has some norms Collection l = byField.get(normsPerField->fieldInfo); if (!l) { l = Collection::newInstance(); byField.put(normsPerField->fieldInfo, l); } l.add(normsPerField); ++perField; } else { // Remove this field since we haven't seen it since the previous flush perField = entry->second.remove(perField); } } } String normsFileName(state->segmentName + L"." + IndexFileNames::NORMS_EXTENSION()); state->flushedFiles.add(normsFileName); IndexOutputPtr normsOut(state->directory->createOutput(normsFileName)); LuceneException finally; try { normsOut->writeBytes(SegmentMerger::NORMS_HEADER, 0, SegmentMerger::NORMS_HEADER_LENGTH); int32_t numField = fieldInfos->size(); int32_t normCount = 0; for (int32_t fieldNumber = 0; fieldNumber < numField; ++fieldNumber) { FieldInfoPtr fieldInfo(fieldInfos->fieldInfo(fieldNumber)); Collection toMerge = byField.get(fieldInfo); int32_t upto = 0; if (toMerge) { int32_t numFields = toMerge.size(); ++normCount; Collection fields(Collection::newInstance(numFields)); Collection uptos(Collection::newInstance(numFields)); for (int32_t j = 0; j < numFields; ++j) fields[j] = toMerge[j]; int32_t numLeft = numFields; while (numLeft > 0) { BOOST_ASSERT(uptos[0] < fields[0]->docIDs.size()); int32_t minLoc = 0; int32_t minDocID = fields[0]->docIDs[uptos[0]]; for (int32_t j = 1; j < numLeft; ++j) { int32_t docID = fields[j]->docIDs[uptos[j]]; if (docID < minDocID) { minDocID = docID; minLoc = j; } } BOOST_ASSERT(minDocID < state->numDocs); // Fill hole for (;upto < 
minDocID; ++upto) normsOut->writeByte(getDefaultNorm()); normsOut->writeByte(fields[minLoc]->norms[uptos[minLoc]]); ++(uptos[minLoc]); ++upto; if (uptos[minLoc] == fields[minLoc]->upto) { fields[minLoc]->reset(); if (minLoc != numLeft - 1) { fields[minLoc] = fields[numLeft - 1]; uptos[minLoc] = uptos[numLeft - 1]; } --numLeft; } } // Fill final hole with defaultNorm for (;upto < state->numDocs; ++upto) normsOut->writeByte(getDefaultNorm()); } else if (fieldInfo->isIndexed && !fieldInfo->omitNorms) { ++normCount; // Fill entire field with default norm for (;upto < state->numDocs; ++upto) normsOut->writeByte(getDefaultNorm()); } BOOST_ASSERT(4 + normCount * state->numDocs == normsOut->getFilePointer()); // .nrm file size mismatch? } } catch (LuceneException& e) { finally = e; } normsOut->close(); finally.throwException(); } void NormsWriter::closeDocStore(SegmentWriteStatePtr state) { } } LucenePlusPlus-rel_3.0.4/src/core/index/NormsWriterPerField.cpp000066400000000000000000000043501217574114600245550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NormsWriterPerField.h" #include "NormsWriterPerThread.h" #include "Similarity.h" #include "DocInverterPerField.h" #include "DocumentsWriter.h" #include "FieldInfo.h" #include "MiscUtils.h" namespace Lucene { NormsWriterPerField::NormsWriterPerField(DocInverterPerFieldPtr docInverterPerField, NormsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo) { docIDs = Collection::newInstance(1); norms = ByteArray::newInstance(1); upto = 0; this->_perThread = perThread; this->fieldInfo = fieldInfo; docState = perThread->docState; fieldState = docInverterPerField->fieldState; } NormsWriterPerField::~NormsWriterPerField() { } void NormsWriterPerField::reset() { // Shrink back if we are over allocated now docIDs.resize(MiscUtils::getShrinkSize(docIDs.size(), upto)); norms.resize(MiscUtils::getShrinkSize(norms.size(), upto)); upto = 0; } void NormsWriterPerField::abort() { upto = 0; } int32_t NormsWriterPerField::compareTo(LuceneObjectPtr other) { return fieldInfo->name.compare(boost::static_pointer_cast(other)->fieldInfo->name); } void NormsWriterPerField::finish() { BOOST_ASSERT(docIDs.size() == norms.size()); if (fieldInfo->isIndexed && !fieldInfo->omitNorms) { if (docIDs.size() <= upto) { BOOST_ASSERT(docIDs.size() == upto); docIDs.resize(MiscUtils::getNextSize(1 + upto)); norms.resize(MiscUtils::getNextSize(1 + upto)); } double norm = docState->similarity->computeNorm(fieldInfo->name, fieldState); norms[upto] = Similarity::encodeNorm(norm); docIDs[upto] = docState->docID; ++upto; } } } LucenePlusPlus-rel_3.0.4/src/core/index/NormsWriterPerThread.cpp000066400000000000000000000024201217574114600247350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NormsWriterPerThread.h" #include "NormsWriterPerField.h" #include "DocInverterPerThread.h" namespace Lucene { NormsWriterPerThread::NormsWriterPerThread(DocInverterPerThreadPtr docInverterPerThread, NormsWriterPtr normsWriter) { this->_normsWriter = normsWriter; docState = docInverterPerThread->docState; } NormsWriterPerThread::~NormsWriterPerThread() { } InvertedDocEndConsumerPerFieldPtr NormsWriterPerThread::addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo) { return newLucene(docInverterPerField, shared_from_this(), fieldInfo); } void NormsWriterPerThread::abort() { } void NormsWriterPerThread::startDocument() { } void NormsWriterPerThread::finishDocument() { } bool NormsWriterPerThread::freeRAM() { return false; } } LucenePlusPlus-rel_3.0.4/src/core/index/ParallelReader.cpp000066400000000000000000000471071217574114600235350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ParallelReader.h" #include "_ParallelReader.h" #include "Document.h" #include "FieldSelector.h" #include "Term.h" #include "FieldCache.h" #include "StringUtils.h" namespace Lucene { ParallelReader::ParallelReader(bool closeSubReaders) { this->readers = Collection::newInstance(); this->decrefOnClose = Collection::newInstance(); this->fieldToReader = MapStringIndexReader::newInstance(); this->readerToFields = MapIndexReaderSetString::newInstance(); this->storedFieldReaders = Collection::newInstance(); this->_maxDoc = 0; this->_numDocs = 0; this->_hasDeletions = false; this->incRefReaders = !closeSubReaders; } ParallelReader::~ParallelReader() { } void ParallelReader::add(IndexReaderPtr reader) { ensureOpen(); add(reader, false); } void ParallelReader::add(IndexReaderPtr reader, bool ignoreStoredFields) { ensureOpen(); if (readers.empty()) { this->_maxDoc = reader->maxDoc(); this->_numDocs = reader->numDocs(); this->_hasDeletions = reader->hasDeletions(); } if (reader->maxDoc() != _maxDoc) // check compatibility { boost::throw_exception(IllegalArgumentException(L"All readers must have same maxDoc: " + StringUtils::toString(_maxDoc) + L" != " + StringUtils::toString(reader->maxDoc()))); } if (reader->numDocs() != _numDocs) { boost::throw_exception(IllegalArgumentException(L"All readers must have same numDocs: " + StringUtils::toString(_numDocs) + L" != " + StringUtils::toString(reader->numDocs()))); } HashSet fields(reader->getFieldNames(IndexReader::FIELD_OPTION_ALL)); readerToFields.put(reader, fields); for (HashSet::iterator field = fields.begin(); field != fields.end(); ++field) // update fieldToReader map { if (!fieldToReader.contains(*field)) fieldToReader.put(*field, reader); } if (!ignoreStoredFields) storedFieldReaders.add(reader); // add to storedFieldReaders readers.add(reader); if (incRefReaders) reader->incRef(); decrefOnClose.add(incRefReaders); 
} LuceneObjectPtr ParallelReader::clone(LuceneObjectPtr other) { SyncLock syncLock(this); try { return doReopen(true); } catch (LuceneException& e) { boost::throw_exception(RuntimeException(e.getError())); } return LuceneObjectPtr(); } IndexReaderPtr ParallelReader::reopen() { SyncLock syncLock(this); return doReopen(false); } IndexReaderPtr ParallelReader::doReopen(bool doClone) { ensureOpen(); bool reopened = false; Collection newReaders(Collection::newInstance()); bool success = false; LuceneException finally; try { for (Collection::iterator oldReader = readers.begin(); oldReader != readers.end(); ++oldReader) { IndexReaderPtr newReader; if (doClone) newReader = boost::dynamic_pointer_cast((*oldReader)->clone()); else newReader = (*oldReader)->reopen(); newReaders.add(newReader); // if at least one of the subreaders was updated we remember that and return a new ParallelReader if (newReader != *oldReader) reopened = true; } success = true; } catch (LuceneException& e) { finally = e; } if (!success && reopened) { for (int32_t i = 0; i < newReaders.size(); ++i) { if (newReaders[i] != readers[i]) { try { if (newReaders[i]) newReaders[i]->close(); } catch (...) 
{ // keep going - we want to clean up as much as possible } } } } finally.throwException(); if (reopened) { Collection newDecrefOnClose(Collection::newInstance()); ParallelReaderPtr pr(newLucene()); for (int32_t i = 0; i < readers.size(); ++i) { IndexReaderPtr oldReader(readers[i]); IndexReaderPtr newReader(newReaders[i]); if (newReader == oldReader) { newDecrefOnClose.add(true); newReader->incRef(); } else { // this is a new subreader instance, so on close() we don't decRef but close it newDecrefOnClose.add(false); } pr->add(newReader, !storedFieldReaders.contains(oldReader)); } pr->decrefOnClose = newDecrefOnClose; pr->incRefReaders = incRefReaders; return pr; } else { // No subreader was refreshed return shared_from_this(); } } int32_t ParallelReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) return _numDocs; } int32_t ParallelReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return _maxDoc; } bool ParallelReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return _hasDeletions; } bool ParallelReader::isDeleted(int32_t n) { // Don't call ensureOpen() here (it could affect performance) return !readers.empty() ? 
readers[0]->isDeleted(n) : false; // check first reader } void ParallelReader::doDelete(int32_t docNum) { // delete in all readers for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) (*reader)->deleteDocument(docNum); _hasDeletions = true; } void ParallelReader::doUndeleteAll() { // undeleteAll in all readers for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) (*reader)->undeleteAll(); _hasDeletions = false; } DocumentPtr ParallelReader::document(int32_t n, FieldSelectorPtr fieldSelector) { ensureOpen(); DocumentPtr result(newLucene()); // append fields from storedFieldReaders for (Collection::iterator reader = storedFieldReaders.begin(); reader != storedFieldReaders.end(); ++reader) { bool include = !fieldSelector; if (!include) { HashSet fields = readerToFields.get(*reader); for (HashSet::iterator field = fields.begin(); field != fields.end(); ++field) { if (fieldSelector->accept(*field) != FieldSelector::SELECTOR_NO_LOAD) { include = true; break; } } } if (include) { Collection fields((*reader)->document(n, fieldSelector)->getFields()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) result->add(*field); } } return result; } Collection ParallelReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); Collection results(Collection::newInstance()); // get all vectors for (MapStringIndexReader::iterator entry = fieldToReader.begin(); entry != fieldToReader.end(); ++entry) { TermFreqVectorPtr vector(entry->second->getTermFreqVector(docNumber, entry->first)); if (vector) results.add(vector); } return results; } TermFreqVectorPtr ParallelReader::getTermFreqVector(int32_t docNumber, const String& field) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); return reader == fieldToReader.end() ? 
TermFreqVectorPtr() : reader->second->getTermFreqVector(docNumber, field); } void ParallelReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); if (reader != fieldToReader.end()) reader->second->getTermFreqVector(docNumber, field, mapper); } void ParallelReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) { ensureOpen(); for (MapStringIndexReader::iterator entry = fieldToReader.begin(); entry != fieldToReader.end(); ++entry) entry->second->getTermFreqVector(docNumber, entry->first, mapper); } bool ParallelReader::hasNorms(const String& field) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); return reader == fieldToReader.end() ? false : reader->second->hasNorms(field); } ByteArray ParallelReader::norms(const String& field) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); return reader == fieldToReader.end() ? ByteArray() : reader->second->norms(field); } void ParallelReader::norms(const String& field, ByteArray norms, int32_t offset) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); if (reader != fieldToReader.end()) reader->second->norms(field, norms, offset); } void ParallelReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); if (reader != fieldToReader.end()) reader->second->doSetNorm(doc, field, value); } TermEnumPtr ParallelReader::terms() { ensureOpen(); return newLucene(shared_from_this()); } TermEnumPtr ParallelReader::terms(TermPtr t) { ensureOpen(); return newLucene(shared_from_this(), t); } int32_t ParallelReader::docFreq(TermPtr t) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(t->field()); return reader == fieldToReader.end() ? 
0 : reader->second->docFreq(t); } TermDocsPtr ParallelReader::termDocs(TermPtr term) { ensureOpen(); return newLucene(shared_from_this(), term); } TermDocsPtr ParallelReader::termDocs() { ensureOpen(); return newLucene(shared_from_this()); } TermPositionsPtr ParallelReader::termPositions(TermPtr term) { ensureOpen(); return newLucene(shared_from_this(), term); } TermPositionsPtr ParallelReader::termPositions() { ensureOpen(); return newLucene(shared_from_this()); } bool ParallelReader::isCurrent() { for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { if (!(*reader)->isCurrent()) return false; } // all subreaders are up to date return true; } bool ParallelReader::isOptimized() { for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { if (!(*reader)->isOptimized()) return false; } // all subindexes are optimized return true; } int64_t ParallelReader::getVersion() { boost::throw_exception(UnsupportedOperationException(L"ParallelReader does not support this method.")); return 0; } Collection ParallelReader::getSubReaders() { return readers; } void ParallelReader::doCommit(MapStringString commitUserData) { for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) (*reader)->commit(commitUserData); } void ParallelReader::doClose() { SyncLock syncLock(this); for (int32_t i = 0; i < readers.size(); ++i) { if (decrefOnClose[i]) readers[i]->decRef(); else readers[i]->close(); } FieldCache::DEFAULT()->purge(shared_from_this()); } HashSet ParallelReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); HashSet fieldSet(HashSet::newInstance()); for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { HashSet names((*reader)->getFieldNames(fieldOption)); fieldSet.addAll(names.begin(), names.end()); } return fieldSet; } ParallelTermEnum::ParallelTermEnum(ParallelReaderPtr reader) { this->setIterator = false; this->_reader = reader; 
MapStringIndexReader::iterator indexReader = reader->fieldToReader.begin(); if (indexReader != reader->fieldToReader.end()) this->field = indexReader->first; if (!field.empty()) this->termEnum = reader->fieldToReader[field]->terms(); } ParallelTermEnum::ParallelTermEnum(ParallelReaderPtr reader, TermPtr term) { this->setIterator = false; this->_reader = reader; this->field = term->field(); MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(field); if (indexReader != reader->fieldToReader.end()) this->termEnum = indexReader->second->terms(term); } ParallelTermEnum::~ParallelTermEnum() { } bool ParallelTermEnum::next() { if (!termEnum) return false; // another term in this field? if (termEnum->next() && termEnum->term()->field() == field) return true; // yes, keep going termEnum->close(); // close old termEnum ParallelReaderPtr reader(_reader); // find the next field with terms, if any if (!setIterator) { fieldIterator = reader->fieldToReader.find(field); ++fieldIterator; // Skip field to get next one setIterator = false; } while (fieldIterator != reader->fieldToReader.end()) { field = fieldIterator->first; termEnum = fieldIterator->second->terms(newLucene(field)); ++fieldIterator; TermPtr term(termEnum->term()); if (term && term->field() == field) return true; else termEnum->close(); } return false; // no more fields } TermPtr ParallelTermEnum::term() { return termEnum ? termEnum->term() : TermPtr(); } int32_t ParallelTermEnum::docFreq() { return termEnum ? termEnum->docFreq() : 0; } void ParallelTermEnum::close() { if (termEnum) termEnum->close(); } ParallelTermDocs::ParallelTermDocs(ParallelReaderPtr reader) { this->_reader = reader; } ParallelTermDocs::ParallelTermDocs(ParallelReaderPtr reader, TermPtr term) { this->_reader = reader; if (!term) termDocs = reader->readers.empty() ? 
TermDocsPtr() : reader->readers[0]->termDocs(TermPtr());
    else
        seek(term);
}

ParallelTermDocs::~ParallelTermDocs()
{
}

/// Current document number of the underlying sub-reader enumerator.
int32_t ParallelTermDocs::doc()
{
    return termDocs->doc();
}

/// Frequency of the current term in the current document.
int32_t ParallelTermDocs::freq()
{
    return termDocs->freq();
}

/// Re-target the enumerator at the sub-reader owning the term's field;
/// termDocs becomes null when no sub-reader has that field.
void ParallelTermDocs::seek(TermPtr term)
{
    ParallelReaderPtr reader(_reader);
    MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(term->field());
    termDocs = indexReader != reader->fieldToReader.end() ? indexReader->second->termDocs(term) : TermDocsPtr();
}

void ParallelTermDocs::seek(TermEnumPtr termEnum)
{
    seek(termEnum->term());
}

bool ParallelTermDocs::next()
{
    return termDocs ? termDocs->next() : false;
}

/// Bulk-read doc numbers and frequencies; 0 when unpositioned.
int32_t ParallelTermDocs::read(Collection docs, Collection freqs)
{
    return termDocs ? termDocs->read(docs, freqs) : 0;
}

bool ParallelTermDocs::skipTo(int32_t target)
{
    return termDocs ? termDocs->skipTo(target) : false;
}

void ParallelTermDocs::close()
{
    if (termDocs)
        termDocs->close();
}

ParallelTermPositions::ParallelTermPositions(ParallelReaderPtr reader) : ParallelTermDocs(reader)
{
}

ParallelTermPositions::ParallelTermPositions(ParallelReaderPtr reader, TermPtr term) : ParallelTermDocs(reader)
{
    seek(term);
}

ParallelTermPositions::~ParallelTermPositions()
{
}

/// Same as the base seek(), but obtains a TermPositions enumerator so the
/// position/payload accessors below can downcast termDocs.
void ParallelTermPositions::seek(TermPtr term)
{
    ParallelReaderPtr reader(_reader);
    MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(term->field());
    termDocs = indexReader != reader->fieldToReader.end() ? indexReader->second->termPositions(term) : TermDocsPtr();
}

int32_t ParallelTermPositions::nextPosition()
{
    // It is an error to call this if there is no next position, eg. if termDocs==null
    return boost::static_pointer_cast(termDocs)->nextPosition();
}

int32_t ParallelTermPositions::getPayloadLength()
{
    // It is an error to call this if there is no next position, eg. if termDocs==null
    return boost::static_pointer_cast(termDocs)->getPayloadLength();
}

ByteArray ParallelTermPositions::getPayload(ByteArray data, int32_t offset)
{
    // It is an error to call this if there is no next position, eg. if termDocs==null
    return boost::static_pointer_cast(termDocs)->getPayload(data, offset);
}

bool ParallelTermPositions::isPayloadAvailable()
{
    // It is an error to call this if there is no next position, eg. if termDocs==null
    return boost::static_pointer_cast(termDocs)->isPayloadAvailable();
}

}
LucenePlusPlus-rel_3.0.4/src/core/index/Payload.cpp000066400000000000000000000073561217574114600222451ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include "Payload.h"
#include "MiscUtils.h"

namespace Lucene
{

/// Empty payload: no backing array, zero offset and length.
Payload::Payload()
{
    this->offset = 0;
    this->_length = 0;
}

/// Payload over the whole of the given array (no copy is taken).
Payload::Payload(ByteArray data)
{
    this->data = data;
    this->offset = 0;
    this->_length = data.size();
}

/// Payload over a [offset, offset + length) window of the given array.
/// Throws IllegalArgumentException when the window does not fit.
Payload::Payload(ByteArray data, int32_t offset, int32_t length)
{
    if (offset < 0 || offset + length > data.size())
        boost::throw_exception(IllegalArgumentException());
    this->data = data;
    this->offset = offset;
    this->_length = length;
}

Payload::~Payload()
{
}

void Payload::setData(ByteArray data)
{
    setData(data, 0, data.size());
}

// Note: unlike the windowed constructor, this setter performs no bounds check.
void Payload::setData(ByteArray data, int32_t offset, int32_t length)
{
    this->data = data;
    this->offset = offset;
    this->_length = length;
}

ByteArray Payload::getData()
{
    return this->data;
}

int32_t Payload::getOffset()
{
    return this->offset;
}

int32_t Payload::length()
{
    return this->_length;
}

/// Byte at the given index relative to this payload's window.
uint8_t Payload::byteAt(int32_t index)
{
    if (0 <= index && index < this->_length)
        return this->data[this->offset + index];
boost::throw_exception(IndexOutOfBoundsException());
    return 0;
}

/// Fresh copy of just this payload's window of the backing array.
ByteArray Payload::toByteArray()
{
    ByteArray retArray(ByteArray::newInstance(this->_length));
    MiscUtils::arrayCopy(this->data.get(), this->offset, retArray.get(), 0, this->_length);
    return retArray;
}

/// Copy this payload's bytes into target starting at targetOffset.
/// NOTE(review): the guard `_length > target.size() + targetOffset` looks
/// inverted (a too-small target passes whenever targetOffset > 0; the natural
/// check would be `targetOffset + _length > target.size()`) - confirm against
/// upstream before changing.
void Payload::copyTo(ByteArray target, int32_t targetOffset)
{
    if (this->_length > target.size() + targetOffset)
        boost::throw_exception(IndexOutOfBoundsException());
    MiscUtils::arrayCopy(this->data.get(), this->offset, target.get(), targetOffset, this->_length);
}

/// Deep clone; the clone owns a private copy of exactly its window of bytes.
LuceneObjectPtr Payload::clone(LuceneObjectPtr other)
{
    // Start with a shallow copy of data
    LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene());
    PayloadPtr clonePayload(boost::dynamic_pointer_cast(clone));
    clonePayload->offset = offset;
    clonePayload->_length = _length;
    // Only copy the part of data that belongs to this Payload
    if (offset == 0 && _length == data.size())
    {
        // It is the whole thing, so just clone it.
        clonePayload->data = ByteArray::newInstance(data.size());
        MiscUtils::arrayCopy(data.get(), 0, clonePayload->data.get(), 0, data.size());
    }
    else
    {
        // Just get the part
        clonePayload->data = toByteArray();
        clonePayload->offset = 0;
    }
    return clonePayload;
}

/// Equality: identity, or equal length and byte-wise equal contents.
/// NOTE(review): the memcmp starts at data.get(), not data.get() + offset, so
/// payloads with non-zero offsets compare array prefixes rather than their
/// windows - verify against upstream before relying on this for offset != 0.
bool Payload::equals(LuceneObjectPtr other)
{
    if (LuceneObject::equals(other))
        return true;
    PayloadPtr otherPayload(boost::dynamic_pointer_cast(other));
    if (otherPayload)
    {
        if (_length == otherPayload->_length)
            return (std::memcmp(data.get(), otherPayload->data.get(), _length) == 0);
        else
            return false;
    }
    return false;
}

int32_t Payload::hashCode()
{
    return MiscUtils::hashCode(data.get(), offset, offset + _length);
}

}
LucenePlusPlus-rel_3.0.4/src/core/index/PositionBasedTermVectorMapper.cpp000066400000000000000000000060051217574114600265710ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PositionBasedTermVectorMapper.h" namespace Lucene { PositionBasedTermVectorMapper::PositionBasedTermVectorMapper(bool ignoringOffsets) : TermVectorMapper(false, ignoringOffsets) { storeOffsets = false; } PositionBasedTermVectorMapper::~PositionBasedTermVectorMapper() { } bool PositionBasedTermVectorMapper::isIgnoringPositions() { return false; } void PositionBasedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { for (int32_t i = 0; i < positions.size(); ++i) { TermVectorsPositionInfoPtr pos(currentPositions.get(positions[i])); if (!pos) { pos = newLucene(positions[i], storeOffsets); currentPositions.put(positions[i], pos); } pos->addTerm(term, offsets ? offsets[i] : TermVectorOffsetInfoPtr()); } } void PositionBasedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { if (storePositions == false) boost::throw_exception(RuntimeException(L"You must store positions in order to use this Mapper")); if (storeOffsets == true) { // ignoring offsets } this->fieldToTerms = MapStringMapIntTermVectorsPositionInfo::newInstance(); this->storeOffsets = storeOffsets; currentField = field; this->currentPositions = MapIntTermVectorsPositionInfo::newInstance(); fieldToTerms.put(currentField, currentPositions); } MapStringMapIntTermVectorsPositionInfo PositionBasedTermVectorMapper::getFieldToTerms() { return fieldToTerms; } TermVectorsPositionInfo::TermVectorsPositionInfo(int32_t position, bool storeOffsets) { this->position = position; this->terms = Collection::newInstance(); if (storeOffsets) offsets = Collection::newInstance(); } TermVectorsPositionInfo::~TermVectorsPositionInfo() { } void TermVectorsPositionInfo::addTerm(const String& 
term, TermVectorOffsetInfoPtr info)
{
    terms.add(term);
    // info is only retained when this bucket was built with storeOffsets.
    if (offsets)
        offsets.add(info);
}

int32_t TermVectorsPositionInfo::getPosition()
{
    return position;
}

Collection TermVectorsPositionInfo::getTerms()
{
    return terms;
}

Collection TermVectorsPositionInfo::getOffsets()
{
    return offsets;
}

}
LucenePlusPlus-rel_3.0.4/src/core/index/RawPostingList.cpp000066400000000000000000000013431217574114600235770ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include "RawPostingList.h"
#include "DocumentsWriter.h"

namespace Lucene
{

// Bytes accounted per posting: object header plus the three int32_t start
// offsets initialized below.
const int32_t RawPostingList::BYTES_SIZE = DocumentsWriter::OBJECT_HEADER_BYTES + 3 * DocumentsWriter::INT_NUM_BYTE;

RawPostingList::RawPostingList()
{
    textStart = 0;
    intStart = 0;
    byteStart = 0;
}

RawPostingList::~RawPostingList()
{
}

}
LucenePlusPlus-rel_3.0.4/src/core/index/ReadOnlyDirectoryReader.cpp000066400000000000000000000032741217574114600254000ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReadOnlyDirectoryReader.h" #include "ReadOnlySegmentReader.h" namespace Lucene { ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(DirectoryPtr directory, SegmentInfosPtr sis, IndexDeletionPolicyPtr deletionPolicy, int32_t termInfosIndexDivisor) : DirectoryReader(directory, sis, deletionPolicy, true, termInfosIndexDivisor) { } ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(DirectoryPtr directory, SegmentInfosPtr infos, Collection oldReaders, Collection oldStarts, MapStringByteArray oldNormsCache, bool doClone, int32_t termInfosIndexDivisor) : DirectoryReader(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor) { } ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(IndexWriterPtr writer, SegmentInfosPtr infos, int32_t termInfosIndexDivisor) : DirectoryReader(writer, infos, termInfosIndexDivisor) { } ReadOnlyDirectoryReader::~ReadOnlyDirectoryReader() { } void ReadOnlyDirectoryReader::acquireWriteLock() { ReadOnlySegmentReader::noWrite(); } } LucenePlusPlus-rel_3.0.4/src/core/index/ReadOnlySegmentReader.cpp000066400000000000000000000016451217574114600250360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReadOnlySegmentReader.h" #include "BitVector.h" namespace Lucene { ReadOnlySegmentReader::~ReadOnlySegmentReader() { } void ReadOnlySegmentReader::noWrite() { boost::throw_exception(UnsupportedOperationException(L"This IndexReader cannot make any changes to the index (it was opened with readOnly = true)")); } void ReadOnlySegmentReader::acquireWriteLock() { noWrite(); } bool ReadOnlySegmentReader::isDeleted(int32_t n) { return (deletedDocs && deletedDocs->get(n)); } } LucenePlusPlus-rel_3.0.4/src/core/index/ReusableStringReader.cpp000066400000000000000000000026121217574114600247220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReusableStringReader.h" #include "MiscUtils.h" namespace Lucene { ReusableStringReader::ReusableStringReader() { upto = 0; left = 0; } ReusableStringReader::~ReusableStringReader() { } void ReusableStringReader::init(const String& s) { this->s = s; left = s.length(); this->upto = 0; } int32_t ReusableStringReader::read(wchar_t* buffer, int32_t offset, int32_t length) { if (left > length) { MiscUtils::arrayCopy(s.begin(), upto, buffer, offset, length); upto += length; left -= length; return length; } else if (left == 0) { s.clear(); return -1; } else { MiscUtils::arrayCopy(s.begin(), upto, buffer, offset, left); int32_t r = left; left = 0; upto = s.length(); return r; } } void ReusableStringReader::close() { } } 
LucenePlusPlus-rel_3.0.4/src/core/index/SegmentInfo.cpp000066400000000000000000000516041217574114600230710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "SegmentInfo.h" #include "SegmentInfos.h" #include "Directory.h" #include "IndexInput.h" #include "IndexOutput.h" #include "IndexFileNames.h" #include "IndexFileNameFilter.h" #include "BitVector.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t SegmentInfo::NO = -1; // no norms; no deletes; const int32_t SegmentInfo::YES = 1; // have norms; have deletes; const int32_t SegmentInfo::CHECK_DIR = 0; // must check dir to see if there are norms/deletions const int32_t SegmentInfo::WITHOUT_GEN = 0; // a file name that has no GEN in it. SegmentInfo::SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir) { _sizeInBytes = -1; this->name = name; this->docCount = docCount; this->dir = dir; delGen = NO; this->isCompoundFile = CHECK_DIR; preLockless = true; hasSingleNormFile = false; docStoreOffset = -1; docStoreSegment = name; docStoreIsCompoundFile = false; delCount = 0; hasProx = true; } SegmentInfo::SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir, bool isCompoundFile, bool hasSingleNormFile) { _sizeInBytes = -1; this->name = name; this->docCount = docCount; this->dir = dir; delGen = NO; this->isCompoundFile = (uint8_t)(isCompoundFile ? 
YES : NO); this->hasSingleNormFile = hasSingleNormFile; preLockless = false; docStoreOffset = -1; docStoreIsCompoundFile = false; delCount = 0; hasProx = true; } SegmentInfo::SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir, bool isCompoundFile, bool hasSingleNormFile, int32_t docStoreOffset, const String& docStoreSegment, bool docStoreIsCompoundFile, bool hasProx) { _sizeInBytes = -1; this->name = name; this->docCount = docCount; this->dir = dir; delGen = NO; this->isCompoundFile = (uint8_t)(isCompoundFile ? YES : NO); this->hasSingleNormFile = hasSingleNormFile; preLockless = false; this->docStoreOffset = docStoreOffset; this->docStoreSegment = docStoreSegment; this->docStoreIsCompoundFile = docStoreIsCompoundFile; delCount = 0; this->hasProx = hasProx; } SegmentInfo::SegmentInfo(DirectoryPtr dir, int32_t format, IndexInputPtr input) { _sizeInBytes = -1; this->dir = dir; name = input->readString(); docCount = input->readInt(); if (format <= SegmentInfos::FORMAT_LOCKLESS) { delGen = input->readLong(); if (format <= SegmentInfos::FORMAT_SHARED_DOC_STORE) { docStoreOffset = input->readInt(); if (docStoreOffset != -1) { docStoreSegment = input->readString(); docStoreIsCompoundFile = (input->readByte() == 1); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = -1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos::FORMAT_SINGLE_NORM_FILE) hasSingleNormFile = (input->readByte() == 1); else hasSingleNormFile = false; int32_t numNormGen = input->readInt(); if (numNormGen != NO) { normGen = Collection::newInstance(numNormGen); for (int32_t j = 0; j < numNormGen; ++j) normGen[j] = input->readLong(); } isCompoundFile = input->readByte(); preLockless = (isCompoundFile == CHECK_DIR); if (format <= SegmentInfos::FORMAT_DEL_COUNT) { delCount = input->readInt(); BOOST_ASSERT(delCount <= docCount); } else delCount = -1; if (format <= SegmentInfos::FORMAT_HAS_PROX) hasProx = 
(input->readByte() == 1); else hasProx = true; if (format <= SegmentInfos::FORMAT_DIAGNOSTICS) diagnostics = input->readStringStringMap(); else diagnostics = MapStringString::newInstance(); } else { delGen = CHECK_DIR; isCompoundFile = CHECK_DIR; preLockless = true; hasSingleNormFile = false; docStoreOffset = -1; docStoreIsCompoundFile = false; delCount = -1; hasProx = true; diagnostics = MapStringString::newInstance(); } } SegmentInfo::~SegmentInfo() { } void SegmentInfo::reset(SegmentInfoPtr src) { clearFiles(); name = src->name; docCount = src->docCount; dir = src->dir; preLockless = src->preLockless; delGen = src->delGen; docStoreOffset = src->docStoreOffset; docStoreIsCompoundFile = src->docStoreIsCompoundFile; if (!src->normGen) normGen = src->normGen; else normGen = Collection::newInstance(src->normGen.begin(), src->normGen.end()); isCompoundFile = src->isCompoundFile; hasSingleNormFile = src->hasSingleNormFile; delCount = src->delCount; } void SegmentInfo::setDiagnostics(MapStringString diagnostics) { this->diagnostics = diagnostics; } MapStringString SegmentInfo::getDiagnostics() { return diagnostics; } void SegmentInfo::setNumFields(int32_t numFields) { if (!normGen) { // normGen is null if we loaded a pre-2.1 segment file, or, if this segments file hasn't had any // norms set against it yet normGen = Collection::newInstance(numFields); if (!preLockless) { // Do nothing: thus leaving normGen[k] == CHECK_DIR (==0), so that later we know } // we have to check filesystem for norm files, because this is prelockless. 
else { // This is a FORMAT_LOCKLESS segment, which means there are no separate norms for (int32_t i = 0; i < numFields; ++i) normGen[i] = NO; } } } int64_t SegmentInfo::sizeInBytes() { if (_sizeInBytes == -1) { HashSet _files(files()); _sizeInBytes = 0; for (HashSet::iterator fileName = _files.begin(); fileName != _files.end(); ++fileName) { // we don't count bytes used by a shared doc store against this segment if (docStoreOffset == -1 || !IndexFileNames::isDocStoreFile(*fileName)) _sizeInBytes += dir->fileLength(*fileName); } } return _sizeInBytes; } bool SegmentInfo::hasDeletions() { if (delGen == NO) return false; else if (delGen >= YES) return true; else return dir->fileExists(getDelFileName()); } void SegmentInfo::advanceDelGen() { // delGen 0 is reserved for pre-LOCKLESS format if (delGen == NO) delGen = YES; else delGen++; clearFiles(); } void SegmentInfo::clearDelGen() { delGen = NO; clearFiles(); } LuceneObjectPtr SegmentInfo::clone(LuceneObjectPtr other) { SegmentInfoPtr si(newLucene(name, docCount, dir)); si->isCompoundFile = isCompoundFile; si->delGen = delGen; si->delCount = delCount; si->hasProx = hasProx; si->preLockless = preLockless; si->hasSingleNormFile = hasSingleNormFile; si->diagnostics = MapStringString::newInstance(); si->diagnostics.putAll(diagnostics.begin(), diagnostics.end()); if (normGen) si->normGen = Collection::newInstance(normGen.begin(), normGen.end()); si->docStoreOffset = docStoreOffset; si->docStoreSegment = docStoreSegment; si->docStoreIsCompoundFile = docStoreIsCompoundFile; return si; } String SegmentInfo::getDelFileName() { if (delGen == NO) { // in this case we know there is no deletion filename against this segment return L""; } else { // if delgen is check_dir, it's the pre-lockless-commit file format return IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::DELETES_EXTENSION(), delGen); } } bool SegmentInfo::hasSeparateNorms(int32_t fieldNumber) { if ((!normGen && preLockless) || (normGen && 
normGen[fieldNumber] == CHECK_DIR)) { // must fallback to directory file exists check return dir->fileExists(name + L".s" + StringUtils::toString(fieldNumber)); } else if (!normGen || normGen[fieldNumber] == NO) return false; else return true; } bool SegmentInfo::hasSeparateNorms() { if (!normGen) { if (!preLockless) { // this means we were created with lockless code and no norms are written yet return false; } else { HashSet result(dir->listAll()); if (!result) boost::throw_exception(IOException(L"Cannot read directory " + dir->toString() + L": listAll() returned null")); String pattern(name + L".s"); int32_t patternLength = pattern.length(); for (HashSet::iterator fileName = result.begin(); fileName != result.end(); ++fileName) { if (IndexFileNameFilter::accept(L"", *fileName) && boost::starts_with(*fileName, pattern) && UnicodeUtil::isDigit((*fileName)[patternLength])) return true; } return false; } } else { // This means this segment was saved with LOCKLESS code so we first check whether any normGen's are >= 1 // (meaning they definitely have separate norms) for (Collection::iterator gen = normGen.begin(); gen != normGen.end(); ++gen) { if (*gen >= YES) return true; } // Next we look for any == 0. These cases were pre-LOCKLESS and must be checked in directory for (int32_t gen = 0; gen < normGen.size(); ++gen) { if (normGen[gen] == CHECK_DIR && hasSeparateNorms(gen)) return true; } } return false; } void SegmentInfo::advanceNormGen(int32_t fieldIndex) { if (normGen[fieldIndex] == NO) normGen[fieldIndex] = YES; else normGen[fieldIndex]++; clearFiles(); } String SegmentInfo::getNormFileName(int32_t number) { String prefix; int64_t gen = !normGen ? 
CHECK_DIR : normGen[number]; if (hasSeparateNorms(number)) { // case 1: separate norm prefix = L".s"; return IndexFileNames::fileNameFromGeneration(name, prefix + StringUtils::toString(number), gen); } if (hasSingleNormFile) { // case 2: lockless (or nrm file exists) - single file for all norms prefix = String(L".") + IndexFileNames::NORMS_EXTENSION(); return IndexFileNames::fileNameFromGeneration(name, prefix, WITHOUT_GEN); } // case 3: norm file for each field prefix = L".f"; return IndexFileNames::fileNameFromGeneration(name, prefix + StringUtils::toString(number), WITHOUT_GEN); } void SegmentInfo::setUseCompoundFile(bool isCompoundFile) { this->isCompoundFile = (uint8_t)(isCompoundFile ? YES : NO); clearFiles(); } bool SegmentInfo::getUseCompoundFile() { if (isCompoundFile == (uint8_t)NO) return false; else if (isCompoundFile == (uint8_t)YES) return true; else return dir->fileExists(name + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION()); } int32_t SegmentInfo::getDelCount() { if (delCount == -1) delCount = hasDeletions() ? 
BitVector(dir, getDelFileName()).count() : 0; BOOST_ASSERT(delCount <= docCount); return delCount; } void SegmentInfo::setDelCount(int32_t delCount) { this->delCount = delCount; BOOST_ASSERT(delCount <= docCount); } int32_t SegmentInfo::getDocStoreOffset() { return docStoreOffset; } bool SegmentInfo::getDocStoreIsCompoundFile() { return docStoreIsCompoundFile; } void SegmentInfo::setDocStoreIsCompoundFile(bool v) { docStoreIsCompoundFile = v; clearFiles(); } String SegmentInfo::getDocStoreSegment() { return docStoreSegment; } void SegmentInfo::setDocStoreOffset(int32_t offset) { docStoreOffset = offset; clearFiles(); } void SegmentInfo::setDocStore(int32_t offset, const String& segment, bool isCompoundFile) { docStoreOffset = offset; docStoreSegment = segment; docStoreIsCompoundFile = isCompoundFile; } void SegmentInfo::write(IndexOutputPtr output) { output->writeString(name); output->writeInt(docCount); output->writeLong(delGen); output->writeInt(docStoreOffset); if (docStoreOffset != -1) { output->writeString(docStoreSegment); output->writeByte((uint8_t)(docStoreIsCompoundFile ? 1 : 0)); } output->writeByte((uint8_t)(hasSingleNormFile ? 1 : 0)); if (!normGen) output->writeInt(NO); else { output->writeInt(normGen.size()); for (Collection::iterator gen = normGen.begin(); gen != normGen.end(); ++gen) output->writeLong(*gen); } output->writeByte(isCompoundFile); output->writeInt(delCount); output->writeByte((uint8_t)(hasProx ? 1 : 0)); output->writeStringStringMap(diagnostics); } void SegmentInfo::setHasProx(bool hasProx) { this->hasProx = hasProx; clearFiles(); } bool SegmentInfo::getHasProx() { return hasProx; } void SegmentInfo::addIfExists(HashSet files, const String& fileName) { if (dir->fileExists(fileName)) files.add(fileName); } HashSet SegmentInfo::files() { if (_files) { // already cached return _files; } _files = HashSet::newInstance(); bool useCompoundFile = getUseCompoundFile(); if (useCompoundFile) _files.add(name + L"." 
+ IndexFileNames::COMPOUND_FILE_EXTENSION()); else { for (HashSet::iterator ext = IndexFileNames::NON_STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::NON_STORE_INDEX_EXTENSIONS().end(); ++ext) addIfExists(_files, name + L"." + *ext); } if (docStoreOffset != -1) { // we are sharing doc stores (stored fields, term vectors) with other segments BOOST_ASSERT(!docStoreSegment.empty()); if (docStoreIsCompoundFile) _files.add(docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); else { for (HashSet::iterator ext = IndexFileNames::STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::STORE_INDEX_EXTENSIONS().end(); ++ext) addIfExists(_files, docStoreSegment + L"." + *ext); } } else if (!useCompoundFile) { // we are not sharing, and, these files were not included in the compound file for (HashSet::iterator ext = IndexFileNames::STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::STORE_INDEX_EXTENSIONS().end(); ++ext) addIfExists(_files, name + L"." + *ext); } String delFileName(IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::DELETES_EXTENSION(), delGen)); if (!delFileName.empty() && (delGen >= YES || dir->fileExists(delFileName))) _files.add(delFileName); // careful logic for norms files if (normGen) { for (int32_t gen = 0; gen < normGen.size(); ++gen) { if (normGen[gen] >= YES) { // definitely a separate norm file, with generation _files.add(IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::SEPARATE_NORMS_EXTENSION() + StringUtils::toString(gen), normGen[gen])); } else if (normGen[gen] == NO) { // no separate norms but maybe plain norms in the non compound file case if (!hasSingleNormFile && !useCompoundFile) { String fileName(name + L"." 
+ IndexFileNames::PLAIN_NORMS_EXTENSION() + StringUtils::toString(gen)); if (dir->fileExists(fileName)) _files.add(fileName); } } else if (normGen[gen] == CHECK_DIR) { // pre-2.1: we have to check file existence String fileName; if (useCompoundFile) fileName = name + L"." + IndexFileNames::SEPARATE_NORMS_EXTENSION() + StringUtils::toString(gen); else if (!hasSingleNormFile) fileName = name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION() + StringUtils::toString(gen); if (!fileName.empty() && dir->fileExists(fileName)) _files.add(fileName); } } } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) { // pre-2.1: we have to scan the dir to find all matching _x.sn/_x.fn files for our segment String prefix; if (useCompoundFile) prefix = name + L"." + IndexFileNames::SEPARATE_NORMS_EXTENSION(); else prefix = name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION(); int32_t prefixLength = prefix.length(); HashSet allFiles(dir->listAll()); for (HashSet::iterator fileName = allFiles.begin(); fileName != allFiles.end(); ++fileName) { if (IndexFileNameFilter::accept(L"", *fileName) && (int32_t)fileName->length() > prefixLength && UnicodeUtil::isDigit((*fileName)[prefixLength]) && boost::starts_with(*fileName, prefix)) _files.add(*fileName); } } return _files; } void SegmentInfo::clearFiles() { _files.reset(); _sizeInBytes = -1; } String SegmentInfo::segString(DirectoryPtr dir) { String cfs; try { cfs = getUseCompoundFile() ? L"c" : L"C"; } catch (LuceneException&) { cfs = L"?"; } String docStore; if (docStoreOffset != -1) docStore = L"->" + docStoreSegment; return name + L":" + cfs + (this->dir == dir ? 
L"" : L"x") + StringUtils::toString(docCount) + docStore; } bool SegmentInfo::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; SegmentInfoPtr otherSegmentInfo(boost::dynamic_pointer_cast(other)); if (!otherSegmentInfo) return false; return (otherSegmentInfo->dir == dir && otherSegmentInfo->name == name); } int32_t SegmentInfo::hashCode() { return dir->hashCode() + StringUtils::hashCode(name); } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentInfoCollection.cpp000066400000000000000000000053741217574114600251100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentInfoCollection.h" #include "SegmentInfo.h" namespace Lucene { SegmentInfoCollection::SegmentInfoCollection() { segmentInfos = Collection::newInstance(); } SegmentInfoCollection::~SegmentInfoCollection() { } int32_t SegmentInfoCollection::size() { return segmentInfos.size(); } bool SegmentInfoCollection::empty() { return segmentInfos.empty(); } void SegmentInfoCollection::clear() { segmentInfos.clear(); } void SegmentInfoCollection::add(SegmentInfoPtr info) { segmentInfos.add(info); } void SegmentInfoCollection::add(int32_t pos, SegmentInfoPtr info) { segmentInfos.add(pos, info); } void SegmentInfoCollection::addAll(SegmentInfoCollectionPtr segmentInfos) { this->segmentInfos.addAll(segmentInfos->segmentInfos.begin(), segmentInfos->segmentInfos.end()); } bool SegmentInfoCollection::equals(SegmentInfoCollectionPtr other) { if (LuceneObject::equals(other)) return true; return segmentInfos.equals(other->segmentInfos, luceneEquals()); } int32_t SegmentInfoCollection::find(SegmentInfoPtr info) { Collection::iterator idx = 
segmentInfos.find_if(luceneEqualTo(info));
    return idx == segmentInfos.end() ? -1 : std::distance(segmentInfos.begin(), idx);
}

bool SegmentInfoCollection::contains(SegmentInfoPtr info)
{
    return segmentInfos.contains_if(luceneEqualTo(info));
}

void SegmentInfoCollection::remove(int32_t pos)
{
    segmentInfos.remove(segmentInfos.begin() + pos);
}

/// Remove the half-open index range [start, end).
void SegmentInfoCollection::remove(int32_t start, int32_t end)
{
    segmentInfos.remove(segmentInfos.begin() + start, segmentInfos.begin() + end);
}

/// Shallow clone: the new collection shares the SegmentInfo objects.
LuceneObjectPtr SegmentInfoCollection::clone(LuceneObjectPtr other)
{
    LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene());
    SegmentInfoCollectionPtr cloneInfos(boost::dynamic_pointer_cast(clone));
    for (Collection::iterator info = segmentInfos.begin(); info != segmentInfos.end(); ++info)
        cloneInfos->segmentInfos.add(*info);
    return cloneInfos;
}

}
LucenePlusPlus-rel_3.0.4/src/core/index/SegmentInfos.cpp000066400000000000000000000666061217574114600232610ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
// NOTE(review): an angle-bracket header name was lost from the next include
// during text extraction (the "<...>" content was stripped); restore it from
// the upstream source before building.
#include
#include "SegmentInfos.h"
#include "_SegmentInfos.h"
#include "SegmentInfo.h"
#include "IndexFileNames.h"
#include "Directory.h"
#include "ChecksumIndexInput.h"
#include "ChecksumIndexOutput.h"
#include "IndexCommit.h"
#include "LuceneThread.h"
#include "InfoStream.h"
#include "TestPoint.h"
#include "MiscUtils.h"
#include "StringUtils.h"

namespace Lucene
{

/// The file format version, a negative number. Works since counter, the old 1st entry, is always >= 0
const int32_t SegmentInfos::FORMAT = -1;

/// This format adds details used for lockless commits. It differs slightly from the previous format in that file names
/// are never re-used (write once). Instead, each file is written to the next generation. For example, segments_1,
/// segments_2, etc. This allows us to not use a commit lock.
const int32_t SegmentInfos::FORMAT_LOCKLESS = -2;

/// This format adds a "hasSingleNormFile" flag into each segment info.
const int32_t SegmentInfos::FORMAT_SINGLE_NORM_FILE = -3;

/// This format allows multiple segments to share a single vectors and stored fields file.
const int32_t SegmentInfos::FORMAT_SHARED_DOC_STORE = -4;

/// This format adds a checksum at the end of the file to ensure all bytes were successfully written.
const int32_t SegmentInfos::FORMAT_CHECKSUM = -5;

/// This format adds the deletion count for each segment. This way IndexWriter can efficiently report numDocs().
const int32_t SegmentInfos::FORMAT_DEL_COUNT = -6;

/// This format adds the boolean hasProx to record if any fields in the segment store prox information (ie, have
/// omitTermFreqAndPositions == false)
const int32_t SegmentInfos::FORMAT_HAS_PROX = -7;

/// This format adds optional commit userData storage.
const int32_t SegmentInfos::FORMAT_USER_DATA = -8;

/// This format adds optional per-segment string diagnostics storage, and switches userData to Map
const int32_t SegmentInfos::FORMAT_DIAGNOSTICS = -9;

/// This must always point to the most recent file format.
const int32_t SegmentInfos::CURRENT_FORMAT = SegmentInfos::FORMAT_DIAGNOSTICS;

/// Advanced configuration of retry logic in loading segments_N file.
int32_t SegmentInfos::defaultGenFileRetryCount = 10; int32_t SegmentInfos::defaultGenFileRetryPauseMsec = 50; int32_t SegmentInfos::defaultGenLookaheadCount = 10; MapStringString SegmentInfos::singletonUserData; InfoStreamPtr SegmentInfos::infoStream; SegmentInfos::SegmentInfos() { userData = MapStringString::newInstance(); lastGeneration = 0; generation = 0; counter = 0; version = MiscUtils::currentTimeMillis(); } SegmentInfos::~SegmentInfos() { } SegmentInfoPtr SegmentInfos::info(int32_t i) { return segmentInfos[i]; } int64_t SegmentInfos::getCurrentSegmentGeneration(HashSet files) { if (!files) return -1; int64_t max = -1; for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { if (boost::starts_with(*file, IndexFileNames::SEGMENTS()) && *file != IndexFileNames::SEGMENTS_GEN()) max = std::max(generationFromSegmentsFileName(*file), max); } return max; } int64_t SegmentInfos::getCurrentSegmentGeneration(DirectoryPtr directory) { try { return getCurrentSegmentGeneration(directory->listAll()); } catch (LuceneException&) { return -1; } } String SegmentInfos::getCurrentSegmentFileName(HashSet files) { return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", getCurrentSegmentGeneration(files)); } String SegmentInfos::getCurrentSegmentFileName(DirectoryPtr directory) { return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", getCurrentSegmentGeneration(directory)); } String SegmentInfos::getCurrentSegmentFileName() { return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", lastGeneration); } int64_t SegmentInfos::generationFromSegmentsFileName(const String& fileName) { if (fileName == IndexFileNames::SEGMENTS()) return 0; else if (boost::starts_with(fileName, IndexFileNames::SEGMENTS())) return StringUtils::toLong(fileName.substr(wcslen(IndexFileNames::SEGMENTS().c_str()) + 1), StringUtils::CHARACTER_MAX_RADIX); else boost::throw_exception(IllegalArgumentException(L"FileName '" + 
fileName + L"' is not a segments file")); return 0; } String SegmentInfos::getNextSegmentFileName() { return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation == -1 ? 1 : generation + 1); } void SegmentInfos::read(DirectoryPtr directory, const String& segmentFileName) { bool success = false; // clear any previous segments segmentInfos.clear(); ChecksumIndexInputPtr input(newLucene(directory->openInput(segmentFileName))); generation = generationFromSegmentsFileName(segmentFileName); lastGeneration = generation; LuceneException finally; try { int32_t format = input->readInt(); if (format < 0) // file contains explicit format info { if (format < CURRENT_FORMAT) boost::throw_exception(CorruptIndexException(L"Unknown format version: " + StringUtils::toString(format))); version = input->readLong(); // read version counter = input->readInt(); // read counter } else counter = format; for (int32_t i = input->readInt(); i > 0; --i) // read segmentInfos segmentInfos.add(newLucene(directory, format, input)); // in old format the version number may be at the end of the file if (format >= 0) { if (input->getFilePointer() >= input->length()) version = MiscUtils::currentTimeMillis(); // old file format without version number else input->readLong(); // read version } if (format <= FORMAT_USER_DATA) { if (format <= FORMAT_DIAGNOSTICS) userData = input->readStringStringMap(); else if (input->readByte() != 0) { if (!singletonUserData) singletonUserData = MapStringString::newInstance(); singletonUserData[String(L"userData")] = input->readString(); userData = singletonUserData; } else userData.clear(); } else userData.clear(); if (format <= FORMAT_CHECKSUM) { int64_t checksumNow = input->getChecksum(); int64_t checksumThen = input->readLong(); if (checksumNow != checksumThen) boost::throw_exception(CorruptIndexException(L"Checksum mismatch in segments file")); } success = true; } catch (LuceneException& e) { finally = e; } input->close(); // clear any 
segment infos we had loaded so we have a clean slate on retry if (!success) segmentInfos.clear(); finally.throwException(); } void SegmentInfos::read(DirectoryPtr directory) { lastGeneration = -1; generation = lastGeneration; newLucene(shared_from_this(), directory)->run(); } void SegmentInfos::write(DirectoryPtr directory) { String segmentFileName(getNextSegmentFileName()); // always advance the generation on write if (generation == -1) generation = 1; else ++generation; ChecksumIndexOutputPtr segnOutput(newLucene(directory->createOutput(segmentFileName))); bool success = false; LuceneException finally; try { segnOutput->writeInt(CURRENT_FORMAT); // write FORMAT segnOutput->writeLong(++version); // every write changes the index segnOutput->writeInt(counter); // write counter segnOutput->writeInt(segmentInfos.size()); // write infos for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) (*seginfo)->write(segnOutput); segnOutput->writeStringStringMap(userData); segnOutput->prepareCommit(); success = true; pendingSegnOutput = segnOutput; } catch (LuceneException& e) { finally = e; } if (!success) { // We hit an exception above; try to close the file but suppress any exception try { segnOutput->close(); } catch (...) { // Suppress so we keep throwing the original exception } try { // try not to leave a truncated segments_n file in the index directory->deleteFile(segmentFileName); } catch (...) { // Suppress so we keep throwing the original exception } } finally.throwException(); } LuceneObjectPtr SegmentInfos::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = SegmentInfoCollection::clone(other ? 
other : newLucene()); SegmentInfosPtr cloneInfos(boost::dynamic_pointer_cast(clone)); cloneInfos->counter = counter; cloneInfos->generation = generation; cloneInfos->lastGeneration = lastGeneration; cloneInfos->version = version; cloneInfos->pendingSegnOutput = pendingSegnOutput; for (int32_t i = 0; i < cloneInfos->size(); ++i) cloneInfos->segmentInfos[i] = boost::dynamic_pointer_cast(cloneInfos->info(i)->clone()); cloneInfos->userData = MapStringString::newInstance(); cloneInfos->userData.putAll(userData.begin(), userData.end()); return cloneInfos; } int64_t SegmentInfos::getVersion() { return version; } int64_t SegmentInfos::getGeneration() { return generation; } int64_t SegmentInfos::getLastGeneration() { return lastGeneration; } int64_t SegmentInfos::readCurrentVersion(DirectoryPtr directory) { // Fully read the segments file: this ensures that it's completely written so that if IndexWriter.prepareCommit has been called // (but not yet commit), then the reader will still see itself as current. 
SegmentInfosPtr sis(newLucene()); sis->read(directory); return sis->getVersion(); } MapStringString SegmentInfos::readCurrentUserData(DirectoryPtr directory) { SegmentInfosPtr sis(newLucene()); sis->read(directory); return sis->getUserData(); } void SegmentInfos::setInfoStream(InfoStreamPtr infoStream) { SegmentInfos::infoStream = infoStream; } void SegmentInfos::setDefaultGenFileRetryCount(int32_t count) { defaultGenFileRetryCount = count; } int32_t SegmentInfos::getDefaultGenFileRetryCount() { return defaultGenFileRetryCount; } void SegmentInfos::setDefaultGenFileRetryPauseMsec(int32_t msec) { defaultGenFileRetryPauseMsec = msec; } int32_t SegmentInfos::getDefaultGenFileRetryPauseMsec() { return defaultGenFileRetryPauseMsec; } void SegmentInfos::setDefaultGenLookaheadCount(int32_t count) { defaultGenLookaheadCount = count; } int32_t SegmentInfos::getDefaultGenLookahedCount() { return defaultGenLookaheadCount; } InfoStreamPtr SegmentInfos::getInfoStream() { return infoStream; } void SegmentInfos::message(const String& message) { if (infoStream) *infoStream << L"SIS [" << message << L"]\n"; } FindSegmentsFile::FindSegmentsFile(SegmentInfosPtr infos, DirectoryPtr directory) { this->_segmentInfos = infos; this->directory = directory; } FindSegmentsFile::~FindSegmentsFile() { } void FindSegmentsFile::doRun(IndexCommitPtr commit) { if (commit) { if (directory != commit->getDirectory()) boost::throw_exception(IOException(L"The specified commit does not match the specified Directory")); runBody(commit->getSegmentsFileName()); return; } String segmentFileName; int64_t lastGen = -1; int64_t gen = 0; int32_t genLookaheadCount = 0; bool retry = false; LuceneException exc; SegmentInfosPtr segmentInfos(_segmentInfos); int32_t method = 0; // Loop until we succeed in calling runBody() without hitting an IOException. An IOException most likely // means a commit was in process and has finished, in the time it took us to load the now-old infos files // (and segments files). 
It's also possible it's a true error (corrupt index). To distinguish these, // on each retry we must see "forward progress" on which generation we are trying to load. If we don't, // then the original error is real and we throw it. // We have three methods for determining the current generation. We try the first two in parallel, and // fall back to the third when necessary. while (true) { if (method == 0) { // Method 1: list the directory and use the highest segments_N file. This method works well as long // as there is no stale caching on the directory contents (NOTE: NFS clients often have such stale caching) HashSet files(directory->listAll()); int64_t genA = segmentInfos->getCurrentSegmentGeneration(files); segmentInfos->message(L"directory listing genA=" + genA); // Method 2: open segments.gen and read its contents. Then we take the larger of the two gens. This way, // if either approach is hitting a stale cache (NFS) we have a better chance of getting the right generation. int64_t genB = -1; for (int32_t i = 0; i < SegmentInfos::defaultGenFileRetryCount; ++i) { IndexInputPtr genInput; try { genInput = directory->openInput(IndexFileNames::SEGMENTS_GEN()); } catch (FileNotFoundException& e) { segmentInfos->message(L"Segments.gen open: FileNotFoundException " + e.getError()); break; } catch (IOException& e) { segmentInfos->message(L"Segments.gen open: IOException " + e.getError()); } if (genInput) { LuceneException finally; bool fileConsistent = false; try { int32_t version = genInput->readInt(); if (version == SegmentInfos::FORMAT_LOCKLESS) { int64_t gen0 = genInput->readLong(); int64_t gen1 = genInput->readLong(); segmentInfos->message(L"fallback check: " + StringUtils::toString(gen0) + L"; " + StringUtils::toString(gen1)); if (gen0 == gen1) { // the file is consistent genB = gen0; fileConsistent = true; } } } catch (IOException&) { // will retry } catch (LuceneException& e) { finally = e; } genInput->close(); finally.throwException(); if (fileConsistent) 
break; } LuceneThread::threadSleep(SegmentInfos::defaultGenFileRetryPauseMsec); } segmentInfos->message(String(IndexFileNames::SEGMENTS_GEN()) + L" check: genB=" + StringUtils::toString(genB)); // pick the larger of the two gen's gen = std::max(genA, genB); // neither approach found a generation if (gen == -1) boost::throw_exception(FileNotFoundException(L"No segments* file found in directory")); } // Third method (fallback if first & second methods are not reliable): since both directory cache and // file contents cache seem to be stale, just advance the generation. if (method == 1 || (method == 0 && lastGen == gen && retry)) { method = 1; if (genLookaheadCount < SegmentInfos::defaultGenLookaheadCount) { ++gen; ++genLookaheadCount; segmentInfos->message(L"look ahead increment gen to " + StringUtils::toString(gen)); } } if (lastGen == gen) { // This means we're about to try the same segments_N last tried. This is allowed, exactly once, because // writer could have been in the process of writing segments_N last time. if (retry) { // OK, we've tried the same segments_N file twice in a row, so this must be a real error. exc.throwException(); } else retry = true; } else if (method == 0) { // Segment file has advanced since our last loop, so reset retry retry = false; } lastGen = gen; segmentFileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen); try { runBody(segmentFileName); segmentInfos->message(L"success on " + segmentFileName); return; } catch (LuceneException& err) { // Save the original root cause if (exc.isNull()) exc = err; segmentInfos->message(L"primary Exception on '" + segmentFileName + L"': " + err.getError() + L"'; will retry: retry=" + StringUtils::toString(retry) + L"; gen = " + StringUtils::toString(gen)); if (!retry && gen > 1) { // This is our first time trying this segments file (because retry is false), and, there is possibly a // segments_(N-1) (because gen > 1). 
So, check if the segments_(N-1) exists and try it if so. String prevSegmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen - 1)); if (directory->fileExists(prevSegmentFileName)) { segmentInfos->message(L"fallback to prior segment file '" + prevSegmentFileName + L"'"); try { runBody(prevSegmentFileName); if (!exc.isNull()) segmentInfos->message(L"success on fallback " + prevSegmentFileName); return; } catch (LuceneException& err2) { segmentInfos->message(L"secondary Exception on '" + prevSegmentFileName + L"': " + err2.getError() + L"'; will retry"); } } } } } } FindSegmentsRead::FindSegmentsRead(SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFileT(infos, directory) { result = 0; } FindSegmentsRead::~FindSegmentsRead() { } int64_t FindSegmentsRead::doBody(const String& segmentFileName) { SegmentInfosPtr(_segmentInfos)->read(directory, segmentFileName); return 0; } SegmentInfosPtr SegmentInfos::range(int32_t first, int32_t last) { SegmentInfosPtr infos(newLucene()); infos->segmentInfos.addAll(segmentInfos.begin() + first, segmentInfos.begin() + last); return infos; } void SegmentInfos::updateGeneration(SegmentInfosPtr other) { lastGeneration = other->lastGeneration; generation = other->generation; version = other->version; } void SegmentInfos::rollbackCommit(DirectoryPtr dir) { if (pendingSegnOutput) { try { pendingSegnOutput->close(); } catch (...) { } // must carefully compute filename from "generation" since lastgeneration isn't incremented try { String segmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation)); dir->deleteFile(segmentFileName); } catch (...) 
{ } pendingSegnOutput.reset(); } } void SegmentInfos::prepareCommit(DirectoryPtr dir) { TestScope testScope(L"SegmentInfos", L"prepareCommit"); if (pendingSegnOutput) boost::throw_exception(IllegalStateException(L"prepareCommit was already called")); write(dir); } HashSet SegmentInfos::files(DirectoryPtr dir, bool includeSegmentsFile) { HashSet files(HashSet::newInstance()); if (includeSegmentsFile) files.add(getCurrentSegmentFileName()); for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { if ((*seginfo)->dir == dir) { HashSet segFiles((*seginfo)->files()); files.addAll(segFiles.begin(), segFiles.end()); } } return files; } void SegmentInfos::finishCommit(DirectoryPtr dir) { if (!pendingSegnOutput) boost::throw_exception(IllegalStateException(L"prepareCommit was not called")); bool success = false; LuceneException finally; try { pendingSegnOutput->finishCommit(); pendingSegnOutput->close(); pendingSegnOutput.reset(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) rollbackCommit(dir); finally.throwException(); // NOTE: if we crash here, we have left a segments_N file in the directory in a possibly corrupt state (if // some bytes made it to stable storage and others didn't). But, the segments_N file includes checksum // at the end, which should catch this case. So when a reader tries to read it, it will throw a // CorruptIndexException, which should cause the retry logic in SegmentInfos to kick in and load the last // good (previous) segments_N-1 file. String fileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation)); success = false; try { dir->sync(fileName); success = true; } catch (...) 
{ } if (!success) dir->deleteFile(fileName); lastGeneration = generation; IndexOutputPtr genOutput; try { genOutput = dir->createOutput(IndexFileNames::SEGMENTS_GEN()); try { genOutput->writeInt(FORMAT_LOCKLESS); genOutput->writeLong(generation); genOutput->writeLong(generation); } catch (LuceneException& e) { finally = e; } genOutput->close(); finally.throwException(); } catch (...) { } } void SegmentInfos::commit(DirectoryPtr dir) { prepareCommit(dir); finishCommit(dir); } String SegmentInfos::segString(DirectoryPtr directory) { SyncLock syncLock(this); String buffer; for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { if (seginfo != segmentInfos.begin()) buffer += L' '; buffer += (*seginfo)->segString(directory); if ((*seginfo)->dir != directory) buffer += L"**"; } return buffer; } MapStringString SegmentInfos::getUserData() { return userData; } void SegmentInfos::setUserData(MapStringString data) { if (!data) userData = MapStringString::newInstance(); else userData = data; } void SegmentInfos::replace(SegmentInfosPtr other) { segmentInfos.clear(); segmentInfos.addAll(other->segmentInfos.begin(), other->segmentInfos.end()); lastGeneration = other->lastGeneration; } bool SegmentInfos::hasExternalSegments(DirectoryPtr dir) { for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { if ((*seginfo)->dir != dir) return true; } return false; } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentMergeInfo.cpp000066400000000000000000000041641217574114600240500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentMergeInfo.h" #include "IndexReader.h" #include "TermEnum.h" #include "TermPositions.h" namespace Lucene { SegmentMergeInfo::SegmentMergeInfo(int32_t b, TermEnumPtr te, IndexReaderPtr r) { base = b; _reader = r; termEnum = te; term = te->term(); ord = 0; delCount = 0; } SegmentMergeInfo::~SegmentMergeInfo() { } Collection SegmentMergeInfo::getDocMap() { if (!docMap) { delCount = 0; IndexReaderPtr reader(_reader); // build array which maps document numbers around deletions if (reader->hasDeletions()) { int32_t maxDoc = reader->maxDoc(); docMap = Collection::newInstance(maxDoc); int32_t j = 0; for (int32_t i = 0; i < maxDoc; ++i) { if (reader->isDeleted(i)) { ++delCount; docMap[i] = -1; } else docMap[i] = j++; } } } return docMap; } TermPositionsPtr SegmentMergeInfo::getPositions() { if (!postings) postings = IndexReaderPtr(_reader)->termPositions(); return postings; } bool SegmentMergeInfo::next() { if (termEnum->next()) { term = termEnum->term(); return true; } else { term.reset(); return false; } } void SegmentMergeInfo::close() { termEnum->close(); if (postings) postings->close(); } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentMergeQueue.cpp000066400000000000000000000017371217574114600242440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentMergeQueue.h" #include "SegmentMergeInfo.h" namespace Lucene { SegmentMergeQueue::SegmentMergeQueue(int32_t size) : PriorityQueue(size) { } SegmentMergeQueue::~SegmentMergeQueue() { } void SegmentMergeQueue::close() { while (top()) pop()->close(); } bool SegmentMergeQueue::lessThan(const SegmentMergeInfoPtr& first, const SegmentMergeInfoPtr& second) { int32_t comparison = first->term->compareTo(second->term); return comparison == 0 ? (first->base < second->base) : (comparison < 0); } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentMerger.cpp000066400000000000000000000735701217574114600234250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentMerger.h" #include "MergePolicy.h" #include "IndexWriter.h" #include "IndexOutput.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "FieldsReader.h" #include "FieldsWriter.h" #include "IndexFileNames.h" #include "CompoundFileWriter.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "Directory.h" #include "TermPositions.h" #include "TermVectorsReader.h" #include "TermVectorsWriter.h" #include "FormatPostingsDocsConsumer.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsPositionsConsumer.h" #include "FormatPostingsTermsConsumer.h" #include "SegmentMergeInfo.h" #include "SegmentMergeQueue.h" #include "SegmentWriteState.h" #include "TestPoint.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// Maximum number of contiguous documents to bulk-copy when merging stored fields const int32_t SegmentMerger::MAX_RAW_MERGE_DOCS = 4192; /// norms header placeholder const uint8_t SegmentMerger::NORMS_HEADER[] = {'N', 'R', 'M', -1}; const int32_t SegmentMerger::NORMS_HEADER_LENGTH = 4; SegmentMerger::SegmentMerger(DirectoryPtr dir, const String& name) { readers = Collection::newInstance(); termIndexInterval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL; mergedDocs = 0; mergeDocStores = false; omitTermFreqAndPositions = false; directory = dir; segment = name; checkAbort = newLucene(); } SegmentMerger::SegmentMerger(IndexWriterPtr writer, const String& name, OneMergePtr merge) { readers = Collection::newInstance(); mergedDocs = 0; mergeDocStores = false; omitTermFreqAndPositions = false; directory = writer->getDirectory(); segment = name; if (merge) checkAbort = newLucene(merge, directory); else checkAbort = newLucene(); termIndexInterval = writer->getTermIndexInterval(); } SegmentMerger::~SegmentMerger() { } bool SegmentMerger::hasProx() { return fieldInfos->hasProx(); } void 
SegmentMerger::add(IndexReaderPtr reader) { readers.add(reader); } IndexReaderPtr SegmentMerger::segmentReader(int32_t i) { return readers[i]; } int32_t SegmentMerger::merge() { return merge(true); } int32_t SegmentMerger::merge(bool mergeDocStores) { this->mergeDocStores = mergeDocStores; // NOTE: it's important to add calls to checkAbort.work(...) if you make any changes to this method that will spend a lot of time. // The frequency of this check impacts how long IndexWriter.close(false) takes to actually stop the threads. mergedDocs = mergeFields(); mergeTerms(); mergeNorms(); if (mergeDocStores && fieldInfos->hasVectors()) mergeVectors(); return mergedDocs; } void SegmentMerger::closeReaders() { for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) (*reader)->close(); } HashSet SegmentMerger::getMergedFiles() { HashSet fileSet(HashSet::newInstance()); // Basic files for (HashSet::iterator ext = IndexFileNames::COMPOUND_EXTENSIONS().begin(); ext != IndexFileNames::COMPOUND_EXTENSIONS().end(); ++ext) { if (*ext == IndexFileNames::PROX_EXTENSION() && !hasProx()) continue; if (mergeDocStores || (*ext != IndexFileNames::FIELDS_EXTENSION() && *ext != IndexFileNames::FIELDS_INDEX_EXTENSION())) fileSet.add(segment + L"." + *ext); } // Fieldable norm files for (int32_t i = 0; i < fieldInfos->size(); ++i) { FieldInfoPtr fi(fieldInfos->fieldInfo(i)); if (fi->isIndexed && !fi->omitNorms) { fileSet.add(segment + L"." + IndexFileNames::NORMS_EXTENSION()); break; } } // Vector files if (fieldInfos->hasVectors() && mergeDocStores) { for (HashSet::iterator ext = IndexFileNames::VECTOR_EXTENSIONS().begin(); ext != IndexFileNames::VECTOR_EXTENSIONS().end(); ++ext) fileSet.add(segment + L"." 
+ *ext); } return fileSet; } HashSet SegmentMerger::createCompoundFile(const String& fileName) { HashSet files(getMergedFiles()); CompoundFileWriterPtr cfsWriter(newLucene(directory, fileName, checkAbort)); // Now merge all added files for (HashSet::iterator file = files.begin(); file != files.end(); ++file) cfsWriter->addFile(*file); // Perform the merge cfsWriter->close(); return files; } void SegmentMerger::addIndexed(IndexReaderPtr reader, FieldInfosPtr fInfos, HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions) { for (HashSet::iterator field = names.begin(); field != names.end(); ++field) { fInfos->add(*field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader->hasNorms(*field), storePayloads, omitTFAndPositions); } } void SegmentMerger::setMatchingSegmentReaders() { // If the i'th reader is a SegmentReader and has identical fieldName -> number mapping, then // this array will be non-null at position i int32_t numReaders = readers.size(); matchingSegmentReaders = Collection::newInstance(numReaders); // If this reader is a SegmentReader, and all of its field name -> number mappings match the // "merged" FieldInfos, then we can do a bulk copy of the stored fields for (int32_t i = 0; i < numReaders; ++i) { IndexReaderPtr reader(readers[i]); SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(reader)); if (segmentReader) { bool same = true; FieldInfosPtr segmentFieldInfos(segmentReader->fieldInfos()); int32_t numFieldInfos = segmentFieldInfos->size(); for (int32_t j = 0; same && j < numFieldInfos; ++j) same = (fieldInfos->fieldName(j) == segmentFieldInfos->fieldName(j)); if (same) matchingSegmentReaders[i] = segmentReader; } } // Used for bulk-reading raw bytes for stored fields rawDocLengths = Collection::newInstance(MAX_RAW_MERGE_DOCS); rawDocLengths2 = Collection::newInstance(MAX_RAW_MERGE_DOCS); } int32_t 
SegmentMerger::mergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, their field name -> number mapping are the same. // So, we start with the fieldInfos of the last segment in this case, to keep that numbering fieldInfos = boost::dynamic_pointer_cast(boost::dynamic_pointer_cast(readers[readers.size() - 1])->core->fieldInfos->clone()); } else fieldInfos = newLucene(); // merge field names for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(*reader)); if (segmentReader) { FieldInfosPtr readerFieldInfos(segmentReader->fieldInfos()); int32_t numReaderFieldInfos = readerFieldInfos->size(); for (int32_t j = 0; j < numReaderFieldInfos; ++j) { FieldInfoPtr fi(readerFieldInfos->fieldInfo(j)); fieldInfos->add(fi->name, fi->isIndexed, fi->storeTermVector, fi->storePositionWithTermVector, fi->storeOffsetWithTermVector, !(*reader)->hasNorms(fi->name), fi->storePayloads, fi->omitTermFreqAndPositions); } } else { addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION), true, true, false, false, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_OFFSET), true, false, true, false, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR), true, false, false, false, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_STORES_PAYLOADS), false, false, false, true, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_INDEXED), false, false, 
false, false, false); fieldInfos->add((*reader)->getFieldNames(IndexReader::FIELD_OPTION_UNINDEXED), false); } } fieldInfos->write(directory, segment + L".fnm"); int32_t docCount = 0; setMatchingSegmentReaders(); if (mergeDocStores) { // merge field values FieldsWriterPtr fieldsWriter(newLucene(directory, segment, fieldInfos)); LuceneException finally; try { int32_t idx = 0; for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { SegmentReaderPtr matchingSegmentReader(matchingSegmentReaders[idx++]); FieldsReaderPtr matchingFieldsReader; if (matchingSegmentReader) { FieldsReaderPtr fieldsReader(matchingSegmentReader->getFieldsReader()); if (fieldsReader && fieldsReader->canReadRawDocs()) matchingFieldsReader = fieldsReader; } if ((*reader)->hasDeletions()) docCount += copyFieldsWithDeletions(fieldsWriter, *reader, matchingFieldsReader); else docCount += copyFieldsNoDeletions(fieldsWriter, *reader, matchingFieldsReader); } } catch (LuceneException& e) { finally = e; } fieldsWriter->close(); finally.throwException(); String fileName(segment + L"." 
+ IndexFileNames::FIELDS_INDEX_EXTENSION()); int64_t fdxFileLength = directory->fileLength(fileName); if (4 + ((int64_t)docCount) * 8 != fdxFileLength) { boost::throw_exception(RuntimeException(L"mergeFields produced an invalid result: docCount is " + StringUtils::toString(docCount) + L" but fdx file size is " + StringUtils::toString(fdxFileLength) + L" file=" + fileName + L" file exists?=" + StringUtils::toString(directory->fileExists(fileName)) + L"; now aborting this merge to prevent index corruption")); } } else { // If we are skipping the doc stores, that means there are no deletions in any of these segments, // so we just sum numDocs() of each segment to get total docCount for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) docCount += (*reader)->numDocs(); } return docCount; } int32_t SegmentMerger::copyFieldsWithDeletions(FieldsWriterPtr fieldsWriter, IndexReaderPtr reader, FieldsReaderPtr matchingFieldsReader) { int32_t docCount = 0; int32_t maxDoc = reader->maxDoc(); if (matchingFieldsReader) { // We can bulk-copy because the fieldInfos are "congruent" for (int32_t j = 0; j < maxDoc;) { if (reader->isDeleted(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field numbers are identical int32_t start = j; int32_t numDocs = 0; do { ++j; ++numDocs; if (j >= maxDoc) break; if (reader->isDeleted(j)) { ++j; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInputPtr stream(matchingFieldsReader->rawDocs(rawDocLengths, start, numDocs)); fieldsWriter->addRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; checkAbort->work(300 * numDocs); } } else { for (int32_t j = 0; j < maxDoc; ++j) { if (reader->isDeleted(j)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to doc then pass it to termVectorsWriter.addAllDocVectors fieldsWriter->addDocument(reader->document(j)); ++docCount; checkAbort->work(300); } } return docCount; } 
int32_t SegmentMerger::copyFieldsNoDeletions(FieldsWriterPtr fieldsWriter, IndexReaderPtr reader, FieldsReaderPtr matchingFieldsReader) { int32_t docCount = 0; int32_t maxDoc = reader->maxDoc(); if (matchingFieldsReader) { // We can bulk-copy because the fieldInfos are "congruent" while (docCount < maxDoc) { int32_t len = std::min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); IndexInputPtr stream(matchingFieldsReader->rawDocs(rawDocLengths, docCount, len)); fieldsWriter->addRawDocuments(stream, rawDocLengths, len); docCount += len; checkAbort->work(300 * len); } } else { for (; docCount < maxDoc; ++docCount) { // NOTE: it's very important to first assign to doc then pass it to termVectorsWriter.addAllDocVectors fieldsWriter->addDocument(reader->document(docCount)); checkAbort->work(300); } } return docCount; } void SegmentMerger::mergeVectors() { TermVectorsWriterPtr termVectorsWriter(newLucene(directory, segment, fieldInfos)); LuceneException finally; try { int32_t idx = 0; for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { SegmentReaderPtr matchingSegmentReader(matchingSegmentReaders[idx++]); TermVectorsReaderPtr matchingVectorsReader; if (matchingSegmentReader) { TermVectorsReaderPtr vectorsReader(matchingSegmentReader->getTermVectorsReaderOrig()); // If the TV* files are an older format then they cannot read raw docs if (vectorsReader && vectorsReader->canReadRawDocs()) matchingVectorsReader = vectorsReader; } if ((*reader)->hasDeletions()) copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, *reader); else copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, *reader); } } catch (LuceneException& e) { finally = e; } termVectorsWriter->close(); finally.throwException(); String fileName(segment + L"." 
+ IndexFileNames::VECTORS_INDEX_EXTENSION()); int64_t tvxSize = directory->fileLength(fileName); if (4 + ((int64_t)mergedDocs) * 16 != tvxSize) { boost::throw_exception(RuntimeException(L"mergeVectors produced an invalid result: mergedDocs is " + StringUtils::toString(mergedDocs) + L" but tvx size is " + StringUtils::toString(tvxSize) + L" file=" + fileName + L" file exists?=" + StringUtils::toString(directory->fileExists(fileName)) + L"; now aborting this merge to prevent index corruption")); } } void SegmentMerger::copyVectorsWithDeletions(TermVectorsWriterPtr termVectorsWriter, TermVectorsReaderPtr matchingVectorsReader, IndexReaderPtr reader) { int32_t maxDoc = reader->maxDoc(); if (matchingVectorsReader) { // We can bulk-copy because the fieldInfos are "congruent" for (int32_t docNum = 0; docNum < maxDoc;) { if (reader->isDeleted(docNum)) { // skip deleted docs ++docNum; continue; } // We can optimize this case (doing a bulk byte copy) since the field numbers are identical int32_t start = docNum; int32_t numDocs = 0; do { ++docNum; ++numDocs; if (docNum >= maxDoc) break; if (reader->isDeleted(docNum)) { ++docNum; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); matchingVectorsReader->rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter->addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); checkAbort->work(300 * numDocs); } } else { for (int32_t docNum = 0; docNum < maxDoc; ++docNum) { if (reader->isDeleted(docNum)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to vectors then pass it to termVectorsWriter.addAllDocVectors termVectorsWriter->addAllDocVectors(reader->getTermFreqVectors(docNum)); checkAbort->work(300); } } } void SegmentMerger::copyVectorsNoDeletions(TermVectorsWriterPtr termVectorsWriter, TermVectorsReaderPtr matchingVectorsReader, IndexReaderPtr reader) { int32_t maxDoc = reader->maxDoc(); if (matchingVectorsReader) { // We can bulk-copy because the fieldInfos are 
"congruent" int32_t docCount = 0; while (docCount < maxDoc) { int32_t len = std::min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); matchingVectorsReader->rawDocs(rawDocLengths, rawDocLengths2, docCount, len); termVectorsWriter->addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len); docCount += len; checkAbort->work(300 * len); } } else { for (int32_t docNum = 0; docNum < maxDoc; ++docNum) { // NOTE: it's very important to first assign to vectors then pass it to termVectorsWriter.addAllDocVectors termVectorsWriter->addAllDocVectors(reader->getTermFreqVectors(docNum)); checkAbort->work(300); } } } void SegmentMerger::mergeTerms() { TestScope testScope(L"SegmentMerger", L"mergeTerms"); SegmentWriteStatePtr state(newLucene(DocumentsWriterPtr(), directory, segment, L"", mergedDocs, 0, termIndexInterval)); FormatPostingsFieldsConsumerPtr consumer(newLucene(state, fieldInfos)); LuceneException finally; try { queue = newLucene(readers.size()); mergeTermInfos(consumer); } catch (LuceneException& e) { finally = e; } consumer->finish(); if (queue) queue->close(); finally.throwException(); } void SegmentMerger::mergeTermInfos(FormatPostingsFieldsConsumerPtr consumer) { int32_t base = 0; int32_t readerCount = readers.size(); for (int32_t i = 0; i < readerCount; ++i) { IndexReaderPtr reader(readers[i]); TermEnumPtr termEnum(reader->terms()); SegmentMergeInfoPtr smi(newLucene(base, termEnum, reader)); Collection docMap(smi->getDocMap()); if (docMap) { if (!docMaps) { docMaps = Collection< Collection >::newInstance(readerCount); delCounts = Collection::newInstance(readerCount); } docMaps[i] = docMap; IndexReaderPtr segmentMergeReader(smi->_reader); delCounts[i] = segmentMergeReader->maxDoc() - segmentMergeReader->numDocs(); } base += reader->numDocs(); BOOST_ASSERT(reader->numDocs() == reader->maxDoc() - smi->delCount); if (smi->next()) queue->add(smi); // initialize queue else smi->close(); } Collection match(Collection::newInstance(readers.size())); String 
currentField; FormatPostingsTermsConsumerPtr termsConsumer; while (!queue->empty()) { int32_t matchSize = 0; // pop matching terms match[matchSize++] = queue->pop(); TermPtr term(match[0]->term); SegmentMergeInfoPtr top(queue->empty() ? SegmentMergeInfoPtr() : queue->top()); while (top && term->compareTo(top->term) == 0) { match[matchSize++] = queue->pop(); top = queue->top(); } if (currentField != term->_field) { currentField = term->_field; if (termsConsumer) termsConsumer->finish(); FieldInfoPtr fieldInfo(fieldInfos->fieldInfo(currentField)); termsConsumer = consumer->addField(fieldInfo); omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; } int32_t df = appendPostings(termsConsumer, match, matchSize); // add new TermInfo checkAbort->work(df / 3.0); while (matchSize > 0) { SegmentMergeInfoPtr smi(match[--matchSize]); if (smi->next()) queue->add(smi); // restore queue else smi->close(); // done with a segment } } } Collection< Collection > SegmentMerger::getDocMaps() { return docMaps; } Collection SegmentMerger::getDelCounts() { return delCounts; } int32_t SegmentMerger::appendPostings(FormatPostingsTermsConsumerPtr termsConsumer, Collection smis, int32_t n) { FormatPostingsDocsConsumerPtr docConsumer(termsConsumer->addTerm(smis[0]->term->_text)); int32_t df = 0; for (int32_t i = 0; i < n; ++i) { SegmentMergeInfoPtr smi(smis[i]); TermPositionsPtr postings(smi->getPositions()); BOOST_ASSERT(postings); int32_t base = smi->base; Collection docMap(smi->getDocMap()); postings->seek(smi->termEnum); while (postings->next()) { ++df; int32_t doc = postings->doc(); if (docMap) doc = docMap[doc]; // map around deletions doc += base; // convert to merged space int32_t freq = postings->freq(); FormatPostingsPositionsConsumerPtr posConsumer(docConsumer->addDoc(doc, freq)); if (!omitTermFreqAndPositions) { for (int32_t j = 0; j < freq; ++j) { int32_t position = postings->nextPosition(); int32_t payloadLength = postings->getPayloadLength(); if (payloadLength > 0) { 
if (!payloadBuffer) payloadBuffer = ByteArray::newInstance(payloadLength); if (payloadBuffer.size() < payloadLength) payloadBuffer.resize(payloadLength); postings->getPayload(payloadBuffer, 0); } posConsumer->addPosition(position, payloadBuffer, 0, payloadLength); } posConsumer->finish(); } } } docConsumer->finish(); return df; } void SegmentMerger::mergeNorms() { ByteArray normBuffer; IndexOutputPtr output; LuceneException finally; try { int32_t numFieldInfos = fieldInfos->size(); for (int32_t i = 0; i < numFieldInfos; ++i) { FieldInfoPtr fi(fieldInfos->fieldInfo(i)); if (fi->isIndexed && !fi->omitNorms) { if (!output) { output = directory->createOutput(segment + L"." + IndexFileNames::NORMS_EXTENSION()); output->writeBytes(NORMS_HEADER, SIZEOF_ARRAY(NORMS_HEADER)); } for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { int32_t maxDoc = (*reader)->maxDoc(); if (!normBuffer) normBuffer = ByteArray::newInstance(maxDoc); if (normBuffer.size() < maxDoc) // the buffer is too small for the current segment normBuffer.resize(maxDoc); MiscUtils::arrayFill(normBuffer.get(), 0, normBuffer.size(), 0); (*reader)->norms(fi->name, normBuffer, 0); if (!(*reader)->hasDeletions()) { // optimized case for segments without deleted docs output->writeBytes(normBuffer.get(), maxDoc); } else { // this segment has deleted docs, so we have to check for every doc if it is deleted or not for (int32_t k = 0; k < maxDoc; ++k) { if (!(*reader)->isDeleted(k)) output->writeByte(normBuffer[k]); } } checkAbort->work(maxDoc); } } } } catch (LuceneException& e) { finally = e; } if (output) output->close(); finally.throwException(); } CheckAbort::CheckAbort(OneMergePtr merge, DirectoryPtr dir) { workCount = 0; this->merge = merge; this->_dir = dir; } CheckAbort::~CheckAbort() { } void CheckAbort::work(double units) { workCount += units; if (workCount >= 10000.0) { merge->checkAborted(DirectoryPtr(_dir)); workCount = 0; } } CheckAbortNull::CheckAbortNull() : 
CheckAbort(OneMergePtr(), DirectoryPtr()) { } CheckAbortNull::~CheckAbortNull() { } void CheckAbortNull::work(double units) { // do nothing } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentReader.cpp000066400000000000000000001266101217574114600234000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "SegmentReader.h" #include "_SegmentReader.h" #include "IndexFileNames.h" #include "DirectoryReader.h" #include "CompoundFileReader.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "FieldsReader.h" #include "TermInfo.h" #include "TermInfosReader.h" #include "TermVectorsReader.h" #include "IndexOutput.h" #include "ReadOnlySegmentReader.h" #include "BitVector.h" #include "SegmentTermEnum.h" #include "SegmentTermPositions.h" #include "SegmentInfo.h" #include "SegmentMerger.h" #include "AllTermDocs.h" #include "DefaultSimilarity.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { SegmentReader::SegmentReader() { _norms = MapStringNorm::newInstance(); readOnly = false; deletedDocsDirty = false; normsDirty = false; rollbackHasChanges = false; rollbackDeletedDocsDirty = false; rollbackNormsDirty = false; readBufferSize = 0; pendingDeleteCount = 0; rollbackPendingDeleteCount = 0; } SegmentReader::~SegmentReader() { } void SegmentReader::initialize() { fieldsReaderLocal = newLucene(shared_from_this()); } SegmentReaderPtr SegmentReader::get(bool readOnly, SegmentInfoPtr si, int32_t termInfosIndexDivisor) { return get(readOnly, si->dir, si, BufferedIndexInput::BUFFER_SIZE, true, termInfosIndexDivisor); } SegmentReaderPtr SegmentReader::get(bool readOnly, DirectoryPtr dir, 
SegmentInfoPtr si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor) { SegmentReaderPtr instance(readOnly ? newLucene() : newLucene()); instance->readOnly = readOnly; instance->si = si; instance->readBufferSize = readBufferSize; bool success = false; LuceneException finally; try { instance->core = newLucene(instance, dir, si, readBufferSize, termInfosIndexDivisor); if (doOpenStores) instance->core->openDocStores(si); instance->loadDeletedDocs(); instance->openNorms(instance->core->cfsDir, readBufferSize); success = true; } catch (LuceneException& e) { finally = e; } // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. // In this case, we want to explicitly close any subset of things that were opened if (!success) instance->doClose(); finally.throwException(); return instance; } void SegmentReader::openDocStores() { core->openDocStores(si); } bool SegmentReader::checkDeletedCounts() { int32_t recomputedCount = deletedDocs->getRecomputedCount(); BOOST_ASSERT(deletedDocs->count() == recomputedCount); BOOST_ASSERT(si->getDelCount() == recomputedCount); // Verify # deletes does not exceed maxDoc for this segment BOOST_ASSERT(si->getDelCount() <= maxDoc()); return true; } void SegmentReader::loadDeletedDocs() { // NOTE: the bitvector is stored using the regular directory, not cfs if (hasDeletions(si)) { deletedDocs = newLucene(directory(), si->getDelFileName()); deletedDocsRef = newLucene(); BOOST_ASSERT(checkDeletedCounts()); } else BOOST_ASSERT(si->getDelCount() == 0); } ByteArray SegmentReader::cloneNormBytes(ByteArray bytes) { ByteArray cloneBytes(ByteArray::newInstance(bytes.size())); MiscUtils::arrayCopy(bytes.get(), 0, cloneBytes.get(), 0, bytes.size()); return cloneBytes; } BitVectorPtr SegmentReader::cloneDeletedDocs(BitVectorPtr bv) { return boost::dynamic_pointer_cast(bv->clone()); } LuceneObjectPtr SegmentReader::clone(LuceneObjectPtr other) { try { return SegmentReader::clone(readOnly, 
other); // Preserve current readOnly } catch (...) { boost::throw_exception(RuntimeException()); } return LuceneObjectPtr(); } LuceneObjectPtr SegmentReader::clone(bool openReadOnly, LuceneObjectPtr other) { SyncLock syncLock(this); return reopenSegment(si, true, openReadOnly); } SegmentReaderPtr SegmentReader::reopenSegment(SegmentInfoPtr si, bool doClone, bool openReadOnly) { SyncLock syncLock(this); bool deletionsUpToDate = (this->si->hasDeletions() == si->hasDeletions() && (!si->hasDeletions() || this->si->getDelFileName() == si->getDelFileName())); bool normsUpToDate = true; int32_t fieldCount = core->fieldInfos->size(); Collection fieldNormsChanged(Collection::newInstance(fieldCount)); for (int32_t i = 0; i < fieldCount; ++i) { if (this->si->getNormFileName(i) != si->getNormFileName(i)) { normsUpToDate = false; fieldNormsChanged[i] = true; } } // if we're cloning we need to run through the reopenSegment logic also if both old and new readers // aren't readonly, we clone to avoid sharing modifications if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) return shared_from_this(); // When cloning, the incoming SegmentInfos should not have any changes in it BOOST_ASSERT(!doClone || (normsUpToDate && deletionsUpToDate)); // clone reader SegmentReaderPtr clone(openReadOnly ? 
newLucene() : newLucene()); bool success = false; LuceneException finally; try { core->incRef(); clone->core = core; clone->readOnly = openReadOnly; clone->si = si; clone->readBufferSize = readBufferSize; if (!openReadOnly && _hasChanges) { // My pending changes transfer to the new reader clone->pendingDeleteCount = pendingDeleteCount; clone->deletedDocsDirty = deletedDocsDirty; clone->normsDirty = normsDirty; clone->_hasChanges = _hasChanges; _hasChanges = false; } if (doClone) { if (deletedDocs) { deletedDocsRef->incRef(); clone->deletedDocs = deletedDocs; clone->deletedDocsRef = deletedDocsRef; } } else { if (!deletionsUpToDate) { // load deleted docs BOOST_ASSERT(!clone->deletedDocs); clone->loadDeletedDocs(); } else if (deletedDocs) { deletedDocsRef->incRef(); clone->deletedDocs = deletedDocs; clone->deletedDocsRef = deletedDocsRef; } } clone->_norms = MapStringNorm::newInstance(); // Clone norms for (int32_t i = 0; i < fieldNormsChanged.size(); ++i) { // Clone unchanged norms to the cloned reader if (doClone || !fieldNormsChanged[i]) { String curField(core->fieldInfos->fieldInfo(i)->name); NormPtr norm(this->_norms.get(curField)); if (norm) { NormPtr cloneNorm(boost::dynamic_pointer_cast(norm->clone())); cloneNorm->_reader = clone; clone->_norms.put(curField, cloneNorm); } } } // If we are not cloning, then this will open anew any norms that have changed clone->openNorms(si->getUseCompoundFile() ? 
core->getCFSReader() : directory(), readBufferSize); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // An exception occurred during reopen, we have to decRef the norms that we incRef'ed already // and close singleNormsStream and FieldsReader clone->decRef(); } finally.throwException(); return clone; } void SegmentReader::doCommit(MapStringString commitUserData) { if (_hasChanges) { startCommit(); bool success = false; LuceneException finally; try { commitChanges(commitUserData); success = true; } catch (LuceneException& e) { finally = e; } if (!success) rollbackCommit(); finally.throwException(); } } void SegmentReader::commitChanges(MapStringString commitUserData) { if (deletedDocsDirty) // re-write deleted { si->advanceDelGen(); // We can write directly to the actual name (vs to a .tmp & renaming it) because the file // is not live until segments file is written String delFileName(si->getDelFileName()); bool success = false; LuceneException finally; try { deletedDocs->write(directory(), delFileName); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { try { directory()->deleteFile(delFileName); } catch (...) { // suppress this so we keep throwing the original exception } } finally.throwException(); si->setDelCount(si->getDelCount() + pendingDeleteCount); pendingDeleteCount = 0; BOOST_ASSERT(deletedDocs->count() == si->getDelCount()); // delete count mismatch during commit? 
} else { BOOST_ASSERT(pendingDeleteCount == 0); } if (normsDirty) // re-write norms { si->setNumFields(core->fieldInfos->size()); for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { if (norm->second->dirty) norm->second->reWrite(si); } } deletedDocsDirty = false; normsDirty = false; _hasChanges = false; } FieldsReaderPtr SegmentReader::getFieldsReader() { return fieldsReaderLocal->get(); } void SegmentReader::doClose() { termVectorsLocal.close(); fieldsReaderLocal->close(); if (deletedDocs) { deletedDocsRef->decRef(); deletedDocs.reset(); // null so if an app hangs on to us we still free most ram } for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) norm->second->decRef(); if (core) core->decRef(); } bool SegmentReader::hasDeletions(SegmentInfoPtr si) { // Don't call ensureOpen() here (it could affect performance) return si->hasDeletions(); } bool SegmentReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return deletedDocs; } bool SegmentReader::usesCompoundFile(SegmentInfoPtr si) { return si->getUseCompoundFile(); } bool SegmentReader::hasSeparateNorms(SegmentInfoPtr si) { return si->hasSeparateNorms(); } void SegmentReader::doDelete(int32_t docNum) { if (!deletedDocs) { deletedDocs = newLucene(maxDoc()); deletedDocsRef = newLucene(); } // there is more than 1 SegmentReader with a reference to this deletedDocs BitVector so decRef // the current deletedDocsRef, clone the BitVector, create a new deletedDocsRef if (deletedDocsRef->refCount() > 1) { SegmentReaderRefPtr oldRef(deletedDocsRef); deletedDocs = cloneDeletedDocs(deletedDocs); deletedDocsRef = newLucene(); oldRef->decRef(); } deletedDocsDirty = true; if (!deletedDocs->getAndSet(docNum)) ++pendingDeleteCount; } void SegmentReader::doUndeleteAll() { deletedDocsDirty = false; if (deletedDocs) { BOOST_ASSERT(deletedDocsRef); deletedDocsRef->decRef(); deletedDocs.reset(); deletedDocsRef.reset(); pendingDeleteCount = 
0; si->clearDelGen(); si->setDelCount(0); } else { BOOST_ASSERT(!deletedDocsRef); BOOST_ASSERT(pendingDeleteCount == 0); } } HashSet SegmentReader::files() { return si->files(); } TermEnumPtr SegmentReader::terms() { ensureOpen(); return core->getTermsReader()->terms(); } TermEnumPtr SegmentReader::terms(TermPtr t) { ensureOpen(); return core->getTermsReader()->terms(t); } FieldInfosPtr SegmentReader::fieldInfos() { return core->fieldInfos; } DocumentPtr SegmentReader::document(int32_t n, FieldSelectorPtr fieldSelector) { ensureOpen(); return getFieldsReader()->doc(n, fieldSelector); } bool SegmentReader::isDeleted(int32_t n) { SyncLock syncLock(this); return (deletedDocs && deletedDocs->get(n)); } TermDocsPtr SegmentReader::termDocs(TermPtr term) { if (!term) return newLucene(shared_from_this()); else return IndexReader::termDocs(term); } TermDocsPtr SegmentReader::termDocs() { ensureOpen(); return newLucene(shared_from_this()); } TermPositionsPtr SegmentReader::termPositions() { ensureOpen(); return newLucene(shared_from_this()); } int32_t SegmentReader::docFreq(TermPtr t) { ensureOpen(); TermInfoPtr ti(core->getTermsReader()->get(t)); return ti ? 
ti->docFreq : 0; } int32_t SegmentReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) int32_t n = maxDoc(); if (deletedDocs) n -= deletedDocs->count(); return n; } int32_t SegmentReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return si->docCount; } HashSet SegmentReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); HashSet fieldSet(HashSet::newInstance()); for (int32_t i = 0; i < core->fieldInfos->size(); ++i) { FieldInfoPtr fi(core->fieldInfos->fieldInfo(i)); if (fieldOption == FIELD_OPTION_ALL) fieldSet.add(fi->name); else if (!fi->isIndexed && fieldOption == FIELD_OPTION_UNINDEXED) fieldSet.add(fi->name); else if (fi->omitTermFreqAndPositions && fieldOption == FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS) fieldSet.add(fi->name); else if (fi->storePayloads && fieldOption == FIELD_OPTION_STORES_PAYLOADS) fieldSet.add(fi->name); else if (fi->isIndexed && fieldOption == FIELD_OPTION_INDEXED) fieldSet.add(fi->name); else if (fi->isIndexed && !fi->storeTermVector && fieldOption == FIELD_OPTION_INDEXED_NO_TERMVECTOR) fieldSet.add(fi->name); else if (fi->storeTermVector && !fi->storePositionWithTermVector && !fi->storeOffsetWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR) fieldSet.add(fi->name); else if (fi->isIndexed && fi->storeTermVector && fieldOption == FIELD_OPTION_INDEXED_WITH_TERMVECTOR) fieldSet.add(fi->name); else if (fi->storePositionWithTermVector && !fi->storeOffsetWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION) fieldSet.add(fi->name); else if (fi->storeOffsetWithTermVector && !fi->storePositionWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_OFFSET) fieldSet.add(fi->name); else if (fi->storeOffsetWithTermVector && fi->storePositionWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET) fieldSet.add(fi->name); } return fieldSet; } bool SegmentReader::hasNorms(const String& field) { SyncLock syncLock(this); ensureOpen(); 
return _norms.contains(field); } ByteArray SegmentReader::getNorms(const String& field) { SyncLock syncLock(this); NormPtr norm(_norms.get(field)); return norm ? norm->bytes() : ByteArray(); } ByteArray SegmentReader::norms(const String& field) { SyncLock syncLock(this); ensureOpen(); return getNorms(field); } void SegmentReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { NormPtr norm(_norms.get(field)); if (!norm) // not an indexed field return; normsDirty = true; ByteArray bytes(norm->copyOnWrite()); if (doc < 0 || doc >= bytes.size()) boost::throw_exception(IndexOutOfBoundsException()); bytes[doc] = value; // set the value } void SegmentReader::norms(const String& field, ByteArray norms, int32_t offset) { SyncLock syncLock(this); ensureOpen(); NormPtr norm(_norms.get(field)); if (!norm) { MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); return; } norm->bytes(norms.get(), offset, maxDoc()); } void SegmentReader::openNorms(DirectoryPtr cfsDir, int32_t readBufferSize) { int64_t nextNormSeek = SegmentMerger::NORMS_HEADER_LENGTH; // skip header (header unused for now) int32_t _maxDoc = maxDoc(); for (int32_t i = 0; i < core->fieldInfos->size(); ++i) { FieldInfoPtr fi(core->fieldInfos->fieldInfo(i)); if (_norms.contains(fi->name)) { // in case this SegmentReader is being re-opened, we might be able to reuse some norm // instances and skip loading them here continue; } if (fi->isIndexed && !fi->omitNorms) { DirectoryPtr d(directory()); String fileName(si->getNormFileName(fi->number)); if (!si->hasSeparateNorms(fi->number)) d = cfsDir; // singleNormFile means multiple norms share this file bool singleNormFile = boost::ends_with(fileName, String(L".") + IndexFileNames::NORMS_EXTENSION()); IndexInputPtr normInput; int64_t normSeek; if (singleNormFile) { normSeek = nextNormSeek; if (!singleNormStream) { singleNormStream = d->openInput(fileName, readBufferSize); singleNormRef = newLucene(); } else 
singleNormRef->incRef(); // All norms in the .nrm file can share a single IndexInput since they are only used in // a synchronized context. If this were to change in the future, a clone could be done here. normInput = singleNormStream; } else { normSeek = 0; normInput = d->openInput(fileName); } _norms.put(fi->name, newLucene(shared_from_this(), normInput, fi->number, normSeek)); nextNormSeek += _maxDoc; // increment also if some norms are separate } } } bool SegmentReader::termsIndexLoaded() { return core->termsIndexIsLoaded(); } void SegmentReader::loadTermsIndex(int32_t termsIndexDivisor) { core->loadTermsIndex(si, termsIndexDivisor); } bool SegmentReader::normsClosed() { if (singleNormStream) return false; for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { if (norm->second->refCount > 0) return false; } return true; } bool SegmentReader::normsClosed(const String& field) { return (_norms.get(field)->refCount == 0); } TermVectorsReaderPtr SegmentReader::getTermVectorsReader() { TermVectorsReaderPtr tvReader(termVectorsLocal.get()); if (!tvReader) { TermVectorsReaderPtr orig(core->getTermVectorsReaderOrig()); if (!orig) return TermVectorsReaderPtr(); else { try { tvReader = boost::dynamic_pointer_cast(orig->clone()); } catch (...) 
{ return TermVectorsReaderPtr(); } } termVectorsLocal.set(tvReader); } return tvReader; } TermVectorsReaderPtr SegmentReader::getTermVectorsReaderOrig() { return core->getTermVectorsReaderOrig(); } TermFreqVectorPtr SegmentReader::getTermFreqVector(int32_t docNumber, const String& field) { // Check if this field is invalid or has no stored term vector ensureOpen(); FieldInfoPtr fi(core->fieldInfos->fieldInfo(field)); if (!fi || !fi->storeTermVector) return TermFreqVectorPtr(); TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); if (!termVectorsReader) return TermFreqVectorPtr(); return termVectorsReader->get(docNumber, field); } void SegmentReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) { ensureOpen(); FieldInfoPtr fi(core->fieldInfos->fieldInfo(field)); if (!fi || !fi->storeTermVector) return; TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); if (!termVectorsReader) return; termVectorsReader->get(docNumber, field, mapper); } void SegmentReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) { ensureOpen(); TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); if (!termVectorsReader) return; termVectorsReader->get(docNumber, mapper); } Collection SegmentReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); if (!termVectorsReader) return Collection(); return termVectorsReader->get(docNumber); } String SegmentReader::getSegmentName() { return core->segment; } SegmentInfoPtr SegmentReader::getSegmentInfo() { return si; } void SegmentReader::setSegmentInfo(SegmentInfoPtr info) { si = info; } void SegmentReader::startCommit() { rollbackSegmentInfo = boost::dynamic_pointer_cast(si->clone()); rollbackHasChanges = _hasChanges; rollbackDeletedDocsDirty = deletedDocsDirty; rollbackNormsDirty = normsDirty; rollbackPendingDeleteCount = pendingDeleteCount; for (MapStringNorm::iterator norm = _norms.begin(); 
norm != _norms.end(); ++norm) norm->second->rollbackDirty = norm->second->dirty; } void SegmentReader::rollbackCommit() { si->reset(rollbackSegmentInfo); _hasChanges = rollbackHasChanges; deletedDocsDirty = rollbackDeletedDocsDirty; normsDirty = rollbackNormsDirty; pendingDeleteCount = rollbackPendingDeleteCount; for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) norm->second->dirty = norm->second->rollbackDirty; } DirectoryPtr SegmentReader::directory() { // Don't ensureOpen here - in certain cases, when a cloned/reopened reader needs to commit, // it may call this method on the closed original reader return core->dir; } LuceneObjectPtr SegmentReader::getFieldCacheKey() { return core->freqStream; } LuceneObjectPtr SegmentReader::getDeletesCacheKey() { return deletedDocs; } int64_t SegmentReader::getUniqueTermCount() { return core->getTermsReader()->size(); } SegmentReaderPtr SegmentReader::getOnlySegmentReader(DirectoryPtr dir) { return getOnlySegmentReader(IndexReader::open(dir, false)); } SegmentReaderPtr SegmentReader::getOnlySegmentReader(IndexReaderPtr reader) { SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(reader)); if (segmentReader) return segmentReader; DirectoryReaderPtr directoryReader(boost::dynamic_pointer_cast(reader)); if (directoryReader) { Collection subReaders(directoryReader->getSequentialSubReaders()); if (subReaders.size() != 1) boost::throw_exception(IllegalArgumentException(L"reader has " + StringUtils::toString(subReaders.size()) + L" segments instead of exactly one")); return boost::dynamic_pointer_cast(subReaders[0]); } boost::throw_exception(IllegalArgumentException(L"reader is not a SegmentReader or a single-segment DirectoryReader")); return SegmentReaderPtr(); } int32_t SegmentReader::getTermInfosIndexDivisor() { return core->termsIndexDivisor; } CoreReaders::CoreReaders(SegmentReaderPtr origInstance, DirectoryPtr dir, SegmentInfoPtr si, int32_t readBufferSize, int32_t termsIndexDivisor) { 
ref = newLucene(); segment = si->name; this->readBufferSize = readBufferSize; this->dir = dir; bool success = false; LuceneException finally; try { DirectoryPtr dir0(dir); if (si->getUseCompoundFile()) { cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); dir0 = cfsReader; } cfsDir = dir0; fieldInfos = newLucene(cfsDir, segment + L"." + IndexFileNames::FIELD_INFOS_EXTENSION()); this->termsIndexDivisor = termsIndexDivisor; TermInfosReaderPtr reader(newLucene(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor)); if (termsIndexDivisor == -1) tisNoIndex = reader; else tis = reader; // make sure that all index files have been read or are kept open so that if an index // update removes them we'll still have them freqStream = cfsDir->openInput(segment + L"." + IndexFileNames::FREQ_EXTENSION(), readBufferSize); if (fieldInfos->hasProx()) proxStream = cfsDir->openInput(segment + L"." + IndexFileNames::PROX_EXTENSION(), readBufferSize); success = true; } catch (LuceneException& e) { finally = e; } if (!success) decRef(); finally.throwException(); // Must assign this at the end -- if we hit an exception above core, we don't want to attempt to // purge the FieldCache (will hit NPE because core is not assigned yet). _origInstance = origInstance; } CoreReaders::~CoreReaders() { } TermVectorsReaderPtr CoreReaders::getTermVectorsReaderOrig() { SyncLock syncLock(this); return termVectorsReaderOrig; } FieldsReaderPtr CoreReaders::getFieldsReaderOrig() { SyncLock syncLock(this); return fieldsReaderOrig; } void CoreReaders::incRef() { SyncLock syncLock(this); ref->incRef(); } DirectoryPtr CoreReaders::getCFSReader() { SyncLock syncLock(this); return cfsReader; } TermInfosReaderPtr CoreReaders::getTermsReader() { SyncLock syncLock(this); return tis ? 
tis : tisNoIndex; } bool CoreReaders::termsIndexIsLoaded() { SyncLock syncLock(this); return tis; } void CoreReaders::loadTermsIndex(SegmentInfoPtr si, int32_t termsIndexDivisor) { SyncLock syncLock(this); if (!tis) { DirectoryPtr dir0; if (si->getUseCompoundFile()) { // In some cases, we were originally opened when CFS was not used, but then we are asked // to open the terms reader with index, the segment has switched to CFS if (!cfsReader) cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); dir0 = cfsReader; } else dir0 = dir; tis = newLucene(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor); } } void CoreReaders::decRef() { SyncLock syncLock(this); if (ref->decRef() == 0) { // close everything, nothing is shared anymore with other readers if (tis) { tis->close(); tis.reset(); // null so if an app hangs on to us we still free most ram } if (tisNoIndex) tisNoIndex->close(); if (freqStream) freqStream->close(); if (proxStream) proxStream->close(); if (termVectorsReaderOrig) termVectorsReaderOrig->close(); if (fieldsReaderOrig) fieldsReaderOrig->close(); if (cfsReader) cfsReader->close(); if (storeCFSReader) storeCFSReader->close(); // Force FieldCache to evict our entries at this point SegmentReaderPtr origInstance(_origInstance.lock()); if (origInstance) FieldCache::DEFAULT()->purge(origInstance); } } void CoreReaders::openDocStores(SegmentInfoPtr si) { SyncLock syncLock(this); BOOST_ASSERT(si->name == segment); if (!fieldsReaderOrig) { DirectoryPtr storeDir; if (si->getDocStoreOffset() != -1) { if (si->getDocStoreIsCompoundFile()) { BOOST_ASSERT(!storeCFSReader); storeCFSReader = newLucene(dir, si->getDocStoreSegment() + L"." 
+ IndexFileNames::COMPOUND_FILE_STORE_EXTENSION(), readBufferSize); storeDir = storeCFSReader; BOOST_ASSERT(storeDir); } else { storeDir = dir; BOOST_ASSERT(storeDir); } } else if (si->getUseCompoundFile()) { // In some cases, we were originally opened when CFS was not used, but then we are asked to open doc // stores after the segment has switched to CFS if (!cfsReader) cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); storeDir = cfsReader; BOOST_ASSERT(storeDir); } else { storeDir = dir; BOOST_ASSERT(storeDir); } String storesSegment(si->getDocStoreOffset() != -1 ? si->getDocStoreSegment() : segment); fieldsReaderOrig = newLucene(storeDir, storesSegment, fieldInfos, readBufferSize, si->getDocStoreOffset(), si->docCount); // Verify two sources of "maxDoc" agree if (si->getDocStoreOffset() == -1 && fieldsReaderOrig->size() != si->docCount) { boost::throw_exception(CorruptIndexException(L"doc counts differ for segment " + segment + L": fieldsReader shows " + StringUtils::toString(fieldsReaderOrig->size()) + L" but segmentInfo shows " + StringUtils::toString(si->docCount))); } if (fieldInfos->hasVectors()) // open term vector files only as needed termVectorsReaderOrig = newLucene(storeDir, storesSegment, fieldInfos, readBufferSize, si->getDocStoreOffset(), si->docCount); } } FieldsReaderLocal::FieldsReaderLocal(SegmentReaderPtr reader) { this->_reader = reader; } FieldsReaderPtr FieldsReaderLocal::initialValue() { return boost::dynamic_pointer_cast(SegmentReaderPtr(_reader)->core->getFieldsReaderOrig()->clone()); } SegmentReaderRef::SegmentReaderRef() { _refCount = 1; } SegmentReaderRef::~SegmentReaderRef() { } String SegmentReaderRef::toString() { StringStream buffer; buffer << L"refcount: " << _refCount; return buffer.str(); } int32_t SegmentReaderRef::refCount() { SyncLock syncLock(this); return _refCount; } int32_t SegmentReaderRef::incRef() { SyncLock syncLock(this); BOOST_ASSERT(_refCount > 0); return 
++_refCount; } int32_t SegmentReaderRef::decRef() { SyncLock syncLock(this); BOOST_ASSERT(_refCount > 0); return --_refCount; } Norm::Norm() { this->refCount = 1; this->normSeek = 0; this->dirty = false; this->rollbackDirty = false; this->number = 0; } Norm::Norm(SegmentReaderPtr reader, IndexInputPtr in, int32_t number, int64_t normSeek) { this->_reader = reader; this->refCount = 1; this->dirty = false; this->rollbackDirty = false; this->in = in; this->number = number; this->normSeek = normSeek; } Norm::~Norm() { } void Norm::incRef() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); ++refCount; } void Norm::closeInput() { SegmentReaderPtr reader(_reader.lock()); if (in && reader) { if (in != reader->singleNormStream) { // It's private to us -- just close it in->close(); } else { // We are sharing this with others -- decRef and maybe close the shared norm stream if (reader->singleNormRef->decRef() == 0) { reader->singleNormStream->close(); reader->singleNormStream.reset(); } } in.reset(); } } void Norm::decRef() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); if (--refCount == 0) { if (origNorm) { origNorm->decRef(); origNorm.reset(); } else closeInput(); if (origReader) origReader.reset(); if (_bytes) { BOOST_ASSERT(_bytesRef); _bytesRef->decRef(); _bytes.reset(); _bytesRef.reset(); } else { BOOST_ASSERT(!_bytesRef); } } } void Norm::bytes(uint8_t* bytesOut, int32_t offset, int32_t length) { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); if (_bytes) { // Already cached - copy from cache BOOST_ASSERT(length <= SegmentReaderPtr(_reader)->maxDoc()); MiscUtils::arrayCopy(_bytes.get(), 0, bytesOut, offset, length); } else { // Not cached if (origNorm) { // Ask origNorm to load origNorm->bytes(bytesOut, offset, length); } else { // We are orig - read ourselves from disk SyncLock instancesLock(in); in->seek(normSeek); 
in->readBytes(bytesOut, offset, length, false); } } } ByteArray Norm::bytes() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); if (!_bytes) // value not yet read { BOOST_ASSERT(!_bytesRef); if (origNorm) { // Ask origNorm to load so that for a series of reopened readers we share a single read-only byte[] _bytes = origNorm->bytes(); _bytesRef = origNorm->_bytesRef; _bytesRef->incRef(); // Once we've loaded the bytes we no longer need origNorm origNorm->decRef(); origNorm.reset(); origReader.reset(); } else { // We are the origNorm, so load the bytes for real ourself int32_t count = SegmentReaderPtr(_reader)->maxDoc(); _bytes = ByteArray::newInstance(count); // Since we are orig, in must not be null BOOST_ASSERT(in); // Read from disk. { SyncLock instancesLock(in); in->seek(normSeek); in->readBytes(_bytes.get(), 0, count, false); } _bytesRef = newLucene(); closeInput(); } } return _bytes; } SegmentReaderRefPtr Norm::bytesRef() { return _bytesRef; } ByteArray Norm::copyOnWrite() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); bytes(); BOOST_ASSERT(_bytes); BOOST_ASSERT(_bytesRef); if (_bytesRef->refCount() > 1) { // I cannot be the origNorm for another norm instance if I'm being changed. // ie, only the "head Norm" can be changed BOOST_ASSERT(refCount == 1); SegmentReaderRefPtr oldRef(_bytesRef); _bytes = SegmentReaderPtr(_reader)->cloneNormBytes(_bytes); _bytesRef = newLucene(); oldRef->decRef(); } dirty = true; return _bytes; } LuceneObjectPtr Norm::clone(LuceneObjectPtr other) { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); LuceneObjectPtr clone = other ? 
other : newLucene(); NormPtr cloneNorm(boost::dynamic_pointer_cast(clone)); cloneNorm->_reader = _reader; cloneNorm->origNorm = origNorm; cloneNorm->origReader = origReader; cloneNorm->normSeek = normSeek; cloneNorm->_bytesRef = _bytesRef; cloneNorm->_bytes = _bytes; cloneNorm->dirty = dirty; cloneNorm->number = number; cloneNorm->rollbackDirty = rollbackDirty; cloneNorm->refCount = 1; if (_bytes) { BOOST_ASSERT(_bytesRef); BOOST_ASSERT(!origNorm); // Clone holds a reference to my bytes cloneNorm->_bytesRef->incRef(); } else { BOOST_ASSERT(!_bytesRef); if (!origNorm) { // I become the origNorm for the clone cloneNorm->origNorm = shared_from_this(); cloneNorm->origReader = SegmentReaderPtr(_reader); } cloneNorm->origNorm->incRef(); } // Only the origNorm will actually readBytes from in cloneNorm->in.reset(); return cloneNorm; } void Norm::reWrite(SegmentInfoPtr si) { BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); // NOTE: norms are re-written in regular directory, not cfs si->advanceNormGen(this->number); String normFileName(si->getNormFileName(this->number)); SegmentReaderPtr reader(_reader); IndexOutputPtr out(reader->directory()->createOutput(normFileName)); bool success = false; LuceneException finally; try { try { out->writeBytes(_bytes.get(), reader->maxDoc()); } catch (LuceneException& e) { finally = e; } out->close(); finally.throwException(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { try { reader->directory()->deleteFile(normFileName); } catch (...) { // suppress this so we keep throwing the original exception } } finally.throwException(); this->dirty = false; } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentTermDocs.cpp000066400000000000000000000167221217574114600237200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermDocs.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "SegmentTermEnum.h" #include "IndexInput.h" #include "TermInfosReader.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "Term.h" #include "TermInfo.h" #include "DefaultSkipListReader.h" #include "BitVector.h" #include "MiscUtils.h" namespace Lucene { SegmentTermDocs::SegmentTermDocs(SegmentReaderPtr parent) { this->_parent = parent; this->count = 0; this->df = 0; this->_doc = 0; this->_freq = 0; this->freqBasePointer = 0; this->proxBasePointer = 0; this->skipPointer = 0; this->haveSkipped = false; this->currentFieldStoresPayloads = false; this->currentFieldOmitTermFreqAndPositions = false; this->_freqStream = boost::dynamic_pointer_cast(parent->core->freqStream->clone()); { SyncLock parentLock(parent); this->deletedDocs = parent->deletedDocs; } this->skipInterval = parent->core->getTermsReader()->getSkipInterval(); this->maxSkipLevels = parent->core->getTermsReader()->getMaxSkipLevels(); } SegmentTermDocs::~SegmentTermDocs() { } void SegmentTermDocs::seek(TermPtr term) { TermInfoPtr ti(SegmentReaderPtr(_parent)->core->getTermsReader()->get(term)); seek(ti, term); } void SegmentTermDocs::seek(TermEnumPtr termEnum) { TermInfoPtr ti; TermPtr term; SegmentTermEnumPtr segmentTermEnum(boost::dynamic_pointer_cast(termEnum)); SegmentReaderPtr parent(_parent); // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs if (segmentTermEnum && segmentTermEnum->fieldInfos == parent->core->fieldInfos) // optimized case { term = segmentTermEnum->term(); ti = segmentTermEnum->termInfo(); } else // punt case { term = termEnum->term(); ti = parent->core->getTermsReader()->get(term); } seek(ti, term); } void 
SegmentTermDocs::seek(TermInfoPtr ti, TermPtr term) { count = 0; FieldInfoPtr fi(SegmentReaderPtr(_parent)->core->fieldInfos->fieldInfo(term->_field)); currentFieldOmitTermFreqAndPositions = fi ? fi->omitTermFreqAndPositions : false; currentFieldStoresPayloads = fi ? fi->storePayloads : false; if (!ti) df = 0; else { df = ti->docFreq; _doc = 0; freqBasePointer = ti->freqPointer; proxBasePointer = ti->proxPointer; skipPointer = freqBasePointer + ti->skipOffset; _freqStream->seek(freqBasePointer); haveSkipped = false; } } void SegmentTermDocs::close() { _freqStream->close(); if (skipListReader) skipListReader->close(); } int32_t SegmentTermDocs::doc() { return _doc; } int32_t SegmentTermDocs::freq() { return _freq; } void SegmentTermDocs::skippingDoc() { } bool SegmentTermDocs::next() { while (true) { if (count == df) return false; int32_t docCode = _freqStream->readVInt(); if (currentFieldOmitTermFreqAndPositions) { _doc += docCode; _freq = 1; } else { _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit if ((docCode & 1) != 0) // if low bit is set _freq = 1; // freq is one else _freq = _freqStream->readVInt(); // else read freq } ++count; if (!deletedDocs || !deletedDocs->get(_doc)) break; skippingDoc(); } return true; } int32_t SegmentTermDocs::read(Collection docs, Collection freqs) { int32_t length = docs.size(); if (currentFieldOmitTermFreqAndPositions) return readNoTf(docs, freqs, length); else { int32_t i = 0; while (i < length && count < df) { // manually inlined call to next() for speed int32_t docCode = _freqStream->readVInt(); _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit if ((docCode & 1) != 0) // if low bit is set _freq = 1; // freq is one else _freq = _freqStream->readVInt(); // else read freq ++count; if (!deletedDocs || !deletedDocs->get(_doc)) { docs[i] = _doc; freqs[i] = _freq; ++i; } } return i; } } int32_t SegmentTermDocs::readNoTf(Collection docs, Collection freqs, int32_t length) { int32_t i = 0; while (i < 
length && count < df) { // manually inlined call to next() for speed _doc += _freqStream->readVInt(); ++count; if (!deletedDocs || !deletedDocs->get(_doc)) { docs[i] = _doc; // Hardware freq to 1 when term freqs were not stored in the index freqs[i] = 1; ++i; } } return i; } void SegmentTermDocs::skipProx(int64_t proxPointer, int32_t payloadLength) { } bool SegmentTermDocs::skipTo(int32_t target) { if (df >= skipInterval) // optimized case { if (!skipListReader) skipListReader = newLucene(boost::dynamic_pointer_cast(_freqStream->clone()), maxSkipLevels, skipInterval); // lazily clone if (!haveSkipped) // lazily initialize skip stream { skipListReader->init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads); haveSkipped = true; } int32_t newCount = skipListReader->skipTo(target); if (newCount > count) { _freqStream->seek(skipListReader->getFreqPointer()); skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength()); _doc = skipListReader->getDoc(); count = newCount; } } // done skipping, now just scan do { if (!next()) return false; } while (target > _doc); return true; } IndexInputPtr SegmentTermDocs::freqStream() { return _freqStream; } void SegmentTermDocs::freqStream(IndexInputPtr freqStream) { _freqStream = freqStream; } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentTermEnum.cpp000066400000000000000000000160511217574114600237270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermEnum.h" #include "TermInfosWriter.h" #include "IndexInput.h" #include "TermBuffer.h" #include "TermInfo.h" #include "StringUtils.h" namespace Lucene { SegmentTermEnum::SegmentTermEnum() { format = 0; termBuffer = newLucene(); prevBuffer = newLucene(); scanBuffer = newLucene(); _termInfo = newLucene(); formatM1SkipInterval = 0; size = 0; position = -1; indexPointer = 0; indexInterval = 0; skipInterval = 0; maxSkipLevels = 0; isIndex = false; maxSkipLevels = 0; } SegmentTermEnum::SegmentTermEnum(IndexInputPtr i, FieldInfosPtr fis, bool isi) { format = 0; termBuffer = newLucene(); prevBuffer = newLucene(); scanBuffer = newLucene(); _termInfo = newLucene(); formatM1SkipInterval = 0; size = 0; position = -1; indexPointer = 0; indexInterval = 0; skipInterval = 0; maxSkipLevels = 0; input = i; fieldInfos = fis; isIndex = isi; maxSkipLevels = 1; // use single-level skip lists for formats > -3 int32_t firstInt = input->readInt(); if (firstInt >= 0) { // original-format file, without explicit format version number format = 0; size = firstInt; // back-compatible settings indexInterval = 128; skipInterval = INT_MAX; // switch off skipTo optimization } else { // we have a format version number format = firstInt; // check that it is a format we can understand if (format < TermInfosWriter::FORMAT_CURRENT) boost::throw_exception(CorruptIndexException(L"Unknown format version:" + StringUtils::toString(format) + L" expected " + StringUtils::toString(TermInfosWriter::FORMAT_CURRENT) + L" or higher")); size = input->readLong(); // read the size if (format == -1) { if (!isIndex) { indexInterval = input->readInt(); formatM1SkipInterval = input->readInt(); } // switch off skipTo optimization for file format prior to 1.4rc2 skipInterval = INT_MAX; } else { indexInterval = input->readInt(); skipInterval = input->readInt(); if (format <= TermInfosWriter::FORMAT) { // 
this new format introduces multi-level skipping maxSkipLevels = input->readInt(); } } BOOST_ASSERT(indexInterval > 0); // must not be negative BOOST_ASSERT(skipInterval > 0); // must not be negative } if (format > TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { termBuffer->setPreUTF8Strings(); scanBuffer->setPreUTF8Strings(); prevBuffer->setPreUTF8Strings(); } } SegmentTermEnum::~SegmentTermEnum() { } LuceneObjectPtr SegmentTermEnum::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); SegmentTermEnumPtr cloneEnum(boost::dynamic_pointer_cast(TermEnum::clone(clone))); cloneEnum->format = format; cloneEnum->isIndex = isIndex; cloneEnum->formatM1SkipInterval = formatM1SkipInterval; cloneEnum->fieldInfos = fieldInfos; cloneEnum->size = size; cloneEnum->position = position; cloneEnum->indexPointer = indexPointer; cloneEnum->indexInterval = indexInterval; cloneEnum->skipInterval = skipInterval; cloneEnum->maxSkipLevels = maxSkipLevels; cloneEnum->input = boost::dynamic_pointer_cast(input->clone()); cloneEnum->_termInfo = newLucene(_termInfo); cloneEnum->termBuffer = boost::dynamic_pointer_cast(termBuffer->clone()); cloneEnum->prevBuffer = boost::dynamic_pointer_cast(prevBuffer->clone()); cloneEnum->scanBuffer = newLucene(); return cloneEnum; } void SegmentTermEnum::seek(int64_t pointer, int64_t p, TermPtr t, TermInfoPtr ti) { input->seek(pointer); position = p; termBuffer->set(t); prevBuffer->reset(); _termInfo->set(ti); } bool SegmentTermEnum::next() { if (position++ >= size - 1) { prevBuffer->set(termBuffer); termBuffer->reset(); return false; } prevBuffer->set(termBuffer); termBuffer->read(input, fieldInfos); _termInfo->docFreq = input->readVInt(); // read doc freq _termInfo->freqPointer += input->readVLong(); // read freq pointer _termInfo->proxPointer += input->readVLong(); // read prox pointer if (format == -1) { // just read skipOffset in order to increment file pointer; value is never used // since skipTo is switched off if 
(!isIndex && _termInfo->docFreq > formatM1SkipInterval) _termInfo->skipOffset = input->readVInt(); } else if (_termInfo->docFreq >= skipInterval) _termInfo->skipOffset = input->readVInt(); if (isIndex) indexPointer += input->readVLong(); // read index pointer return true; } int32_t SegmentTermEnum::scanTo(TermPtr term) { scanBuffer->set(term); int32_t count = 0; while (scanBuffer->compareTo(termBuffer) > 0 && next()) ++count; return count; } TermPtr SegmentTermEnum::term() { return termBuffer->toTerm(); } TermPtr SegmentTermEnum::prev() { return prevBuffer->toTerm(); } TermInfoPtr SegmentTermEnum::termInfo() { return newLucene(_termInfo); } void SegmentTermEnum::termInfo(TermInfoPtr ti) { ti->set(_termInfo); } int32_t SegmentTermEnum::docFreq() { return _termInfo->docFreq; } int64_t SegmentTermEnum::freqPointer() { return _termInfo->freqPointer; } int64_t SegmentTermEnum::proxPointer() { return _termInfo->proxPointer; } void SegmentTermEnum::close() { input->close(); } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentTermPositionVector.cpp000066400000000000000000000037471217574114600260220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermPositionVector.h" #include "TermVectorOffsetInfo.h" namespace Lucene { SegmentTermPositionVector::SegmentTermPositionVector(const String& field, Collection terms, Collection termFreqs, Collection< Collection > positions, Collection< Collection > offsets) : SegmentTermVector(field, terms, termFreqs) { this->offsets = offsets; this->positions = positions; } SegmentTermPositionVector::~SegmentTermPositionVector() { } const Collection SegmentTermPositionVector::EMPTY_TERM_POS() { static Collection _EMPTY_TERM_POS; if (!_EMPTY_TERM_POS) _EMPTY_TERM_POS = Collection::newInstance(); return _EMPTY_TERM_POS; } Collection SegmentTermPositionVector::getOffsets(int32_t index) { Collection result(TermVectorOffsetInfo::EMPTY_OFFSET_INFO()); if (!offsets) return Collection(); if (index >=0 && index < offsets.size()) result = offsets[index]; return result; } Collection SegmentTermPositionVector::getTermPositions(int32_t index) { Collection result(EMPTY_TERM_POS()); if (!positions) return Collection(); if (index >= 0 && index < positions.size()) result = positions[index]; return result; } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentTermPositions.cpp000066400000000000000000000131621217574114600250120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermPositions.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "TermInfo.h" #include "IndexInput.h" #include "MiscUtils.h" namespace Lucene { SegmentTermPositions::SegmentTermPositions(SegmentReaderPtr parent) : SegmentTermDocs(parent) { this->proxCount = 0; this->position = 0; this->payloadLength = 0; this->needToLoadPayload = false; this->lazySkipPointer = -1; this->lazySkipProxCount = 0; } SegmentTermPositions::~SegmentTermPositions() { } void SegmentTermPositions::seek(TermInfoPtr ti, TermPtr term) { SegmentTermDocs::seek(ti, term); if (ti) lazySkipPointer = ti->proxPointer; lazySkipProxCount = 0; proxCount = 0; payloadLength = 0; needToLoadPayload = false; } void SegmentTermPositions::close() { SegmentTermDocs::close(); if (proxStream) proxStream->close(); } int32_t SegmentTermPositions::nextPosition() { if (currentFieldOmitTermFreqAndPositions) { // This field does not store term freq, positions, payloads return 0; } // perform lazy skips if necessary lazySkip(); --proxCount; position += readDeltaPosition(); return position; } int32_t SegmentTermPositions::readDeltaPosition() { int32_t delta = proxStream->readVInt(); if (currentFieldStoresPayloads) { // if the current field stores payloads then the position delta is shifted one bit to the left. 
// if the LSB is set, then we have to read the current payload length if ((delta & 1) != 0) payloadLength = proxStream->readVInt(); delta = MiscUtils::unsignedShift(delta, 1); needToLoadPayload = true; } return delta; } void SegmentTermPositions::skippingDoc() { // we remember to skip a document lazily lazySkipProxCount += _freq; } bool SegmentTermPositions::next() { // we remember to skip the remaining positions of the current document lazily lazySkipProxCount += proxCount; if (SegmentTermDocs::next()) { proxCount = _freq; // note frequency position = 0; // reset position return true; } return false; } int32_t SegmentTermPositions::read(Collection docs, Collection freqs) { boost::throw_exception(UnsupportedOperationException(L"TermPositions does not support processing multiple documents in one call. Use TermDocs instead.")); return 0; } void SegmentTermPositions::skipProx(int64_t proxPointer, int32_t payloadLength) { // we save the pointer, we might have to skip there lazily lazySkipPointer = proxPointer; lazySkipProxCount = 0; proxCount = 0; this->payloadLength = payloadLength; needToLoadPayload = false; } void SegmentTermPositions::skipPositions(int32_t n) { BOOST_ASSERT(!currentFieldOmitTermFreqAndPositions); for (int32_t i = n; i > 0; --i) // skip unread positions { readDeltaPosition(); skipPayload(); } } void SegmentTermPositions::skipPayload() { if (needToLoadPayload && payloadLength > 0) proxStream->seek(proxStream->getFilePointer() + payloadLength); needToLoadPayload = false; } void SegmentTermPositions::lazySkip() { if (!proxStream) { // clone lazily proxStream = boost::dynamic_pointer_cast(SegmentReaderPtr(_parent)->core->proxStream->clone()); } // we might have to skip the current payload if it was not read yet skipPayload(); if (lazySkipPointer != -1) { proxStream->seek(lazySkipPointer); lazySkipPointer = -1; } if (lazySkipProxCount != 0) { skipPositions(lazySkipProxCount); lazySkipProxCount = 0; } } int32_t SegmentTermPositions::getPayloadLength() { 
return payloadLength; } ByteArray SegmentTermPositions::getPayload(ByteArray data, int32_t offset) { if (!needToLoadPayload) boost::throw_exception(IOException(L"Either no payload exists at this term position or an attempt was made to load it more than once.")); // read payloads lazily ByteArray retArray; int32_t retOffset = 0; if (!data || data.size() - offset < payloadLength) { // the array is too small to store the payload data, so we allocate a new one retArray = ByteArray::newInstance(payloadLength); retOffset = 0; } else { retArray = data; retOffset = offset; } proxStream->readBytes(retArray.get(), retOffset, payloadLength); needToLoadPayload = false; return retArray; } bool SegmentTermPositions::isPayloadAvailable() { return (needToLoadPayload && payloadLength > 0); } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentTermVector.cpp000066400000000000000000000042101217574114600242570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermVector.h" namespace Lucene { SegmentTermVector::SegmentTermVector(const String& field, Collection terms, Collection termFreqs) { this->field = field; this->terms = terms; this->termFreqs = termFreqs; } SegmentTermVector::~SegmentTermVector() { } String SegmentTermVector::getField() { return field; } String SegmentTermVector::toString() { StringStream segTermVector; segTermVector << L"{" << field; if (terms) { for (int32_t i = 0; i < terms.size(); ++i) { if (i > 0) segTermVector << L", "; segTermVector << terms[i] << L"/" << termFreqs[i]; } } segTermVector << L"}"; return segTermVector.str(); } int32_t SegmentTermVector::size() { return terms ? 
terms.size() : 0; } Collection SegmentTermVector::getTerms() { return terms; } Collection SegmentTermVector::getTermFrequencies() { return termFreqs; } int32_t SegmentTermVector::indexOf(const String& term) { if (!terms) return -1; Collection::iterator search = std::lower_bound(terms.begin(), terms.end(), term); return (search == terms.end() || term < *search) ? -1 : std::distance(terms.begin(), search); } Collection SegmentTermVector::indexesOf(Collection termNumbers, int32_t start, int32_t length) { Collection res(Collection::newInstance(length)); for (int32_t i = 0; i < length; ++i) res[i] = indexOf(termNumbers[start + i]); return res; } } LucenePlusPlus-rel_3.0.4/src/core/index/SegmentWriteState.cpp000066400000000000000000000024071217574114600242660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentWriteState.h" namespace Lucene { SegmentWriteState::SegmentWriteState(DocumentsWriterPtr docWriter, DirectoryPtr directory, const String& segmentName, const String& docStoreSegmentName, int32_t numDocs, int32_t numDocsInStore, int32_t termIndexInterval) { this->_docWriter = docWriter; this->directory = directory; this->segmentName = segmentName; this->docStoreSegmentName = docStoreSegmentName; this->numDocs = numDocs; this->numDocsInStore = numDocsInStore; this->termIndexInterval = termIndexInterval; this->flushedFiles = HashSet::newInstance(); } SegmentWriteState::~SegmentWriteState() { } String SegmentWriteState::segmentFileName(const String& ext) { return segmentName + L"." 
+ ext; } } LucenePlusPlus-rel_3.0.4/src/core/index/SerialMergeScheduler.cpp000066400000000000000000000015201217574114600247010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SerialMergeScheduler.h" #include "IndexWriter.h" namespace Lucene { SerialMergeScheduler::~SerialMergeScheduler() { } void SerialMergeScheduler::merge(IndexWriterPtr writer) { SyncLock syncLock(this); while (true) { OneMergePtr merge(writer->getNextMerge()); if (!merge) break; writer->merge(merge); } } void SerialMergeScheduler::close() { } } LucenePlusPlus-rel_3.0.4/src/core/index/SnapshotDeletionPolicy.cpp000066400000000000000000000073461217574114600253220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SnapshotDeletionPolicy.h" #include "_SnapshotDeletionPolicy.h" namespace Lucene { SnapshotDeletionPolicy::SnapshotDeletionPolicy(IndexDeletionPolicyPtr primary) { this->primary = primary; } SnapshotDeletionPolicy::~SnapshotDeletionPolicy() { } void SnapshotDeletionPolicy::onInit(Collection commits) { SyncLock syncLock(this); primary->onInit(wrapCommits(commits)); lastCommit = commits[commits.size() - 1]; } void SnapshotDeletionPolicy::onCommit(Collection commits) { SyncLock syncLock(this); primary->onCommit(wrapCommits(commits)); lastCommit = commits[commits.size() - 1]; } IndexCommitPtr SnapshotDeletionPolicy::snapshot() { SyncLock syncLock(this); if (!lastCommit) boost::throw_exception(IllegalStateException(L"no index commits to snapshot")); if (_snapshot.empty()) _snapshot = lastCommit->getSegmentsFileName(); else boost::throw_exception(IllegalStateException(L"snapshot is already set; please call release() first")); return lastCommit; } void SnapshotDeletionPolicy::release() { SyncLock syncLock(this); if (!_snapshot.empty()) _snapshot.clear(); else boost::throw_exception(IllegalStateException(L"snapshot was not set; please call snapshot() first")); } Collection SnapshotDeletionPolicy::wrapCommits(Collection commits) { Collection myCommits(Collection::newInstance()); for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) myCommits.add(newLucene(shared_from_this(), *commit)); return myCommits; } MyCommitPoint::MyCommitPoint(SnapshotDeletionPolicyPtr deletionPolicy, IndexCommitPtr cp) { this->_deletionPolicy = deletionPolicy; this->cp = cp; } MyCommitPoint::~MyCommitPoint() { } String MyCommitPoint::toString() { return L"SnapshotDeletionPolicy.SnapshotCommitPoint(" + cp->toString() + L")"; } String MyCommitPoint::getSegmentsFileName() { return cp->getSegmentsFileName(); } HashSet MyCommitPoint::getFileNames() { return 
cp->getFileNames(); } DirectoryPtr MyCommitPoint::getDirectory() { return cp->getDirectory(); } void MyCommitPoint::deleteCommit() { SnapshotDeletionPolicyPtr deletionPolicy(_deletionPolicy); SyncLock policyLock(deletionPolicy); // Suppress the delete request if this commit point is our current snapshot. if (deletionPolicy->_snapshot.empty() || deletionPolicy->_snapshot != getSegmentsFileName()) cp->deleteCommit(); } bool MyCommitPoint::isDeleted() { return cp->isDeleted(); } int64_t MyCommitPoint::getVersion() { return cp->getVersion(); } int64_t MyCommitPoint::getGeneration() { return cp->getGeneration(); } MapStringString MyCommitPoint::getUserData() { return cp->getUserData(); } bool MyCommitPoint::isOptimized() { return cp->isOptimized(); } } LucenePlusPlus-rel_3.0.4/src/core/index/SortedTermVectorMapper.cpp000066400000000000000000000101151217574114600252630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SortedTermVectorMapper.h" #include "TermVectorEntry.h" namespace Lucene { const wchar_t* SortedTermVectorMapper::ALL = L"_ALL_"; SortedTermVectorMapper::SortedTermVectorMapper(TermVectorEntryComparator comparator) : TermVectorMapper(false, false) { this->storeOffsets = false; this->storePositions = false; this->comparator = comparator; this->currentSet = Collection::newInstance(); this->termToTVE = MapStringTermVectorEntry::newInstance(); } SortedTermVectorMapper::SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator) : TermVectorMapper(ignoringPositions, ignoringPositions) { this->storeOffsets = false; this->storePositions = false; this->comparator = comparator; this->currentSet = Collection::newInstance(); this->termToTVE = MapStringTermVectorEntry::newInstance(); } SortedTermVectorMapper::~SortedTermVectorMapper() { } void SortedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { // We need to combine any previous mentions of the term TermVectorEntryPtr entry(termToTVE.get(term)); if (!entry) { entry = newLucene(ALL, term, frequency, storeOffsets ? offsets : Collection(), storePositions ? positions : Collection()); termToTVE.put(term, entry); if (!currentSet.contains_if(luceneEqualTo(entry))) currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry); } else { entry->setFrequency(entry->getFrequency() + frequency); if (storeOffsets) { Collection existingOffsets(entry->getOffsets()); // A few diff. 
cases here: offsets is null, existing offsets is null, both are null, same for positions if (existingOffsets && offsets && !offsets.empty()) { // copy over the existing offsets Collection newOffsets(Collection::newInstance(existingOffsets.begin(), existingOffsets.end())); newOffsets.addAll(offsets.begin(), offsets.end()); entry->setOffsets(newOffsets); } else if (!existingOffsets && offsets && !offsets.empty()) entry->setOffsets(offsets); // else leave it alone } if (storePositions) { Collection existingPositions(entry->getPositions()); if (existingPositions && positions && !positions.empty()) { Collection newPositions(existingPositions); newPositions.addAll(positions.begin(), positions.end()); entry->setPositions(newPositions); } else if (!existingPositions && positions && !positions.empty()) entry->setPositions(positions); // else leave it alone } } } void SortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { this->storeOffsets = storeOffsets; this->storePositions = storePositions; } Collection SortedTermVectorMapper::getTermVectorEntrySet() { return currentSet; } } LucenePlusPlus-rel_3.0.4/src/core/index/StoredFieldsWriter.cpp000066400000000000000000000173331217574114600244400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StoredFieldsWriter.h" #include "StoredFieldsWriterPerThread.h" #include "RAMOutputStream.h" #include "SegmentWriteState.h" #include "FieldsWriter.h" #include "IndexFileNames.h" #include "IndexWriter.h" #include "Directory.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { StoredFieldsWriter::StoredFieldsWriter(DocumentsWriterPtr docWriter, FieldInfosPtr fieldInfos) { lastDocID = 0; docFreeList = Collection::newInstance(1); freeCount = 0; allocCount = 0; this->_docWriter = docWriter; this->fieldInfos = fieldInfos; } StoredFieldsWriter::~StoredFieldsWriter() { } StoredFieldsWriterPerThreadPtr StoredFieldsWriter::addThread(DocStatePtr docState) { return newLucene(docState, shared_from_this()); } void StoredFieldsWriter::flush(SegmentWriteStatePtr state) { SyncLock syncLock(this); if (state->numDocsInStore > 0) { // It's possible that all documents seen in this segment hit non-aborting exceptions, // in which case we will not have yet init'd the FieldsWriter initFieldsWriter(); // Fill fdx file to include any final docs that we skipped because they hit non-aborting // exceptions fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); } if (fieldsWriter) fieldsWriter->flush(); } void StoredFieldsWriter::initFieldsWriter() { if (!fieldsWriter) { DocumentsWriterPtr docWriter(_docWriter); String docStoreSegment(docWriter->getDocStoreSegment()); if (!docStoreSegment.empty()) { fieldsWriter = newLucene(docWriter->directory, docStoreSegment, fieldInfos); docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::FIELDS_EXTENSION()); docWriter->addOpenFile(docStoreSegment + L"." 
+ IndexFileNames::FIELDS_INDEX_EXTENSION()); lastDocID = 0; } } } void StoredFieldsWriter::closeDocStore(SegmentWriteStatePtr state) { SyncLock syncLock(this); int32_t inc = state->numDocsInStore - lastDocID; if (inc > 0) { initFieldsWriter(); fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); } if (fieldsWriter) { fieldsWriter->close(); fieldsWriter.reset(); lastDocID = 0; BOOST_ASSERT(!state->docStoreSegmentName.empty()); state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_EXTENSION()); state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); DocumentsWriterPtr docWriter(state->_docWriter); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_EXTENSION()); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); String fileName(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); if (4 + ((int64_t)state->numDocsInStore) * 8 != state->directory->fileLength(fileName)) { boost::throw_exception(RuntimeException(L"after flush: fdx size mismatch: " + StringUtils::toString(state->numDocsInStore) + L" docs vs " + StringUtils::toString(state->directory->fileLength(fileName)) + L" length in bytes of " + fileName + L" file exists?=" + StringUtils::toString(state->directory->fileExists(fileName)))); } } } StoredFieldsWriterPerDocPtr StoredFieldsWriter::getPerDoc() { SyncLock syncLock(this); if (freeCount == 0) { ++allocCount; if (allocCount > docFreeList.size()) { // Grow our free list up front to make sure we have enough space to recycle all // outstanding StoredFieldsWriterPerDoc instances BOOST_ASSERT(allocCount == docFreeList.size() + 1); docFreeList.resize(MiscUtils::getNextSize(allocCount)); } return newLucene(shared_from_this()); } else return docFreeList[--freeCount]; } void StoredFieldsWriter::abort() { SyncLock syncLock(this); if (fieldsWriter) { try { 
fieldsWriter->close(); } catch (...) { } fieldsWriter.reset(); lastDocID = 0; } } void StoredFieldsWriter::fill(int32_t docID) { int32_t docStoreOffset = DocumentsWriterPtr(_docWriter)->getDocStoreOffset(); // We must "catch up" for all docs before us that had no stored fields int32_t end = docID + docStoreOffset; while (lastDocID < end) { fieldsWriter->skipDocument(); ++lastDocID; } } void StoredFieldsWriter::finishDocument(StoredFieldsWriterPerDocPtr perDoc) { SyncLock syncLock(this); IndexWriterPtr writer(DocumentsWriterPtr(_docWriter)->_writer); BOOST_ASSERT(writer->testPoint(L"StoredFieldsWriter.finishDocument start")); initFieldsWriter(); fill(perDoc->docID); // Append stored fields to the real FieldsWriter fieldsWriter->flushDocument(perDoc->numStoredFields, perDoc->fdt); ++lastDocID; perDoc->reset(); free(perDoc); BOOST_ASSERT(writer->testPoint(L"StoredFieldsWriter.finishDocument end")); } bool StoredFieldsWriter::freeRAM() { return false; } void StoredFieldsWriter::free(StoredFieldsWriterPerDocPtr perDoc) { SyncLock syncLock(this); BOOST_ASSERT(freeCount < docFreeList.size()); BOOST_ASSERT(perDoc->numStoredFields == 0); BOOST_ASSERT(perDoc->fdt->length() == 0); BOOST_ASSERT(perDoc->fdt->getFilePointer() == 0); docFreeList[freeCount++] = perDoc; } StoredFieldsWriterPerDoc::StoredFieldsWriterPerDoc(StoredFieldsWriterPtr fieldsWriter) { this->_fieldsWriter = fieldsWriter; buffer = DocumentsWriterPtr(fieldsWriter->_docWriter)->newPerDocBuffer(); fdt = newLucene(buffer); numStoredFields = 0; } StoredFieldsWriterPerDoc::~StoredFieldsWriterPerDoc() { } void StoredFieldsWriterPerDoc::reset() { fdt->reset(); buffer->recycle(); numStoredFields = 0; } void StoredFieldsWriterPerDoc::abort() { reset(); StoredFieldsWriterPtr(_fieldsWriter)->free(shared_from_this()); } int64_t StoredFieldsWriterPerDoc::sizeInBytes() { return buffer->getSizeInBytes(); } void StoredFieldsWriterPerDoc::finish() { StoredFieldsWriterPtr(_fieldsWriter)->finishDocument(shared_from_this()); } } 
LucenePlusPlus-rel_3.0.4/src/core/index/StoredFieldsWriterPerThread.cpp000066400000000000000000000044601217574114600262340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StoredFieldsWriterPerThread.h" #include "StoredFieldsWriter.h" #include "FieldsWriter.h" #include "RAMOutputStream.h" namespace Lucene { StoredFieldsWriterPerThread::StoredFieldsWriterPerThread(DocStatePtr docState, StoredFieldsWriterPtr storedFieldsWriter) { this->_storedFieldsWriter = storedFieldsWriter; this->docState = docState; localFieldsWriter = newLucene(IndexOutputPtr(), IndexOutputPtr(), storedFieldsWriter->fieldInfos); } StoredFieldsWriterPerThread::~StoredFieldsWriterPerThread() { } void StoredFieldsWriterPerThread::startDocument() { if (doc) { // Only happens if previous document hit non-aborting exception while writing stored fields // into localFieldsWriter doc->reset(); doc->docID = docState->docID; } } void StoredFieldsWriterPerThread::addField(FieldablePtr field, FieldInfoPtr fieldInfo) { if (!doc) { doc = StoredFieldsWriterPtr(_storedFieldsWriter)->getPerDoc(); doc->docID = docState->docID; localFieldsWriter->setFieldsStream(doc->fdt); BOOST_ASSERT(doc->numStoredFields == 0); BOOST_ASSERT(doc->fdt->length() == 0); BOOST_ASSERT(doc->fdt->getFilePointer() == 0); } localFieldsWriter->writeField(fieldInfo, field); BOOST_ASSERT(docState->testPoint(L"StoredFieldsWriterPerThread.processFields.writeField")); ++doc->numStoredFields; } DocWriterPtr StoredFieldsWriterPerThread::finishDocument() { // If there were any stored fields in this doc, doc will be non-null; else it's null. 
DocWriterPtr finishDoc(doc); doc.reset(); return finishDoc; } void StoredFieldsWriterPerThread::abort() { if (doc) { doc->abort(); doc.reset(); } } } LucenePlusPlus-rel_3.0.4/src/core/index/Term.cpp000066400000000000000000000040241217574114600215540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Term.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { Term::Term(const String& fld, const String& txt) : _field(fld), _text(txt) { } Term::~Term() { } String Term::field() { return _field; } String Term::text() { return _text; } TermPtr Term::createTerm(const String& text) { return newLucene(_field, text); } bool Term::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!other) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; TermPtr otherTerm(boost::dynamic_pointer_cast(other)); if (!otherTerm) return false; return (_field == otherTerm->_field && _text == otherTerm->_text); } int32_t Term::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + (_field.empty() ? 0 : StringUtils::hashCode(_field)); result = prime * result + (_text.empty() ? 
0 : StringUtils::hashCode(_text)); return result; } int32_t Term::compareTo(LuceneObjectPtr other) { TermPtr otherTerm(boost::static_pointer_cast(other)); if (_field == otherTerm->_field) return _text.compare(otherTerm->_text); else return _field.compare(otherTerm->_field); } void Term::set(const String& fld, const String& txt) { _field = fld; _text = txt; } String Term::toString() { return _field + L":" + _text; } } LucenePlusPlus-rel_3.0.4/src/core/index/TermBuffer.cpp000066400000000000000000000074461217574114600227210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermBuffer.h" #include "IndexInput.h" #include "FieldInfos.h" #include "Term.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { TermBuffer::TermBuffer() { preUTF8Strings = false; text = newLucene(); bytes = newLucene(); } TermBuffer::~TermBuffer() { } int32_t TermBuffer::compareTo(LuceneObjectPtr other) { TermBufferPtr otherTermBuffer(boost::static_pointer_cast(other)); if (field == otherTermBuffer->field) return compareChars(text->result.get(), text->length, otherTermBuffer->text->result.get(), otherTermBuffer->text->length); else return field.compare(otherTermBuffer->field); } int32_t TermBuffer::compareChars(wchar_t* chars1, int32_t len1, wchar_t* chars2, int32_t len2) { int32_t end = len1 < len2 ? 
len1 : len2; for (int32_t k = 0; k < end; ++k) { wchar_t c1 = chars1[k]; wchar_t c2 = chars2[k]; if (c1 != c2) return c1 - c2; } return len1 - len2; } void TermBuffer::setPreUTF8Strings() { preUTF8Strings = true; } void TermBuffer::read(IndexInputPtr input, FieldInfosPtr fieldInfos) { this->term.reset(); // invalidate cache int32_t start = input->readVInt(); int32_t length = input->readVInt(); int32_t totalLength = start + length; if (preUTF8Strings) text->setLength(start + input->readChars(text->result.get(), start, length)); else { StringUtils::toUTF8(text->result.get(), text->length, bytes); bytes->setLength(totalLength); input->readBytes(bytes->result.get(), start, length); StringUtils::toUnicode(bytes->result.get(), totalLength, text); } this->field = fieldInfos->fieldName(input->readVInt()); } void TermBuffer::set(TermPtr term) { if (!term) { reset(); return; } String termText(term->text()); int32_t termLen = termText.length(); text->setLength(termLen); MiscUtils::arrayCopy(termText.begin(), 0, text->result.get(), 0, termLen); field = term->field(); this->term = term; } void TermBuffer::set(TermBufferPtr other) { text->copyText(other->text); field = other->field; term = other->term; } void TermBuffer::reset() { field.clear(); text->setLength(0); term.reset(); } TermPtr TermBuffer::toTerm() { if (field.empty()) // unset return TermPtr(); if (!term) term = newLucene(field, String(text->result.get(), text->length)); return term; } LuceneObjectPtr TermBuffer::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(); TermBufferPtr cloneBuffer(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneBuffer->field = field; cloneBuffer->term = term; cloneBuffer->preUTF8Strings = preUTF8Strings; cloneBuffer->bytes = newLucene(); cloneBuffer->text = newLucene(); cloneBuffer->text->copyText(text); return cloneBuffer; } } LucenePlusPlus-rel_3.0.4/src/core/index/TermDocs.cpp000066400000000000000000000025011217574114600223630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermDocs.h" namespace Lucene { TermDocs::TermDocs() { } void TermDocs::seek(TermPtr term) { BOOST_ASSERT(false); // override } void TermDocs::seek(TermEnumPtr termEnum) { BOOST_ASSERT(false); // override } int32_t TermDocs::doc() { BOOST_ASSERT(false); return 0; // override } int32_t TermDocs::freq() { BOOST_ASSERT(false); return 0; // override } bool TermDocs::next() { BOOST_ASSERT(false); return false; // override } int32_t TermDocs::read(Collection docs, Collection freqs) { BOOST_ASSERT(false); return 0; // override } bool TermDocs::skipTo(int32_t target) { BOOST_ASSERT(false); return false; // override } void TermDocs::close() { BOOST_ASSERT(false); // override } } LucenePlusPlus-rel_3.0.4/src/core/index/TermEnum.cpp000066400000000000000000000006741217574114600224100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermEnum.h" namespace Lucene { TermEnum::~TermEnum() { } } LucenePlusPlus-rel_3.0.4/src/core/index/TermFreqVector.cpp000066400000000000000000000025441217574114600235620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermFreqVector.h" namespace Lucene { TermFreqVector::TermFreqVector() { } TermFreqVector::~TermFreqVector() { } String TermFreqVector::getField() { BOOST_ASSERT(false); return L""; // override } int32_t TermFreqVector::size() { BOOST_ASSERT(false); return 0; // override } Collection TermFreqVector::getTerms() { BOOST_ASSERT(false); return Collection(); // override } Collection TermFreqVector::getTermFrequencies() { BOOST_ASSERT(false); return Collection(); // override } int32_t TermFreqVector::indexOf(const String& term) { BOOST_ASSERT(false); return 0; // override } Collection TermFreqVector::indexesOf(Collection terms, int32_t start, int32_t length) { BOOST_ASSERT(false); return Collection(); // override } } LucenePlusPlus-rel_3.0.4/src/core/index/TermInfo.cpp000066400000000000000000000022161217574114600223710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermInfo.h" namespace Lucene { TermInfo::TermInfo(TermInfoPtr ti) { set(ti); } TermInfo::TermInfo(int32_t df, int64_t fp, int64_t pp) { docFreq = df; freqPointer = fp; proxPointer = pp; skipOffset = 0; } TermInfo::~TermInfo() { } void TermInfo::set(int32_t docFreq, int64_t freqPointer, int64_t proxPointer, int32_t skipOffset) { this->docFreq = docFreq; this->freqPointer = freqPointer; this->proxPointer = proxPointer; this->skipOffset = skipOffset; } void TermInfo::set(TermInfoPtr ti) { docFreq = ti->docFreq; freqPointer = ti->freqPointer; proxPointer = ti->proxPointer; skipOffset = ti->skipOffset; } } LucenePlusPlus-rel_3.0.4/src/core/index/TermInfosReader.cpp000066400000000000000000000212161217574114600237000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermInfosReader.h" #include "SegmentTermEnum.h" #include "Directory.h" #include "IndexFileNames.h" #include "Term.h" #include "StringUtils.h" namespace Lucene { const int32_t TermInfosReader::DEFAULT_CACHE_SIZE = 1024; TermInfosReader::TermInfosReader(DirectoryPtr dir, const String& seg, FieldInfosPtr fis, int32_t readBufferSize, int32_t indexDivisor) { bool success = false; if (indexDivisor < 1 && indexDivisor != -1) boost::throw_exception(IllegalArgumentException(L"indexDivisor must be -1 (don't load terms index) or greater than 0: got " + StringUtils::toString(indexDivisor))); LuceneException finally; try { directory = dir; segment = seg; fieldInfos = fis; origEnum = newLucene(directory->openInput(segment + L"." 
+ IndexFileNames::TERMS_EXTENSION(), readBufferSize), fieldInfos, false); _size = origEnum->size; if (indexDivisor != -1) { // Load terms index totalIndexInterval = origEnum->indexInterval * indexDivisor; SegmentTermEnumPtr indexEnum(newLucene(directory->openInput(segment + L"." + IndexFileNames::TERMS_INDEX_EXTENSION(), readBufferSize), fieldInfos, true)); try { int32_t indexSize = 1 + ((int32_t)indexEnum->size - 1) / indexDivisor; // otherwise read index indexTerms = Collection::newInstance(indexSize); indexInfos = Collection::newInstance(indexSize); indexPointers = Collection::newInstance(indexSize); for (int32_t i = 0; indexEnum->next(); ++i) { indexTerms[i] = indexEnum->term(); indexInfos[i] = indexEnum->termInfo(); indexPointers[i] = indexEnum->indexPointer; for (int32_t j = 1; j < indexDivisor; ++j) { if (!indexEnum->next()) break; } } } catch (LuceneException& e) { finally = e; } indexEnum->close(); } else { // Do not load terms index totalIndexInterval = -1; } success = true; } catch (LuceneException& e) { finally = e; } // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. // In this case, we want to explicitly close any subset of things that were opened. 
if (!success) close(); finally.throwException(); } TermInfosReader::~TermInfosReader() { } int32_t TermInfosReader::getMaxSkipLevels() { return origEnum->maxSkipLevels; } int32_t TermInfosReader::getSkipInterval() { return origEnum->skipInterval; } void TermInfosReader::close() { if (origEnum) origEnum->close(); threadResources.close(); } int64_t TermInfosReader::size() { return _size; } TermInfosReaderThreadResourcesPtr TermInfosReader::getThreadResources() { TermInfosReaderThreadResourcesPtr resources(threadResources.get()); if (!resources) { resources = newLucene(); resources->termEnum = terms(); // Cache does not have to be thread-safe, it is only used by one thread at the same time resources->termInfoCache = newInstance(DEFAULT_CACHE_SIZE); threadResources.set(resources); } return resources; } int32_t TermInfosReader::getIndexOffset(TermPtr term) { // binary search indexTerms Collection::iterator indexTerm = std::upper_bound(indexTerms.begin(), indexTerms.end(), term, luceneCompare()); return (std::distance(indexTerms.begin(), indexTerm) - 1); } void TermInfosReader::seekEnum(SegmentTermEnumPtr enumerator, int32_t indexOffset) { enumerator->seek(indexPointers[indexOffset], ((int64_t)indexOffset * (int64_t)totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]); } TermInfoPtr TermInfosReader::get(TermPtr term) { return get(term, true); } TermInfoPtr TermInfosReader::get(TermPtr term, bool useCache) { if (_size == 0) return TermInfoPtr(); ensureIndexIsRead(); TermInfoPtr ti; TermInfosReaderThreadResourcesPtr resources(getThreadResources()); TermInfoCachePtr cache; if (useCache) { cache = resources->termInfoCache; // check the cache first if the term was recently looked up ti = cache->get(term); if (ti) return ti; } // optimize sequential access: first try scanning cached enum without seeking SegmentTermEnumPtr enumerator = resources->termEnum; if (enumerator->term() && // term is at or past current ((enumerator->prev() && 
term->compareTo(enumerator->prev()) > 0) || term->compareTo(enumerator->term()) >= 0)) { int32_t enumOffset = (int32_t)(enumerator->position / totalIndexInterval ) + 1; if (indexTerms.size() == enumOffset || // but before end of block term->compareTo(indexTerms[enumOffset]) < 0) { // no need to seek int32_t numScans = enumerator->scanTo(term); if (enumerator->term() && term->compareTo(enumerator->term()) == 0) { ti = enumerator->termInfo(); if (cache && numScans > 1) { // we only want to put this TermInfo into the cache if scanEnum skipped more // than one dictionary entry. This prevents RangeQueries or WildcardQueries to // wipe out the cache when they iterate over a large numbers of terms in order. cache->put(term, ti); } } else ti.reset(); return ti; } } // random-access: must seek seekEnum(enumerator, getIndexOffset(term)); enumerator->scanTo(term); if (enumerator->term() && term->compareTo(enumerator->term()) == 0) { ti = enumerator->termInfo(); if (cache) cache->put(term, ti); } else ti.reset(); return ti; } void TermInfosReader::ensureIndexIsRead() { if (!indexTerms) boost::throw_exception(IllegalStateException(L"terms index was not loaded when this reader was created")); } int64_t TermInfosReader::getPosition(TermPtr term) { if (_size == 0) return -1; ensureIndexIsRead(); int32_t indexOffset = getIndexOffset(term); SegmentTermEnumPtr enumerator(getThreadResources()->termEnum); seekEnum(enumerator, indexOffset); while (term->compareTo(enumerator->term()) > 0 && enumerator->next()) { } return term->compareTo(enumerator->term()) == 0 ? 
enumerator->position : -1; } SegmentTermEnumPtr TermInfosReader::terms() { return boost::static_pointer_cast(origEnum->clone()); } SegmentTermEnumPtr TermInfosReader::terms(TermPtr term) { // don't use the cache in this call because we want to reposition the enumeration get(term, false); return boost::static_pointer_cast(getThreadResources()->termEnum->clone()); } TermInfosReaderThreadResources::~TermInfosReaderThreadResources() { } } LucenePlusPlus-rel_3.0.4/src/core/index/TermInfosWriter.cpp000066400000000000000000000155441217574114600237610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermInfosWriter.h" #include "Directory.h" #include "IndexOutput.h" #include "Term.h" #include "TermInfo.h" #include "FieldInfos.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { /// The file format version, a negative number. const int32_t TermInfosWriter::FORMAT = -3; /// Changed strings to true utf8 with length-in-bytes not length-in-chars. const int32_t TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4; /// NOTE: always change this if you switch to a new format. 
const int32_t TermInfosWriter::FORMAT_CURRENT = TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; TermInfosWriter::TermInfosWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval) { initialize(directory, segment, fis, interval, false); otherWriter = newLucene(directory, segment, fis, interval, true); } TermInfosWriter::TermInfosWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval, bool isIndex) { initialize(directory, segment, fis, interval, isIndex); } TermInfosWriter::~TermInfosWriter() { } void TermInfosWriter::initialize() { if (otherWriter) { _other = otherWriter; otherWriter->_other = shared_from_this(); } } void TermInfosWriter::initialize(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval, bool isi) { lastTi = newLucene(); utf8Result = newLucene(); lastTermBytes = ByteArray::newInstance(10); lastTermBytesLength = 0; lastFieldNumber = -1; skipInterval = 16; maxSkipLevels = 10; size = 0; lastIndexPointer = 0; indexInterval = interval; fieldInfos = fis; isIndex = isi; output = directory->createOutput(segment + (isIndex ? 
L".tii" : L".tis")); output->writeInt(FORMAT_CURRENT); // write format output->writeLong(0); // leave space for size output->writeInt(indexInterval); // write indexInterval output->writeInt(skipInterval); // write skipInterval output->writeInt(maxSkipLevels); // write maxSkipLevels BOOST_ASSERT(initUnicodeResults()); } void TermInfosWriter::add(TermPtr term, TermInfoPtr ti) { StringUtils::toUTF8(term->_text.c_str(), term->_text.size(), utf8Result); add(fieldInfos->fieldNumber(term->_field), utf8Result->result, utf8Result->length, ti); } bool TermInfosWriter::initUnicodeResults() { unicodeResult1 = newLucene(); unicodeResult2 = newLucene(); return true; } int32_t TermInfosWriter::compareToLastTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength) { if (lastFieldNumber != fieldNumber) { int32_t cmp = fieldInfos->fieldName(lastFieldNumber).compare(fieldInfos->fieldName(fieldNumber)); // If there is a field named "" (empty string) then we will get 0 on this comparison, yet, it's "OK". // But it's not OK if two different field numbers map to the same name. if (cmp != 0 || lastFieldNumber != -1) return cmp; } StringUtils::toUnicode(lastTermBytes.get(), lastTermBytesLength, unicodeResult1); StringUtils::toUnicode(termBytes.get(), termBytesLength, unicodeResult2); int32_t len = std::min(unicodeResult1->length, unicodeResult2->length); for (int32_t i = 0; i < len; ++i) { wchar_t ch1 = unicodeResult1->result[i]; wchar_t ch2 = unicodeResult2->result[i]; if (ch1 != ch2) return (ch1 - ch2); } return (unicodeResult1->length - unicodeResult2->length); } void TermInfosWriter::add(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength, TermInfoPtr ti) { // terms out of order? BOOST_ASSERT(compareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 || (isIndex && termBytesLength == 0 && lastTermBytesLength == 0)); BOOST_ASSERT(ti->freqPointer >= lastTi->freqPointer); // freqPointer out of order? 
BOOST_ASSERT(ti->proxPointer >= lastTi->proxPointer); // proxPointer out of order? TermInfosWriterPtr other(_other); if (!isIndex && size % indexInterval == 0) other->add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term writeTerm(fieldNumber, termBytes, termBytesLength); // write term output->writeVInt(ti->docFreq); // write doc freq output->writeVLong(ti->freqPointer - lastTi->freqPointer); // write pointers output->writeVLong(ti->proxPointer - lastTi->proxPointer); if (ti->docFreq >= skipInterval) output->writeVInt(ti->skipOffset); if (isIndex) { output->writeVLong(other->output->getFilePointer() - lastIndexPointer); lastIndexPointer = other->output->getFilePointer(); // write pointer } lastFieldNumber = fieldNumber; lastTi->set(ti); ++size; } void TermInfosWriter::writeTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength) { // Compute prefix in common with last term int32_t start = 0; int32_t limit = std::min(termBytesLength, lastTermBytesLength); while (start < limit) { if (termBytes[start] != lastTermBytes[start]) break; ++start; } int32_t length = termBytesLength - start; output->writeVInt(start); // write shared prefix length output->writeVInt(length); // write delta length output->writeBytes(termBytes.get(), start, length); // write delta bytes output->writeVInt(fieldNumber); // write field num if (lastTermBytes.size() < termBytesLength) lastTermBytes.resize((int32_t)((double)termBytesLength * 1.5)); MiscUtils::arrayCopy(termBytes.get(), start, lastTermBytes.get(), start, length); lastTermBytesLength = termBytesLength; } void TermInfosWriter::close() { output->seek(4); // write size after format output->writeLong(size); output->close(); if (!isIndex) TermInfosWriterPtr(_other)->close(); } } LucenePlusPlus-rel_3.0.4/src/core/index/TermPositionVector.cpp000066400000000000000000000016121217574114600244640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// 
// Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermPositionVector.h" namespace Lucene { TermPositionVector::TermPositionVector() { } TermPositionVector::~TermPositionVector() { } Collection TermPositionVector::getTermPositions(int32_t index) { BOOST_ASSERT(false); return Collection(); // override } Collection TermPositionVector::getOffsets(int32_t index) { BOOST_ASSERT(false); return Collection(); // override } } LucenePlusPlus-rel_3.0.4/src/core/index/TermPositions.cpp000066400000000000000000000020121217574114600234570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermPositions.h" namespace Lucene { TermPositions::TermPositions() { } TermPositions::~TermPositions() { } int32_t TermPositions::nextPosition() { BOOST_ASSERT(false); return 0; // override } int32_t TermPositions::getPayloadLength() { BOOST_ASSERT(false); return 0; // override } ByteArray TermPositions::getPayload(ByteArray data, int32_t offset) { BOOST_ASSERT(false); return ByteArray(); // override } bool TermPositions::isPayloadAvailable() { BOOST_ASSERT(false); return false; // override } } LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorEntry.cpp000066400000000000000000000046111217574114600237630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorEntry.h" #include "StringUtils.h" namespace Lucene { TermVectorEntry::TermVectorEntry(const String& field, const String& term, int32_t frequency, Collection offsets, Collection positions) { this->field = field; this->term = term; this->frequency = frequency; this->offsets = offsets; this->positions = positions; } TermVectorEntry::~TermVectorEntry() { } String TermVectorEntry::getField() { return field; } int32_t TermVectorEntry::getFrequency() { return frequency; } Collection TermVectorEntry::getOffsets() { return offsets; } Collection TermVectorEntry::getPositions() { return positions; } String TermVectorEntry::getTerm() { return term; } void TermVectorEntry::setFrequency(int32_t frequency) { this->frequency = frequency; } void TermVectorEntry::setOffsets(Collection offsets) { this->offsets = offsets; } void TermVectorEntry::setPositions(Collection positions) { this->positions = positions; } bool TermVectorEntry::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; TermVectorEntryPtr otherTermVectorEntry(boost::dynamic_pointer_cast(other)); if (otherTermVectorEntry) return (term == otherTermVectorEntry->term); return false; } int32_t TermVectorEntry::hashCode() { return StringUtils::hashCode(term); } String TermVectorEntry::toString() { StringStream buffer; buffer << L"TermVectorEntry{field='" << field; buffer << L"\', term='" << term; buffer << L"\', frequency=" << frequency << L"}"; return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorEntryFreqSortedComparator.cpp000066400000000000000000000021071217574114600300100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorEntryFreqSortedComparator.h" #include "TermVectorEntry.h" namespace Lucene { TermVectorEntryFreqSortedComparator::~TermVectorEntryFreqSortedComparator() { } bool TermVectorEntryFreqSortedComparator::compare(const TermVectorEntryPtr& first, const TermVectorEntryPtr& second) { int32_t result = (second->getFrequency() - first->getFrequency()); if (result < 0) return true; if (result > 0) return false; result = first->getTerm().compare(second->getTerm()); if (result < 0) return true; if (result > 0) return false; return (first->getField().compare(second->getField()) < 0); } } LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorMapper.cpp000066400000000000000000000017161217574114600241110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorMapper.h" namespace Lucene { TermVectorMapper::TermVectorMapper(bool ignoringPositions, bool ignoringOffsets) { this->ignoringPositions = ignoringPositions; this->ignoringOffsets = ignoringOffsets; } TermVectorMapper::~TermVectorMapper() { } bool TermVectorMapper::isIgnoringPositions() { return ignoringPositions; } bool TermVectorMapper::isIgnoringOffsets() { return ignoringOffsets; } void TermVectorMapper::setDocumentNumber(int32_t documentNumber) { // override } } LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorOffsetInfo.cpp000066400000000000000000000036651217574114600247340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorOffsetInfo.h" namespace Lucene { TermVectorOffsetInfo::TermVectorOffsetInfo(int32_t startOffset, int32_t endOffset) { this->endOffset = endOffset; this->startOffset = startOffset; } TermVectorOffsetInfo::~TermVectorOffsetInfo() { } const Collection TermVectorOffsetInfo::EMPTY_OFFSET_INFO() { static Collection _EMPTY_OFFSET_INFO; if (!_EMPTY_OFFSET_INFO) _EMPTY_OFFSET_INFO = Collection::newInstance(); return _EMPTY_OFFSET_INFO; } int32_t TermVectorOffsetInfo::getEndOffset() { return endOffset; } void TermVectorOffsetInfo::setEndOffset(int32_t endOffset) { this->endOffset = endOffset; } int32_t TermVectorOffsetInfo::getStartOffset() { return startOffset; } void TermVectorOffsetInfo::setStartOffset(int32_t endOffset) { this->startOffset = startOffset; } bool TermVectorOffsetInfo::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; 
TermVectorOffsetInfoPtr otherTermVector(boost::dynamic_pointer_cast(other)); if (!otherTermVector) return false; return (endOffset == otherTermVector->endOffset && startOffset == otherTermVector->startOffset); } int32_t TermVectorOffsetInfo::hashCode() { int32_t result = startOffset; return (29 * result + endOffset); } } LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorsReader.cpp000066400000000000000000000535441217574114600242600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsReader.h" #include "BufferedIndexInput.h" #include "IndexFileNames.h" #include "Directory.h" #include "FieldInfos.h" #include "SegmentTermPositionVector.h" #include "TermVectorOffsetInfo.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// NOTE: if you make a new format, it must be larger than the current format const int32_t TermVectorsReader::FORMAT_VERSION = 2; /// Changes to speed up bulk merging of term vectors const int32_t TermVectorsReader::FORMAT_VERSION2 = 3; /// Changed strings to UTF8 with length-in-bytes not length-in-chars const int32_t TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES = 4; /// NOTE: always change this if you switch to a new format const int32_t TermVectorsReader::FORMAT_CURRENT = TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES; /// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file const int32_t TermVectorsReader::FORMAT_SIZE = 4; const uint8_t TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR = 0x1; const uint8_t TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR = 0x2; TermVectorsReader::TermVectorsReader() { this->_size = 0; this->numTotalDocs = 0; 
this->docStoreOffset = 0; this->format = 0; } TermVectorsReader::TermVectorsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos) { ConstructReader(d, segment, fieldInfos, BufferedIndexInput::BUFFER_SIZE, -1, 0); } TermVectorsReader::TermVectorsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { ConstructReader(d, segment, fieldInfos, readBufferSize, docStoreOffset, size); } TermVectorsReader::~TermVectorsReader() { } void TermVectorsReader::ConstructReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { this->_size = 0; this->numTotalDocs = 0; this->docStoreOffset = 0; this->format = 0; bool success = false; LuceneException finally; try { if (d->fileExists(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION())) { tvx = d->openInput(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION(), readBufferSize); format = checkValidFormat(tvx); tvd = d->openInput(segment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION(), readBufferSize); int32_t tvdFormat = checkValidFormat(tvd); tvf = d->openInput(segment + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION(), readBufferSize); int32_t tvfFormat = checkValidFormat(tvf); BOOST_ASSERT(format == tvdFormat); BOOST_ASSERT(format == tvfFormat); if (format >= FORMAT_VERSION2) { BOOST_ASSERT((tvx->length() - FORMAT_SIZE) % 16 == 0); numTotalDocs = (int32_t)(tvx->length() >> 4); } else { BOOST_ASSERT((tvx->length() - FORMAT_SIZE) % 8 == 0); numTotalDocs = (int32_t)(tvx->length() >> 3); } if (docStoreOffset == -1) { this->docStoreOffset = 0; this->_size = numTotalDocs; BOOST_ASSERT(size == 0 || numTotalDocs == size); } else { this->docStoreOffset = docStoreOffset; this->_size = size; // Verify the file is long enough to hold all of our docs BOOST_ASSERT(numTotalDocs >= size + docStoreOffset); } } else { // If all documents flushed in a segment had hit non-aborting exceptions, it's possible that // FieldInfos.hasVectors returns true yet the term vector files don't exist. format = 0; } this->fieldInfos = fieldInfos; success = true; } catch (LuceneException& e) { finally = e; } // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception // above. In this case, we want to explicitly close any subset of things that were opened. 
if (!success) close(); finally.throwException(); } IndexInputPtr TermVectorsReader::getTvdStream() { return tvd; } IndexInputPtr TermVectorsReader::getTvfStream() { return tvf; } void TermVectorsReader::seekTvx(int32_t docNum) { if (format < FORMAT_VERSION2) tvx->seek((docNum + docStoreOffset) * 8 + FORMAT_SIZE); else tvx->seek((docNum + docStoreOffset) * 16 + FORMAT_SIZE); } bool TermVectorsReader::canReadRawDocs() { return (format >= FORMAT_UTF8_LENGTH_IN_BYTES); } void TermVectorsReader::rawDocs(Collection tvdLengths, Collection tvfLengths, int32_t startDocID, int32_t numDocs) { if (!tvx) { MiscUtils::arrayFill(tvdLengths.begin(), 0, tvdLengths.size(), 0); MiscUtils::arrayFill(tvfLengths.begin(), 0, tvfLengths.size(), 0); return; } // SegmentMerger calls canReadRawDocs() first and should not call us if that returns false. if (format < FORMAT_VERSION2) boost::throw_exception(IllegalStateException(L"cannot read raw docs with older term vector formats")); seekTvx(startDocID); int64_t tvdPosition = tvx->readLong(); tvd->seek(tvdPosition); int64_t tvfPosition = tvx->readLong(); tvf->seek(tvfPosition); int64_t lastTvdPosition = tvdPosition; int64_t lastTvfPosition = tvfPosition; int32_t count = 0; while (count < numDocs) { int32_t docID = docStoreOffset + startDocID + count + 1; BOOST_ASSERT(docID <= numTotalDocs); if (docID < numTotalDocs) { tvdPosition = tvx->readLong(); tvfPosition = tvx->readLong(); } else { tvdPosition = tvd->length(); tvfPosition = tvf->length(); BOOST_ASSERT(count == numDocs - 1); } tvdLengths[count] = (int32_t)(tvdPosition - lastTvdPosition); tvfLengths[count] = (int32_t)(tvfPosition - lastTvfPosition); ++count; lastTvdPosition = tvdPosition; lastTvfPosition = tvfPosition; } } int32_t TermVectorsReader::checkValidFormat(IndexInputPtr in) { int32_t format = in->readInt(); if (format > FORMAT_CURRENT) { boost::throw_exception(CorruptIndexException(L"Incompatible format version: " + StringUtils::toString(format) + L" expected " + 
StringUtils::toString(FORMAT_CURRENT) + L" or less")); } return format; } void TermVectorsReader::close() { // make all effort to close up. Keep the first exception and throw it as a new one. LuceneException keep; if (tvx) { try { tvx->close(); } catch (LuceneException& e) { if (keep.isNull()) keep = e; } } if (tvd) { try { tvd->close(); } catch (LuceneException& e) { if (keep.isNull()) keep = e; } } if (tvf) { try { tvf->close(); } catch (LuceneException& e) { if (keep.isNull()) keep = e; } } keep.throwException(); } int32_t TermVectorsReader::size() { return _size; } void TermVectorsReader::get(int32_t docNum, const String& field, TermVectorMapperPtr mapper) { if (tvx) { int32_t fieldNumber = fieldInfos->fieldNumber(field); // We need to account for the FORMAT_SIZE at when seeking in the tvx. We don't need to do // this in other seeks because we already have the file pointer that was written in another file seekTvx(docNum); int64_t tvdPosition = tvx->readLong(); tvd->seek(tvdPosition); int32_t fieldCount = tvd->readVInt(); // There are only a few fields per document. We opt for a full scan rather then requiring that they // be ordered. We need to read through all of the fields anyway to get to the tvf pointers. 
int32_t number = 0; int32_t found = -1; for (int32_t i = 0; i < fieldCount; ++i) { if (format >= FORMAT_VERSION) number = tvd->readVInt(); else number += tvd->readVInt(); if (number == fieldNumber) found = i; } // This field, although valid in the segment, was not found in this document if (found != -1) { // Compute position in the tvf file int64_t position; if (format >= FORMAT_VERSION2) position = tvx->readLong(); else position = tvd->readVLong(); for (int32_t i = 1; i <= found; ++i) position += tvd->readVLong(); mapper->setDocumentNumber(docNum); readTermVector(field, position, mapper); } } } TermFreqVectorPtr TermVectorsReader::get(int32_t docNum, const String& field) { // Check if no term vectors are available for this segment at all ParallelArrayTermVectorMapperPtr mapper(newLucene()); get(docNum, field, mapper); return mapper->materializeVector(); } Collection TermVectorsReader::readFields(int32_t fieldCount) { int32_t number = 0; Collection fields(Collection::newInstance(fieldCount)); for (int32_t i = 0; i < fieldCount; ++i) { if (format >= FORMAT_VERSION) number = tvd->readVInt(); else number += tvd->readVInt(); fields[i] = fieldInfos->fieldName(number); } return fields; } Collection TermVectorsReader::readTvfPointers(int32_t fieldCount) { // Compute position in the tvf file int64_t position; if (format >= FORMAT_VERSION2) position = tvx->readLong(); else position = tvd->readVLong(); Collection tvfPointers(Collection::newInstance(fieldCount)); tvfPointers[0] = position; for (int32_t i = 1; i < fieldCount; ++i) { position += tvd->readVLong(); tvfPointers[i] = position; } return tvfPointers; } Collection TermVectorsReader::get(int32_t docNum) { Collection result; if (tvx) { // We need to offset by seekTvx(docNum); int64_t tvdPosition = tvx->readLong(); tvd->seek(tvdPosition); int32_t fieldCount = tvd->readVInt(); // No fields are vectorized for this document if (fieldCount != 0) { Collection fields(readFields(fieldCount)); Collection 
tvfPointers(readTvfPointers(fieldCount)); result = readTermVectors(docNum, fields, tvfPointers); } } return result; } void TermVectorsReader::get(int32_t docNumber, TermVectorMapperPtr mapper) { // Check if no term vectors are available for this segment at all if (tvx) { // We need to offset by seekTvx(docNumber); int64_t tvdPosition = tvx->readLong(); tvd->seek(tvdPosition); int32_t fieldCount = tvd->readVInt(); // No fields are vectorized for this document if (fieldCount != 0) { Collection fields(readFields(fieldCount)); Collection tvfPointers(readTvfPointers(fieldCount)); mapper->setDocumentNumber(docNumber); readTermVectors(fields, tvfPointers, mapper); } } } Collection TermVectorsReader::readTermVectors(int32_t docNum, Collection fields, Collection tvfPointers) { Collection res(Collection::newInstance(fields.size())); for (int32_t i = 0; i < fields.size(); ++i) { ParallelArrayTermVectorMapperPtr mapper(newLucene()); mapper->setDocumentNumber(docNum); readTermVector(fields[i], tvfPointers[i], mapper); res[i] = mapper->materializeVector(); } return res; } void TermVectorsReader::readTermVectors(Collection fields, Collection tvfPointers, TermVectorMapperPtr mapper) { for (int32_t i = 0; i < fields.size(); ++i) readTermVector(fields[i], tvfPointers[i], mapper); } void TermVectorsReader::readTermVector(const String& field, int64_t tvfPointer, TermVectorMapperPtr mapper) { // Now read the data from specified position. We don't need to offset by the FORMAT here since // the pointer already includes the offset tvf->seek(tvfPointer); int32_t numTerms = tvf->readVInt(); // If no terms - return a constant empty termvector. However, this should never occur! 
if (numTerms == 0) return; bool storePositions; bool storeOffsets; if (format >= FORMAT_VERSION) { uint8_t bits = tvf->readByte(); storePositions = ((bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0); storeOffsets = ((bits & STORE_OFFSET_WITH_TERMVECTOR) != 0); } else { tvf->readVInt(); storePositions = false; storeOffsets = false; } mapper->setExpectations(field, numTerms, storeOffsets, storePositions); int32_t start = 0; int32_t deltaLength = 0; int32_t totalLength = 0; ByteArray byteBuffer; CharArray charBuffer; bool preUTF8 = (format < FORMAT_UTF8_LENGTH_IN_BYTES); // init the buffers if (preUTF8) { charBuffer = CharArray::newInstance(10); byteBuffer.reset(); } else { charBuffer.reset(); byteBuffer = ByteArray::newInstance(20); } for (int32_t i = 0; i < numTerms; ++i) { start = tvf->readVInt(); deltaLength = tvf->readVInt(); totalLength = start + deltaLength; String term; if (preUTF8) { // Term stored as "java chars" if (charBuffer.size() < totalLength) charBuffer.resize((int32_t)(1.5 * (double)totalLength)); totalLength = start + tvf->readChars(charBuffer.get(), start, deltaLength); term.append(charBuffer.get(), totalLength); } else { // Term stored as utf8 bytes if (byteBuffer.size() < totalLength) byteBuffer.resize((int32_t)(1.5 * (double)totalLength)); tvf->readBytes(byteBuffer.get(), start, deltaLength); term = StringUtils::toUnicode(byteBuffer.get(), totalLength); } int32_t freq = tvf->readVInt(); Collection positions; if (storePositions) // read in the positions { // does the mapper even care about positions? if (!mapper->isIgnoringPositions()) { positions = Collection::newInstance(freq); int32_t prevPosition = 0; for (Collection::iterator position = positions.begin(); position != positions.end(); ++position) { *position = prevPosition + tvf->readVInt(); prevPosition = *position; } } else { // we need to skip over the positions. 
Since these are VInts, I don't believe there // is anyway to know for sure how far to skip for (int32_t j = 0; j < freq; ++j) tvf->readVInt(); } } Collection offsets; if (storeOffsets) { // does the mapper even care about offsets? if (!mapper->isIgnoringOffsets()) { offsets = Collection::newInstance(freq); int32_t prevOffset = 0; for (Collection::iterator offset = offsets.begin(); offset != offsets.end(); ++offset) { int32_t startOffset = prevOffset + tvf->readVInt(); int32_t endOffset = startOffset + tvf->readVInt(); *offset = newLucene(startOffset, endOffset); prevOffset = endOffset; } } else { for (int32_t j = 0; j < freq; ++j) { tvf->readVInt(); tvf->readVInt(); } } } mapper->map(term, freq, offsets, positions); } } LuceneObjectPtr TermVectorsReader::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); TermVectorsReaderPtr cloneReader(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneReader->fieldInfos = fieldInfos; cloneReader->_size = _size; cloneReader->numTotalDocs = numTotalDocs; cloneReader->docStoreOffset = docStoreOffset; cloneReader->format = format; // These are null when a TermVectorsReader was created on a segment that did not have term vectors saved if (tvx && tvd && tvf) { cloneReader->tvx = boost::dynamic_pointer_cast(tvx->clone()); cloneReader->tvd = boost::dynamic_pointer_cast(tvd->clone()); cloneReader->tvf = boost::dynamic_pointer_cast(tvf->clone()); } return cloneReader; } ParallelArrayTermVectorMapper::ParallelArrayTermVectorMapper() { currentPosition = 0; storingOffsets = false; storingPositions = false; } ParallelArrayTermVectorMapper::~ParallelArrayTermVectorMapper() { } void ParallelArrayTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { this->field = field; terms = Collection::newInstance(numTerms); termFreqs = Collection::newInstance(numTerms); this->storingOffsets = storeOffsets; this->storingPositions = storePositions; if 
(storePositions) this->positions = Collection< Collection >::newInstance(numTerms); if (storeOffsets) this->offsets = Collection< Collection >::newInstance(numTerms); } void ParallelArrayTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { terms[currentPosition] = term; termFreqs[currentPosition] = frequency; if (storingOffsets) this->offsets[currentPosition] = offsets; if (storingPositions) this->positions[currentPosition] = positions; ++currentPosition; } TermFreqVectorPtr ParallelArrayTermVectorMapper::materializeVector() { SegmentTermVectorPtr tv; if (!field.empty() && terms) { if (storingPositions || storingOffsets) tv = newLucene(field, terms, termFreqs, positions, offsets); else tv = newLucene(field, terms, termFreqs); } return tv; } } LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorsTermsWriter.cpp000066400000000000000000000307571217574114600253460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsTermsWriter.h" #include "TermVectorsTermsWriterPerThread.h" #include "TermVectorsTermsWriterPerField.h" #include "TermVectorsReader.h" #include "TermsHashPerField.h" #include "TermsHashPerThread.h" #include "RAMOutputStream.h" #include "IndexWriter.h" #include "IndexFileNames.h" #include "SegmentWriteState.h" #include "Directory.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { TermVectorsTermsWriter::TermVectorsTermsWriter(DocumentsWriterPtr docWriter) { this->freeCount = 0; this->lastDocID = 0; this->allocCount = 0; this->_docWriter = docWriter; this->docFreeList = Collection::newInstance(1); } TermVectorsTermsWriter::~TermVectorsTermsWriter() { } TermsHashConsumerPerThreadPtr TermVectorsTermsWriter::addThread(TermsHashPerThreadPtr perThread) { return newLucene(perThread, shared_from_this()); } void TermVectorsTermsWriter::createPostings(Collection postings, int32_t start, int32_t count) { int32_t end = start + count; for (int32_t i = start; i < end; ++i) postings[i] = newLucene(); } void TermVectorsTermsWriter::flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state) { SyncLock syncLock(this); // NOTE: it's possible that all documents seen in this segment hit non-aborting exceptions, in which case we will // not have yet init'd the TermVectorsWriter. This is actually OK (unlike in the stored fields case) because, // although IieldInfos.hasVectors() will return true, the TermVectorsReader gracefully handles non-existence of // the term vectors files. 
if (tvx) { if (state->numDocsInStore > 0) { // In case there are some final documents that we didn't see (because they hit a non-aborting exception) fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); } tvx->flush(); tvd->flush(); tvf->flush(); } for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { for (Collection::iterator field = entry->second.begin(); field != entry->second.end(); ++field) { TermVectorsTermsWriterPerFieldPtr perField(boost::static_pointer_cast(*field)); TermsHashPerFieldPtr(perField->_termsHashPerField)->reset(); perField->shrinkHash(); } TermVectorsTermsWriterPerThreadPtr perThread(boost::static_pointer_cast(entry->first)); TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(true); } } void TermVectorsTermsWriter::closeDocStore(SegmentWriteStatePtr state) { SyncLock syncLock(this); if (tvx) { DocumentsWriterPtr docWriter(_docWriter); // At least one doc in this run had term vectors enabled fill(state->numDocsInStore - docWriter->getDocStoreOffset()); tvx->close(); tvf->close(); tvd->close(); tvx.reset(); BOOST_ASSERT(!state->docStoreSegmentName.empty()); String fileName(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); if (4 + ((int64_t)state->numDocsInStore) * 16 != state->directory->fileLength(fileName)) { boost::throw_exception(RuntimeException(L"after flush: tvx size mismatch: " + StringUtils::toString(state->numDocsInStore) + L" docs vs " + StringUtils::toString(state->directory->fileLength(fileName)) + L" length in bytes of " + fileName + L" file exists?=" + StringUtils::toString(state->directory->fileExists(fileName)))); } state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); state->flushedFiles.add(state->docStoreSegmentName + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION()); state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); lastDocID = 0; } } TermVectorsTermsWriterPerDocPtr TermVectorsTermsWriter::getPerDoc() { SyncLock syncLock(this); if (freeCount == 0) { if (++allocCount > docFreeList.size()) { // Grow our free list up front to make sure we have enough space to recycle all outstanding // PerDoc instances BOOST_ASSERT(allocCount == 1 + docFreeList.size()); docFreeList.resize(MiscUtils::getNextSize(allocCount)); } return newLucene(shared_from_this()); } else return docFreeList[--freeCount]; } void TermVectorsTermsWriter::fill(int32_t docID) { int32_t docStoreOffset = DocumentsWriterPtr(_docWriter)->getDocStoreOffset(); int32_t end = docID + docStoreOffset; if (lastDocID < end) { int64_t tvfPosition = tvf->getFilePointer(); while (lastDocID < end) { tvx->writeLong(tvd->getFilePointer()); tvd->writeVInt(0); tvx->writeLong(tvfPosition); ++lastDocID; } } } void TermVectorsTermsWriter::initTermVectorsWriter() { SyncLock syncLock(this); if (!tvx) { DocumentsWriterPtr docWriter(_docWriter); String docStoreSegment(docWriter->getDocStoreSegment()); if (docStoreSegment.empty()) return; // If we hit an exception while init'ing the term vector output files, we must abort this segment // because those files will be in an unknown state tvx = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); tvd = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); tvf = docWriter->directory->createOutput(docStoreSegment + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION()); tvx->writeInt(TermVectorsReader::FORMAT_CURRENT); tvd->writeInt(TermVectorsReader::FORMAT_CURRENT); tvf->writeInt(TermVectorsReader::FORMAT_CURRENT); docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); lastDocID = 0; } } void TermVectorsTermsWriter::finishDocument(TermVectorsTermsWriterPerDocPtr perDoc) { SyncLock syncLock(this); DocumentsWriterPtr docWriter(_docWriter); BOOST_ASSERT(IndexWriterPtr(docWriter->_writer)->testPoint(L"TermVectorsTermsWriter.finishDocument start")); initTermVectorsWriter(); fill(perDoc->docID); // Append term vectors to the real outputs tvx->writeLong(tvd->getFilePointer()); tvx->writeLong(tvf->getFilePointer()); tvd->writeVInt(perDoc->numVectorFields); if (perDoc->numVectorFields > 0) { for (int32_t i = 0; i < perDoc->numVectorFields; ++i) tvd->writeVInt(perDoc->fieldNumbers[i]); BOOST_ASSERT(perDoc->fieldPointers[0] == 0); int64_t lastPos = perDoc->fieldPointers[0]; for (int32_t i = 1; i < perDoc->numVectorFields; ++i) { int64_t pos = perDoc->fieldPointers[i]; tvd->writeVLong(pos - lastPos); lastPos = pos; } perDoc->perDocTvf->writeTo(tvf); perDoc->numVectorFields = 0; } BOOST_ASSERT(lastDocID == perDoc->docID + docWriter->getDocStoreOffset()); ++lastDocID; perDoc->reset(); free(perDoc); BOOST_ASSERT(IndexWriterPtr(docWriter->_writer)->testPoint(L"TermVectorsTermsWriter.finishDocument end")); } bool TermVectorsTermsWriter::freeRAM() { // We don't hold any state beyond one doc, so we don't free persistent RAM here return false; } void TermVectorsTermsWriter::abort() { if (tvx) { try { tvx->close(); } catch (...) { } tvx.reset(); } if (tvd) { try { tvd->close(); } catch (...) { } tvd.reset(); } if (tvf) { try { tvf->close(); } catch (...) 
{ } tvf.reset(); } lastDocID = 0; } void TermVectorsTermsWriter::free(TermVectorsTermsWriterPerDocPtr doc) { SyncLock syncLock(this); BOOST_ASSERT(freeCount < docFreeList.size()); docFreeList[freeCount++] = doc; } int32_t TermVectorsTermsWriter::bytesPerPosting() { return (RawPostingList::BYTES_SIZE + 3 * DocumentsWriter::INT_NUM_BYTE); } TermVectorsTermsWriterPerDoc::TermVectorsTermsWriterPerDoc(TermVectorsTermsWriterPtr termsWriter) { this->_termsWriter = termsWriter; buffer = DocumentsWriterPtr(termsWriter->_docWriter)->newPerDocBuffer(); perDocTvf = newLucene(buffer); numVectorFields = 0; fieldNumbers = Collection::newInstance(1); fieldPointers = Collection::newInstance(1); } TermVectorsTermsWriterPerDoc::~TermVectorsTermsWriterPerDoc() { } void TermVectorsTermsWriterPerDoc::reset() { perDocTvf->reset(); buffer->recycle(); numVectorFields = 0; } void TermVectorsTermsWriterPerDoc::abort() { reset(); TermVectorsTermsWriterPtr(_termsWriter)->free(shared_from_this()); } void TermVectorsTermsWriterPerDoc::addField(int32_t fieldNumber) { if (numVectorFields == fieldNumbers.size()) { fieldNumbers.resize(MiscUtils::getNextSize(fieldNumbers.size())); fieldPointers.resize(MiscUtils::getNextSize(fieldPointers.size())); } fieldNumbers[numVectorFields] = fieldNumber; fieldPointers[numVectorFields] = perDocTvf->getFilePointer(); ++numVectorFields; } int64_t TermVectorsTermsWriterPerDoc::sizeInBytes() { return buffer->getSizeInBytes(); } void TermVectorsTermsWriterPerDoc::finish() { TermVectorsTermsWriterPtr(_termsWriter)->finishDocument(shared_from_this()); } TermVectorsTermsWriterPostingList::TermVectorsTermsWriterPostingList() { freq = 0; lastOffset = 0; lastPosition = 0; } TermVectorsTermsWriterPostingList::~TermVectorsTermsWriterPostingList() { } } 
LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorsTermsWriterPerField.cpp000066400000000000000000000242541217574114600267540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsTermsWriterPerField.h" #include "TermVectorsTermsWriterPerThread.h" #include "TermVectorsTermsWriter.h" #include "TermsHashPerField.h" #include "TermsHashPerThread.h" #include "TermVectorsReader.h" #include "Fieldable.h" #include "FieldInfo.h" #include "FieldInvertState.h" #include "RAMOutputStream.h" #include "ByteSliceReader.h" #include "CharBlockPool.h" #include "OffsetAttribute.h" #include "AttributeSource.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { TermVectorsTermsWriterPerField::TermVectorsTermsWriterPerField(TermsHashPerFieldPtr termsHashPerField, TermVectorsTermsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo) { this->doVectors = false; this->doVectorPositions = false; this->doVectorOffsets = false; this->maxNumPostings = 0; this->_termsHashPerField = termsHashPerField; this->_perThread = perThread; this->_termsWriter = perThread->_termsWriter; this->fieldInfo = fieldInfo; _docState = termsHashPerField->docState; _fieldState = termsHashPerField->fieldState; } TermVectorsTermsWriterPerField::~TermVectorsTermsWriterPerField() { } int32_t TermVectorsTermsWriterPerField::getStreamCount() { return 2; } bool TermVectorsTermsWriterPerField::start(Collection fields, int32_t count) { doVectors = false; doVectorPositions = false; doVectorOffsets = false; for (int32_t i = 0; i < count; ++i) { FieldablePtr field(fields[i]); if (field->isIndexed() && field->isTermVectorStored()) { 
doVectors = true; if (field->isStorePositionWithTermVector()) doVectorPositions = true; if (field->isStoreOffsetWithTermVector()) doVectorOffsets = true; } } if (doVectors) { TermVectorsTermsWriterPerThreadPtr perThread(_perThread); DocStatePtr docState(_docState); if (!perThread->doc) { perThread->doc = TermVectorsTermsWriterPtr(_termsWriter)->getPerDoc(); perThread->doc->docID = docState->docID; BOOST_ASSERT(perThread->doc->numVectorFields == 0); BOOST_ASSERT(perThread->doc->perDocTvf->length() == 0); BOOST_ASSERT(perThread->doc->perDocTvf->getFilePointer() == 0); } BOOST_ASSERT(perThread->doc->docID == docState->docID); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (termsHashPerField->numPostings != 0) { // Only necessary if previous doc hit a non-aborting exception while writing vectors // in this field termsHashPerField->reset(); TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(false); } } return doVectors; } void TermVectorsTermsWriterPerField::abort() { } void TermVectorsTermsWriterPerField::finish() { BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.finish start")); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); int32_t numPostings = termsHashPerField->numPostings; BOOST_ASSERT(numPostings >= 0); if (!doVectors || numPostings == 0) return; if (numPostings > maxNumPostings) maxNumPostings = numPostings; TermVectorsTermsWriterPerThreadPtr perThread(_perThread); IndexOutputPtr tvf(perThread->doc->perDocTvf); // This is called once, after inverting all occurrences of a given field in the doc. At this point we flush // our hash into the DocWriter. 
BOOST_ASSERT(fieldInfo->storeTermVector); BOOST_ASSERT(perThread->vectorFieldsInOrder(fieldInfo)); perThread->doc->addField(termsHashPerField->fieldInfo->number); Collection postings(termsHashPerField->sortPostings()); tvf->writeVInt(numPostings); uint8_t bits = 0x0; if (doVectorPositions) bits |= TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR; if (doVectorOffsets) bits |= TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR; tvf->writeByte(bits); int32_t encoderUpto = 0; int32_t lastTermBytesCount = 0; ByteSliceReaderPtr reader(perThread->vectorSliceReader); Collection charBuffers(TermsHashPerThreadPtr(perThread->_termsHashPerThread)->charPool->buffers); for (int32_t j = 0; j < numPostings; ++j) { TermVectorsTermsWriterPostingListPtr posting(boost::static_pointer_cast(postings[j])); int32_t freq = posting->freq; CharArray text2(charBuffers[posting->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]); int32_t start2 = (posting->textStart & DocumentsWriter::CHAR_BLOCK_MASK); // We swap between two encoders to save copying last Term's byte array UTF8ResultPtr utf8Result(perThread->utf8Results[encoderUpto]); StringUtils::toUTF8(text2.get() + start2, text2.size(), utf8Result); int32_t termBytesCount = utf8Result->length; // Compute common prefix between last term and this term int32_t prefix = 0; if (j > 0) { ByteArray lastTermBytes(perThread->utf8Results[1 - encoderUpto]->result); ByteArray termBytes(perThread->utf8Results[encoderUpto]->result); while (prefix < lastTermBytesCount && prefix < termBytesCount) { if (lastTermBytes[prefix] != termBytes[prefix]) break; ++prefix; } } encoderUpto = 1 - encoderUpto; lastTermBytesCount = termBytesCount; int32_t suffix = termBytesCount - prefix; tvf->writeVInt(prefix); tvf->writeVInt(suffix); tvf->writeBytes(utf8Result->result.get(), prefix, suffix); tvf->writeVInt(freq); if (doVectorPositions) { termsHashPerField->initReader(reader, posting, 0); reader->writeTo(tvf); } if (doVectorOffsets) { termsHashPerField->initReader(reader, 
posting, 1); reader->writeTo(tvf); } } termsHashPerField->reset(); // NOTE: we clear per-field at the thread level, because term vectors fully write themselves on each // field; this saves RAM (eg if large doc has two large fields with term vectors on) because we // recycle/reuse all RAM after each field TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(false); } void TermVectorsTermsWriterPerField::shrinkHash() { TermsHashPerFieldPtr(_termsHashPerField)->shrinkHash(maxNumPostings); maxNumPostings = 0; } void TermVectorsTermsWriterPerField::start(FieldablePtr field) { if (doVectorOffsets) offsetAttribute = FieldInvertStatePtr(_fieldState)->attributeSource->addAttribute(); else offsetAttribute.reset(); } void TermVectorsTermsWriterPerField::newTerm(RawPostingListPtr p0) { BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.newTerm start")); TermVectorsTermsWriterPostingListPtr p(boost::static_pointer_cast(p0)); p->freq = 1; FieldInvertStatePtr fieldState(_fieldState); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (doVectorOffsets) { int32_t startOffset = fieldState->offset + offsetAttribute->startOffset(); int32_t endOffset = fieldState->offset + offsetAttribute->endOffset(); termsHashPerField->writeVInt(1, startOffset); termsHashPerField->writeVInt(1, endOffset - startOffset); p->lastOffset = endOffset; } if (doVectorPositions) { termsHashPerField->writeVInt(0, fieldState->position); p->lastPosition = fieldState->position; } } void TermVectorsTermsWriterPerField::addTerm(RawPostingListPtr p0) { BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.newTerm start")); TermVectorsTermsWriterPostingListPtr p(boost::static_pointer_cast(p0)); ++p->freq; FieldInvertStatePtr fieldState(_fieldState); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (doVectorOffsets) { int32_t startOffset = fieldState->offset + offsetAttribute->startOffset(); int32_t endOffset = fieldState->offset 
+ offsetAttribute->endOffset(); termsHashPerField->writeVInt(1, startOffset - p->lastOffset); termsHashPerField->writeVInt(1, endOffset - startOffset); p->lastOffset = endOffset; } if (doVectorPositions) { termsHashPerField->writeVInt(0, fieldState->position - p->lastPosition); p->lastPosition = fieldState->position; } } void TermVectorsTermsWriterPerField::skippingLongTerm() { } } LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorsTermsWriterPerThread.cpp000066400000000000000000000046101217574114600271320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsTermsWriterPerThread.h" #include "TermVectorsTermsWriterPerField.h" #include "TermVectorsTermsWriter.h" #include "TermsHashPerThread.h" #include "ByteSliceReader.h" #include "FieldInfo.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { TermVectorsTermsWriterPerThread::TermVectorsTermsWriterPerThread(TermsHashPerThreadPtr termsHashPerThread, TermVectorsTermsWriterPtr termsWriter) { utf8Results = newCollection(newInstance(), newInstance()); this->vectorSliceReader = newLucene(); this->_termsWriter = termsWriter; this->_termsHashPerThread = termsHashPerThread; _docState = termsHashPerThread->docState; } TermVectorsTermsWriterPerThread::~TermVectorsTermsWriterPerThread() { } void TermVectorsTermsWriterPerThread::startDocument() { BOOST_ASSERT(clearLastVectorFieldName()); if (doc) { doc->reset(); doc->docID = DocStatePtr(_docState)->docID; } } DocWriterPtr TermVectorsTermsWriterPerThread::finishDocument() { DocWriterPtr returnDoc(doc); doc.reset(); return returnDoc; } TermsHashConsumerPerFieldPtr 
TermVectorsTermsWriterPerThread::addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo) { return newLucene(termsHashPerField, shared_from_this(), fieldInfo); } void TermVectorsTermsWriterPerThread::abort() { if (doc) { doc->abort(); doc.reset(); } } bool TermVectorsTermsWriterPerThread::clearLastVectorFieldName() { lastVectorFieldName.clear(); return true; } bool TermVectorsTermsWriterPerThread::vectorFieldsInOrder(FieldInfoPtr fi) { bool inOrder = lastVectorFieldName.empty() ? true : (lastVectorFieldName < fi->name); lastVectorFieldName = fi->name; return inOrder; } } LucenePlusPlus-rel_3.0.4/src/core/index/TermVectorsWriter.cpp000066400000000000000000000213551217574114600243250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsWriter.h" #include "IndexFileNames.h" #include "IndexOutput.h" #include "TermVectorsReader.h" #include "TermVectorOffsetInfo.h" #include "TermPositionVector.h" #include "Directory.h" #include "FieldInfos.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { TermVectorsWriter::TermVectorsWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fieldInfos) { utf8Results = newCollection(newInstance(), newInstance()); // Open files for TermVector storage tvx = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); tvx->writeInt(TermVectorsReader::FORMAT_CURRENT); tvd = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); tvd->writeInt(TermVectorsReader::FORMAT_CURRENT); tvf = directory->createOutput(segment + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION()); tvf->writeInt(TermVectorsReader::FORMAT_CURRENT); this->fieldInfos = fieldInfos; } TermVectorsWriter::~TermVectorsWriter() { } void TermVectorsWriter::addAllDocVectors(Collection vectors) { tvx->writeLong(tvd->getFilePointer()); tvx->writeLong(tvf->getFilePointer()); if (vectors) { int32_t numFields = vectors.size(); tvd->writeVInt(numFields); Collection fieldPointers(Collection::newInstance(numFields)); for (int32_t i = 0; i < numFields; ++i) { fieldPointers[i] = tvf->getFilePointer(); int32_t fieldNumber = fieldInfos->fieldNumber(vectors[i]->getField()); // 1st pass: write field numbers to tvd tvd->writeVInt(fieldNumber); int32_t numTerms = vectors[i]->size(); tvf->writeVInt(numTerms); TermPositionVectorPtr tpVector(boost::dynamic_pointer_cast(vectors[i])); uint8_t bits; bool storePositions; bool storeOffsets; if (tpVector) { // May have positions & offsets storePositions = (tpVector->size() > 0 && !tpVector->getTermPositions(0)); storeOffsets = (tpVector->size() > 0 && tpVector->getOffsets(0)); bits = (uint8_t)((storePositions ? TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR : 0) + (storeOffsets ? 
TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR : 0)); } else { bits = 0; storePositions = false; storeOffsets = false; } tvf->writeVInt(bits); Collection terms(vectors[i]->getTerms()); Collection freqs(vectors[i]->getTermFrequencies()); int32_t utf8Upto = 0; utf8Results[1]->length = 0; for (int32_t j = 0; j < numTerms; ++j) { StringUtils::toUTF8(terms[j].c_str(), terms[j].length(), utf8Results[utf8Upto]); int32_t start = MiscUtils::bytesDifference(utf8Results[1 - utf8Upto]->result.get(), utf8Results[1 - utf8Upto]->length, utf8Results[utf8Upto]->result.get(), utf8Results[utf8Upto]->length); int32_t length = utf8Results[utf8Upto]->length - start; tvf->writeVInt(start); // write shared prefix length tvf->writeVInt(length); // write delta length tvf->writeBytes(utf8Results[utf8Upto]->result.get(), start, length); // write delta bytes utf8Upto = 1 - utf8Upto; int32_t termFreq = freqs[j]; tvf->writeVInt(termFreq); if (storePositions) { Collection positions(tpVector->getTermPositions(j)); if (!positions) boost::throw_exception(IllegalStateException(L"Trying to write positions that are null!")); BOOST_ASSERT(positions.size() == termFreq); // use delta encoding for positions int32_t lastPosition = 0; for (int32_t k = 0; k < positions.size(); ++k) { int32_t position = positions[k]; tvf->writeVInt(position - lastPosition); lastPosition = position; } } if (storeOffsets) { Collection offsets(tpVector->getOffsets(j)); if (!offsets) boost::throw_exception(IllegalStateException(L"Trying to write offsets that are null!")); BOOST_ASSERT(offsets.size() == termFreq); // use delta encoding for offsets int32_t lastEndOffset = 0; for (int32_t k = 0; k < offsets.size(); ++k) { int32_t startOffset = offsets[k]->getStartOffset(); int32_t endOffset = offsets[k]->getEndOffset(); tvf->writeVInt(startOffset - lastEndOffset); tvf->writeVInt(endOffset - startOffset); lastEndOffset = endOffset; } } } } // 2nd pass: write field pointers to tvd if (numFields > 1) { int64_t lastFieldPointer = 
fieldPointers[0]; for (int32_t i = 1; i < numFields; ++i) { int64_t fieldPointer = fieldPointers[i]; tvd->writeVLong(fieldPointer - lastFieldPointer); lastFieldPointer = fieldPointer; } } } else tvd->writeVInt(0); } void TermVectorsWriter::addRawDocuments(TermVectorsReaderPtr reader, Collection tvdLengths, Collection tvfLengths, int32_t numDocs) { int64_t tvdPosition = tvd->getFilePointer(); int64_t tvfPosition = tvf->getFilePointer(); int64_t tvdStart = tvdPosition; int64_t tvfStart = tvfPosition; for (int32_t i = 0; i < numDocs; ++i) { tvx->writeLong(tvdPosition); tvdPosition += tvdLengths[i]; tvx->writeLong(tvfPosition); tvfPosition += tvfLengths[i]; } tvd->copyBytes(reader->getTvdStream(), tvdPosition - tvdStart); tvf->copyBytes(reader->getTvfStream(), tvfPosition - tvfStart); BOOST_ASSERT(tvd->getFilePointer() == tvdPosition); BOOST_ASSERT(tvf->getFilePointer() == tvfPosition); } void TermVectorsWriter::close() { // make an effort to close all streams we can but remember and re-throw the first exception // encountered in this process LuceneException keep; if (tvx) { try { tvx->close(); } catch (LuceneException& e) { if (keep.isNull()) keep = e; } } if (tvd) { try { tvd->close(); } catch (LuceneException& e) { if (keep.isNull()) keep = e; } } if (tvf) { try { tvf->close(); } catch (LuceneException& e) { if (keep.isNull()) keep = e; } } keep.throwException(); } } LucenePlusPlus-rel_3.0.4/src/core/index/TermsHash.cpp000066400000000000000000000211611217574114600225440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHash.h" #include "DocumentsWriter.h" #include "TermsHashConsumer.h" #include "TermsHashPerThread.h" #include "TermsHashPerField.h" #include "TermsHashConsumerPerThread.h" #include "DocInverterPerThread.h" #include "TermsHashConsumerPerField.h" #include "IndexWriter.h" #include "MiscUtils.h" namespace Lucene { TermsHash::TermsHash(DocumentsWriterPtr docWriter, bool trackAllocations, TermsHashConsumerPtr consumer, TermsHashPtr nextTermsHash) { this->postingsFreeCount = 0; this->postingsAllocCount = 0; this->trackAllocations = false; this->postingsFreeList = Collection::newInstance(1); this->_docWriter = docWriter; this->consumer = consumer; this->nextTermsHash = nextTermsHash; this->trackAllocations = trackAllocations; bytesPerPosting = consumer->bytesPerPosting() + 4 * DocumentsWriter::POINTER_NUM_BYTE; postingsFreeChunk = (int32_t)((double)DocumentsWriter::BYTE_BLOCK_SIZE / (double)bytesPerPosting); } TermsHash::~TermsHash() { } InvertedDocConsumerPerThreadPtr TermsHash::addThread(DocInverterPerThreadPtr docInverterPerThread) { return newLucene(docInverterPerThread, shared_from_this(), nextTermsHash, TermsHashPerThreadPtr()); } TermsHashPerThreadPtr TermsHash::addThread(DocInverterPerThreadPtr docInverterPerThread, TermsHashPerThreadPtr primaryPerThread) { return newLucene(docInverterPerThread, shared_from_this(), nextTermsHash, primaryPerThread); } void TermsHash::setFieldInfos(FieldInfosPtr fieldInfos) { this->fieldInfos = fieldInfos; consumer->setFieldInfos(fieldInfos); } void TermsHash::abort() { consumer->abort(); if (nextTermsHash) nextTermsHash->abort(); } void TermsHash::shrinkFreePostings(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state) { BOOST_ASSERT(postingsFreeCount == postingsAllocCount); int32_t newSize = 1; if (newSize != postingsFreeList.size()) { if (postingsFreeCount > 
newSize) { if (trackAllocations) DocumentsWriterPtr(_docWriter)->bytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting); postingsFreeCount = newSize; postingsAllocCount = newSize; } postingsFreeList.resize(newSize); } } void TermsHash::closeDocStore(SegmentWriteStatePtr state) { SyncLock syncLock(this); consumer->closeDocStore(state); if (nextTermsHash) nextTermsHash->closeDocStore(state); } void TermsHash::flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state) { SyncLock syncLock(this); MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField childThreadsAndFields(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::newInstance()); MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField nextThreadsAndFields; if (nextTermsHash) nextThreadsAndFields = MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::newInstance(); for (MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { Collection childFields(Collection::newInstance()); Collection nextChildFields; if (nextTermsHash) nextChildFields = Collection::newInstance(); for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { childFields.add(boost::static_pointer_cast(*perField)->consumer); if (nextTermsHash) nextChildFields.add(boost::static_pointer_cast(*perField)->nextPerField); } childThreadsAndFields.put(boost::static_pointer_cast(entry->first)->consumer, childFields); if (nextTermsHash) nextThreadsAndFields.put(boost::static_pointer_cast(entry->first)->nextPerThread, nextChildFields); } consumer->flush(childThreadsAndFields, state); shrinkFreePostings(threadsAndFields, state); if (nextTermsHash) nextTermsHash->flush(nextThreadsAndFields, state); } bool TermsHash::freeRAM() { if (!trackAllocations) return false; bool any = false; int64_t 
bytesFreed = 0; { SyncLock syncLock(this); int32_t numToFree = postingsFreeCount >= postingsFreeChunk ? postingsFreeChunk : postingsFreeCount; any = (numToFree > 0); if (any) { MiscUtils::arrayFill(postingsFreeList.begin(), postingsFreeCount - numToFree, postingsFreeCount, RawPostingListPtr()); postingsFreeCount -= numToFree; postingsAllocCount -= numToFree; bytesFreed = -numToFree * bytesPerPosting; any = true; } } if (any) DocumentsWriterPtr(_docWriter)->bytesAllocated(bytesFreed); if (nextTermsHash && nextTermsHash->freeRAM()) any = true; return any; } void TermsHash::recyclePostings(Collection postings, int32_t numPostings) { SyncLock syncLock(this); BOOST_ASSERT(postings.size() >= numPostings); // Move all Postings from this ThreadState back to our free list. We pre-allocated this array while we // were creating Postings to make sure it's large enough BOOST_ASSERT(postingsFreeCount + numPostings <= postingsFreeList.size()); MiscUtils::arrayCopy(postings.begin(), 0, postingsFreeList.begin(), postingsFreeCount, numPostings); postingsFreeCount += numPostings; } void TermsHash::getPostings(Collection postings) { SyncLock syncLock(this); DocumentsWriterPtr docWriter(_docWriter); IndexWriterPtr writer(docWriter->_writer); BOOST_ASSERT(writer->testPoint(L"TermsHash.getPostings start")); BOOST_ASSERT(postingsFreeCount <= postingsFreeList.size()); BOOST_ASSERT(postingsFreeCount <= postingsAllocCount); int32_t numToCopy = postingsFreeCount < postings.size() ? 
postingsFreeCount : postings.size(); int32_t start = postingsFreeCount - numToCopy; BOOST_ASSERT(start >= 0); BOOST_ASSERT(start + numToCopy <= postingsFreeList.size()); BOOST_ASSERT(numToCopy <= postings.size()); MiscUtils::arrayCopy(postingsFreeList.begin(), start, postings.begin(), 0, numToCopy); // Directly allocate the remainder if any if (numToCopy != postings.size()) { int32_t extra = postings.size() - numToCopy; int32_t newPostingsAllocCount = postingsAllocCount + extra; consumer->createPostings(postings, numToCopy, extra); BOOST_ASSERT(writer->testPoint(L"TermsHash.getPostings after create")); postingsAllocCount += extra; if (trackAllocations) docWriter->bytesAllocated(extra * bytesPerPosting); if (newPostingsAllocCount > postingsFreeList.size()) { // Pre-allocate the postingsFreeList so it's large enough to hold all postings we've given out postingsFreeList = Collection::newInstance(MiscUtils::getNextSize(newPostingsAllocCount)); } } postingsFreeCount -= numToCopy; if (trackAllocations) docWriter->bytesUsed(postings.size() * bytesPerPosting); } } LucenePlusPlus-rel_3.0.4/src/core/index/TermsHashConsumer.cpp000066400000000000000000000011231217574114600242540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashConsumer.h" namespace Lucene { TermsHashConsumer::~TermsHashConsumer() { } void TermsHashConsumer::setFieldInfos(FieldInfosPtr fieldInfos) { this->fieldInfos = fieldInfos; } } LucenePlusPlus-rel_3.0.4/src/core/index/TermsHashConsumerPerField.cpp000066400000000000000000000007571217574114600257030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashConsumerPerField.h" namespace Lucene { TermsHashConsumerPerField::~TermsHashConsumerPerField() { } } LucenePlusPlus-rel_3.0.4/src/core/index/TermsHashConsumerPerThread.cpp000066400000000000000000000007621217574114600260630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashConsumerPerThread.h" namespace Lucene { TermsHashConsumerPerThread::~TermsHashConsumerPerThread() { } } LucenePlusPlus-rel_3.0.4/src/core/index/TermsHashPerField.cpp000066400000000000000000000460101217574114600241570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashPerField.h" #include "TermsHashPerThread.h" #include "TermsHashConsumerPerThread.h" #include "TermsHashConsumerPerField.h" #include "TermsHash.h" #include "TermAttribute.h" #include "AttributeSource.h" #include "DocInverterPerField.h" #include "DocumentsWriter.h" #include "IntBlockPool.h" #include "CharBlockPool.h" #include "ByteSliceReader.h" #include "RawPostingList.h" #include "FieldInvertState.h" #include "UTF8Stream.h" #include "MiscUtils.h" namespace Lucene { TermsHashPerField::TermsHashPerField(DocInverterPerFieldPtr docInverterPerField, TermsHashPerThreadPtr perThread, TermsHashPerThreadPtr nextPerThread, FieldInfoPtr fieldInfo) { this->_docInverterPerField = docInverterPerField; this->_perThread = perThread; this->nextPerThread = nextPerThread; this->fieldInfo = fieldInfo; } TermsHashPerField::~TermsHashPerField() { } void TermsHashPerField::initialize() { this->postingsCompacted = false; this->numPostings = 0; this->postingsHashSize = 4; this->postingsHashHalfSize = this->postingsHashSize / 2; this->postingsHashMask = this->postingsHashSize - 1; this->postingsHash = Collection::newInstance(postingsHashSize); this->doCall = false; this->doNextCall = false; this->intUptoStart = 0; TermsHashPerThreadPtr perThread(_perThread); intPool = perThread->intPool; charPool = perThread->charPool; bytePool = perThread->bytePool; docState = perThread->docState; DocInverterPerFieldPtr docInverterPerField(_docInverterPerField); fieldState = docInverterPerField->fieldState; this->consumer = perThread->consumer->addField(shared_from_this(), fieldInfo); streamCount = consumer->getStreamCount(); numPostingInt = 2 * streamCount; if (nextPerThread) nextPerField = boost::dynamic_pointer_cast(nextPerThread->addField(docInverterPerField, fieldInfo)); } void TermsHashPerField::shrinkHash(int32_t targetSize) { BOOST_ASSERT(postingsCompacted || numPostings == 0); 
int32_t newSize = 4; if (newSize != postingsHash.size()) { postingsHash.resize(newSize); postingsHashSize = newSize; postingsHashHalfSize = newSize / 2; postingsHashMask = newSize - 1; } MiscUtils::arrayFill(postingsHash.begin(), 0, postingsHash.size(), RawPostingListPtr()); } void TermsHashPerField::reset() { if (!postingsCompacted) compactPostings(); BOOST_ASSERT(numPostings <= postingsHash.size()); if (numPostings > 0) { TermsHashPtr(TermsHashPerThreadPtr(_perThread)->_termsHash)->recyclePostings(postingsHash, numPostings); MiscUtils::arrayFill(postingsHash.begin(), 0, numPostings, RawPostingListPtr()); numPostings = 0; } postingsCompacted = false; if (nextPerField) nextPerField->reset(); } void TermsHashPerField::abort() { SyncLock syncLock(this); reset(); if (nextPerField) nextPerField->abort(); } void TermsHashPerField::initReader(ByteSliceReaderPtr reader, RawPostingListPtr p, int32_t stream) { BOOST_ASSERT(stream < streamCount); IntArray ints(intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]); int32_t upto = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); reader->init(bytePool, p->byteStart + stream * ByteBlockPool::FIRST_LEVEL_SIZE(), ints[upto + stream]); } void TermsHashPerField::compactPostings() { SyncLock syncLock(this); int32_t upto = 0; for (int32_t i = 0; i < postingsHashSize; ++i) { if (postingsHash[i]) { if (upto < i) { postingsHash[upto] = postingsHash[i]; postingsHash[i].reset(); } ++upto; } } BOOST_ASSERT(upto == numPostings); postingsCompacted = true; } struct comparePostings { comparePostings(Collection buffers) { this->buffers = buffers; } /// Compares term text for two Posting instance inline bool operator()(const RawPostingListPtr& first, const RawPostingListPtr& second) const { if (first == second) return false; wchar_t* text1 = buffers[first->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); int32_t pos1 = (first->textStart & DocumentsWriter::CHAR_BLOCK_MASK); wchar_t* text2 = buffers[second->textStart >> 
DocumentsWriter::CHAR_BLOCK_SHIFT].get(); int32_t pos2 = (second->textStart & DocumentsWriter::CHAR_BLOCK_MASK); BOOST_ASSERT(text1 != text2 || pos1 != pos2); while (true) { wchar_t c1 = text1[pos1++]; wchar_t c2 = text2[pos2++]; if (c1 != c2) { if (c2 == UTF8Base::UNICODE_TERMINATOR) return false; else if (c1 == UTF8Base::UNICODE_TERMINATOR) return true; else return (c1 < c2); } else { // This method should never compare equal postings unless first == second BOOST_ASSERT(c1 != UTF8Base::UNICODE_TERMINATOR); } } } Collection buffers; }; Collection TermsHashPerField::sortPostings() { compactPostings(); std::sort(postingsHash.begin(), postingsHash.begin() + numPostings, comparePostings(charPool->buffers)); return postingsHash; } bool TermsHashPerField::postingEquals(const wchar_t* tokenText, int32_t tokenTextLen) { wchar_t* text = TermsHashPerThreadPtr(_perThread)->charPool->buffers[p->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); BOOST_ASSERT(text); int32_t pos = (p->textStart & DocumentsWriter::CHAR_BLOCK_MASK); int32_t tokenPos = 0; for (; tokenPos < tokenTextLen; ++pos, ++tokenPos) { if (tokenText[tokenPos] != text[pos]) return false; } return (text[pos] == UTF8Base::UNICODE_TERMINATOR); } void TermsHashPerField::start(FieldablePtr field) { termAtt = fieldState->attributeSource->addAttribute(); consumer->start(field); if (nextPerField) nextPerField->start(field); } bool TermsHashPerField::start(Collection fields, int32_t count) { doCall = consumer->start(fields, count); if (nextPerField) doNextCall = nextPerField->start(fields, count); return (doCall || doNextCall); } void TermsHashPerField::add(int32_t textStart) { // Secondary entry point (for 2nd and subsequent TermsHash), we hash by textStart int32_t code = textStart; int32_t hashPos = (code & postingsHashMask); BOOST_ASSERT(!postingsCompacted); // Locate RawPostingList in hash p = postingsHash[hashPos]; if (p && p->textStart != textStart) { // Conflict: keep searching different locations in the hash 
table. int32_t inc = (((code >> 8) + code) | 1); do { code += inc; hashPos = (code & postingsHashMask); p = postingsHash[hashPos]; } while (p && p->textStart != textStart); } if (!p) { // First time we are seeing this token since we last flushed the hash. TermsHashPerThreadPtr perThread(_perThread); // Refill? if (perThread->freePostingsCount == 0) perThread->morePostings(); // Pull next free RawPostingList from free list p = perThread->freePostings[--perThread->freePostingsCount]; BOOST_ASSERT(p); p->textStart = textStart; BOOST_ASSERT(!postingsHash[hashPos]); postingsHash[hashPos] = p; ++numPostings; if (numPostings == postingsHashHalfSize) rehashPostings(2 * postingsHashSize); // Init stream slices if (numPostingInt + intPool->intUpto > DocumentsWriter::INT_BLOCK_SIZE) intPool->nextBuffer(); if (DocumentsWriter::BYTE_BLOCK_SIZE - bytePool->byteUpto < numPostingInt * ByteBlockPool::FIRST_LEVEL_SIZE()) bytePool->nextBuffer(); intUptos = intPool->buffer; intUptoStart = intPool->intUpto; intPool->intUpto += streamCount; p->intStart = intUptoStart + intPool->intOffset; for (int32_t i = 0; i < streamCount; ++i) { int32_t upto = bytePool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); intUptos[intUptoStart + i] = upto + bytePool->byteOffset; } p->byteStart = intUptos[intUptoStart]; consumer->newTerm(p); } else { intUptos = intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]; intUptoStart = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); consumer->addTerm(p); } } void TermsHashPerField::add() { BOOST_ASSERT(!postingsCompacted); // Get the text of this term. 
wchar_t* tokenText = termAtt->termBufferArray(); int32_t tokenTextLen = termAtt->termLength(); // Compute hashcode and replace any invalid UTF16 sequences int32_t downto = tokenTextLen; int32_t code = 0; while (downto > 0) { wchar_t ch = tokenText[--downto]; #ifdef LPP_UNICODE_CHAR_SIZE_2 if (ch >= UTF8Base::TRAIL_SURROGATE_MIN && ch <= UTF8Base::TRAIL_SURROGATE_MAX) { if (downto == 0) { // Unpaired ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } else { wchar_t ch2 = tokenText[downto - 1]; if (ch2 >= UTF8Base::LEAD_SURROGATE_MIN && ch2 <= UTF8Base::LEAD_SURROGATE_MAX) { // OK: high followed by low. This is a valid surrogate pair. code = ((code * 31) + ch) * 31 + ch2; --downto; continue; } else { // Unpaired ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } } } else if (ch >= UTF8Base::LEAD_SURROGATE_MIN && (ch <= UTF8Base::LEAD_SURROGATE_MAX || ch == UTF8Base::UNICODE_TERMINATOR)) { // Unpaired or UTF8Base::UNICODE_TERMINATOR ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } #else if (ch == UTF8Base::UNICODE_TERMINATOR) { // Unpaired or UTF8Base::UNICODE_TERMINATOR ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } #endif code = (code * 31) + ch; } int32_t hashPos = (code & postingsHashMask); // Locate RawPostingList in hash p = postingsHash[hashPos]; if (p && !postingEquals(tokenText, tokenTextLen)) { // Conflict: keep searching different locations in the hash table. int32_t inc = (((code >> 8) + code) | 1); do { code += inc; hashPos = (code & postingsHashMask); p = postingsHash[hashPos]; } while (p && !postingEquals(tokenText, tokenTextLen)); } if (!p) { // First time we are seeing this token since we last flushed the hash. int32_t textLen1 = 1 + tokenTextLen; if (textLen1 + charPool->charUpto > DocumentsWriter::CHAR_BLOCK_SIZE) { if (textLen1 > DocumentsWriter::CHAR_BLOCK_SIZE) { // Just skip this term, to remain as robust as possible during indexing. 
A TokenFilter // can be inserted into the analyzer chain if other behavior is wanted (pruning the term // to a prefix, throwing an exception, etc). if (docState->maxTermPrefix.empty()) docState->maxTermPrefix.append(tokenText, std::min((int32_t)30, tokenTextLen)); consumer->skippingLongTerm(); return; } charPool->nextBuffer(); } TermsHashPerThreadPtr perThread(_perThread); // Refill? if (perThread->freePostingsCount == 0) perThread->morePostings(); // Pull next free RawPostingList from free list p = perThread->freePostings[--perThread->freePostingsCount]; BOOST_ASSERT(p); wchar_t* text = charPool->buffer.get(); int32_t textUpto = charPool->charUpto; p->textStart = textUpto + charPool->charOffset; charPool->charUpto += textLen1; MiscUtils::arrayCopy(tokenText, 0, text, textUpto, tokenTextLen); text[textUpto + tokenTextLen] = UTF8Base::UNICODE_TERMINATOR; BOOST_ASSERT(!postingsHash[hashPos]); postingsHash[hashPos] = p; ++numPostings; if (numPostings == postingsHashHalfSize) rehashPostings(2 * postingsHashSize); // Init stream slices if (numPostingInt + intPool->intUpto > DocumentsWriter::INT_BLOCK_SIZE) intPool->nextBuffer(); if (DocumentsWriter::BYTE_BLOCK_SIZE - bytePool->byteUpto < numPostingInt * ByteBlockPool::FIRST_LEVEL_SIZE()) bytePool->nextBuffer(); intUptos = intPool->buffer; intUptoStart = intPool->intUpto; intPool->intUpto += streamCount; p->intStart = intUptoStart + intPool->intOffset; for (int32_t i = 0; i < streamCount; ++i) { int32_t upto = bytePool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); intUptos[intUptoStart + i] = upto + bytePool->byteOffset; } p->byteStart = intUptos[intUptoStart]; consumer->newTerm(p); } else { intUptos = intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]; intUptoStart = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); consumer->addTerm(p); } if (doNextCall) nextPerField->add(p->textStart); } void TermsHashPerField::writeByte(int32_t stream, int8_t b) { int32_t upto = intUptos[intUptoStart + stream]; 
ByteArray bytes(bytePool->buffers[upto >> DocumentsWriter::BYTE_BLOCK_SHIFT]); BOOST_ASSERT(bytes); int32_t offset = (upto & DocumentsWriter::BYTE_BLOCK_MASK); if (bytes[offset] != 0) { // End of slice; allocate a new one offset = bytePool->allocSlice(bytes, offset); bytes = bytePool->buffer; intUptos[intUptoStart + stream] = offset + bytePool->byteOffset; } bytes[offset] = b; intUptos[intUptoStart + stream]++; } void TermsHashPerField::writeBytes(int32_t stream, const uint8_t* b, int32_t offset, int32_t length) { int32_t end = offset + length; for (int32_t i = offset; i < end; ++i) writeByte(stream, b[i]); } void TermsHashPerField::writeVInt(int32_t stream, int32_t i) { BOOST_ASSERT(stream < streamCount); while ((i & ~0x7f) != 0) { writeByte(stream, (uint8_t)((i & 0x7f) | 0x80)); i = MiscUtils::unsignedShift(i, 7); } writeByte(stream, (uint8_t)i); } void TermsHashPerField::finish() { consumer->finish(); if (nextPerField) nextPerField->finish(); } void TermsHashPerField::rehashPostings(int32_t newSize) { int32_t newMask = newSize - 1; Collection newHash(Collection::newInstance(newSize)); TermsHashPerThreadPtr perThread(_perThread); for (int32_t i = 0; i < postingsHashSize; ++i) { RawPostingListPtr p0(postingsHash[i]); if (p0) { int32_t code; if (perThread->primary) { int32_t start = (p0->textStart & DocumentsWriter::CHAR_BLOCK_MASK); CharArray text = charPool->buffers[p0->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]; int32_t pos = start; while (text[pos] != UTF8Base::UNICODE_TERMINATOR) ++pos; code = 0; while (pos > start) code = (code * 31) + text[--pos]; } else code = p0->textStart; int32_t hashPos = (code & newMask); BOOST_ASSERT(hashPos >= 0); if (newHash[hashPos]) { int32_t inc = (((code >> 8) + code) | 1); do { code += inc; hashPos = (code & newMask); } while (newHash[hashPos]); } newHash[hashPos] = p0; } } postingsHashMask = newMask; postingsHash = newHash; postingsHashSize = newSize; postingsHashHalfSize = (newSize >> 1); } } 
LucenePlusPlus-rel_3.0.4/src/core/index/TermsHashPerThread.cpp000066400000000000000000000102221217574114600243370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashPerThread.h" #include "TermsHashPerField.h" #include "DocInverterPerThread.h" #include "TermsHash.h" #include "TermsHashConsumer.h" #include "TermsHashConsumerPerThread.h" #include "CharBlockPool.h" #include "IntBlockPool.h" #include "DocumentsWriter.h" namespace Lucene { TermsHashPerThread::TermsHashPerThread(DocInverterPerThreadPtr docInverterPerThread, TermsHashPtr termsHash, TermsHashPtr nextTermsHash, TermsHashPerThreadPtr primaryPerThread) { this->freePostings = Collection::newInstance(256); this->freePostingsCount = 0; this->primary = false; this->_docInverterPerThread = docInverterPerThread; this->_termsHash = termsHash; this->nextTermsHash = nextTermsHash; this->_primaryPerThread = primaryPerThread; } TermsHashPerThread::~TermsHashPerThread() { } void TermsHashPerThread::initialize() { DocInverterPerThreadPtr docInverterPerThread(_docInverterPerThread); TermsHashPtr termsHash(_termsHash); docState = docInverterPerThread->docState; consumer = termsHash->consumer->addThread(shared_from_this()); if (nextTermsHash) { // We are primary charPool = newLucene(DocumentsWriterPtr(termsHash->_docWriter)); primary = true; } else { charPool = TermsHashPerThreadPtr(_primaryPerThread)->charPool; primary = false; } intPool = newLucene(DocumentsWriterPtr(termsHash->_docWriter), termsHash->trackAllocations); bytePool = newLucene(DocumentsWriterPtr(termsHash->_docWriter)->byteBlockAllocator, termsHash->trackAllocations); if (nextTermsHash) nextPerThread = 
nextTermsHash->addThread(docInverterPerThread, shared_from_this()); } InvertedDocConsumerPerFieldPtr TermsHashPerThread::addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo) { return newLucene(docInverterPerField, shared_from_this(), nextPerThread, fieldInfo); } void TermsHashPerThread::abort() { SyncLock syncLock(this); reset(true); consumer->abort(); if (nextPerThread) nextPerThread->abort(); } void TermsHashPerThread::morePostings() { BOOST_ASSERT(freePostingsCount == 0); TermsHashPtr(_termsHash)->getPostings(freePostings); freePostingsCount = freePostings.size(); BOOST_ASSERT(noNullPostings(freePostings, freePostingsCount, L"consumer=" + consumer->toString())); } bool TermsHashPerThread::noNullPostings(Collection postings, int32_t count, const String& details) { for (int32_t i = 0; i < count; ++i) { BOOST_ASSERT(postings[i]); } return true; } void TermsHashPerThread::startDocument() { consumer->startDocument(); if (nextPerThread) nextPerThread->consumer->startDocument(); } DocWriterPtr TermsHashPerThread::finishDocument() { DocWriterPtr doc(consumer->finishDocument()); DocWriterPtr doc2(nextPerThread ? nextPerThread->consumer->finishDocument() : DocWriterPtr()); if (!doc) return doc2; else { doc->setNext(doc2); return doc; } } void TermsHashPerThread::reset(bool recyclePostings) { intPool->reset(); bytePool->reset(); if (primary) charPool->reset(); if (recyclePostings) { TermsHashPtr(_termsHash)->recyclePostings(freePostings, freePostingsCount); freePostingsCount = 0; } } } LucenePlusPlus-rel_3.0.4/src/core/msvc/000077500000000000000000000000001217574114600200025ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/msvc/LuceneInc.cpp000066400000000000000000000005521217574114600223550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" LucenePlusPlus-rel_3.0.4/src/core/msvc/dllmain.cpp000066400000000000000000000013121217574114600221230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #if defined(_WIN32) && defined(LPP_HAVE_DLL) BOOL APIENTRY DllMain(HMODULE module, DWORD ul_reason_for_call, LPVOID lpReserved) { switch (ul_reason_for_call) { case DLL_PROCESS_ATTACH: case DLL_THREAD_ATTACH: case DLL_THREAD_DETACH: case DLL_PROCESS_DETACH: break; } return TRUE; } #endif LucenePlusPlus-rel_3.0.4/src/core/msvc/lucene++.vcproj000066400000000000000000002270441217574114600226410ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/core/queryparser/000077500000000000000000000000001217574114600214145ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/queryparser/FastCharStream.cpp000066400000000000000000000061171217574114600247740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FastCharStream.h" #include "Reader.h" #include "MiscUtils.h" namespace Lucene { FastCharStream::FastCharStream(ReaderPtr reader) { input = reader; bufferLength = 0; bufferPosition = 0; tokenStart = 0; bufferStart = 0; } FastCharStream::~FastCharStream() { } wchar_t FastCharStream::readChar() { if (bufferPosition >= bufferLength) refill(); return buffer[bufferPosition++]; } void FastCharStream::refill() { int32_t newPosition = bufferLength - tokenStart; if (tokenStart == 0) // token won't fit in buffer { if (!buffer) buffer = CharArray::newInstance(2048); else if (bufferLength == buffer.size()) // grow buffer buffer.resize(buffer.size() * 2); } else // shift token to front MiscUtils::arrayCopy(buffer.get(), tokenStart, buffer.get(), 0, newPosition); bufferLength = newPosition; // update state bufferPosition = newPosition; bufferStart += tokenStart; tokenStart = 0; int32_t charsRead = input->read(buffer.get(), newPosition, buffer.size() - newPosition); // fill space in buffer if (charsRead == -1) boost::throw_exception(IOException(L"read past eof")); else bufferLength += charsRead; } wchar_t FastCharStream::BeginToken() { tokenStart = bufferPosition; return readChar(); } void FastCharStream::backup(int32_t amount) { bufferPosition -= amount; } String FastCharStream::GetImage() { return String(buffer.get() + tokenStart, bufferPosition - tokenStart); } CharArray FastCharStream::GetSuffix(int32_t length) { CharArray value(CharArray::newInstance(length)); MiscUtils::arrayCopy(buffer.get(), bufferPosition - length, value.get(), 0, length); return value; } void FastCharStream::Done() { try { input->close(); } catch (IOException&) { // ignore IO exceptions } } int32_t FastCharStream::getColumn() { return bufferStart + bufferPosition; } int32_t FastCharStream::getLine() { return 1; } int32_t FastCharStream::getEndColumn() { return bufferStart + bufferPosition; } 
int32_t FastCharStream::getEndLine() { return 1; } int32_t FastCharStream::getBeginColumn() { return bufferStart + tokenStart; } int32_t FastCharStream::getBeginLine() { return 1; } } LucenePlusPlus-rel_3.0.4/src/core/queryparser/MultiFieldQueryParser.cpp000066400000000000000000000175361217574114600263750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiFieldQueryParser.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "PhraseQuery.h" #include "MultiPhraseQuery.h" #include "MiscUtils.h" namespace Lucene { MultiFieldQueryParser::MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, AnalyzerPtr analyzer, MapStringDouble boosts) : QueryParser(matchVersion, L"", analyzer) { this->boosts = boosts; this->fields = fields; } MultiFieldQueryParser::MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, AnalyzerPtr analyzer) : QueryParser(matchVersion, L"", analyzer) { this->fields = fields; } MultiFieldQueryParser::~MultiFieldQueryParser() { } QueryPtr MultiFieldQueryParser::getFieldQuery(const String& field, const String& queryText, int32_t slop) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { QueryPtr query(QueryParser::getFieldQuery(*field, queryText)); if (query) { // If the user passes a map of boosts if (boosts) { // Get the boost from the map and apply them MapStringDouble::iterator boost = boosts.find(*field); if (boost != boosts.end()) query->setBoost(boost->second); } applySlop(query, slop); clauses.add(newLucene(query, BooleanClause::SHOULD)); } } if 
(clauses.empty()) // happens for stopwords return QueryPtr(); return getBooleanQuery(clauses, true); } QueryPtr query(QueryParser::getFieldQuery(field, queryText)); applySlop(query, slop); return query; } QueryPtr MultiFieldQueryParser::getFieldQuery(const String& field, const String& queryText) { return getFieldQuery(field, queryText, 0); } void MultiFieldQueryParser::applySlop(QueryPtr query, int32_t slop) { if (MiscUtils::typeOf(query)) boost::dynamic_pointer_cast(query)->setSlop(slop); if (MiscUtils::typeOf(query)) boost::dynamic_pointer_cast(query)->setSlop(slop); } QueryPtr MultiFieldQueryParser::getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) clauses.add(newLucene(getFuzzyQuery(*field, termStr, minSimilarity), BooleanClause::SHOULD)); return getBooleanQuery(clauses, true); } return QueryParser::getFuzzyQuery(field, termStr, minSimilarity); } QueryPtr MultiFieldQueryParser::getPrefixQuery(const String& field, const String& termStr) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) clauses.add(newLucene(getPrefixQuery(*field, termStr), BooleanClause::SHOULD)); return getBooleanQuery(clauses, true); } return QueryParser::getPrefixQuery(field, termStr); } QueryPtr MultiFieldQueryParser::getWildcardQuery(const String& field, const String& termStr) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) clauses.add(newLucene(getWildcardQuery(*field, termStr), BooleanClause::SHOULD)); return getBooleanQuery(clauses, true); } return QueryParser::getWildcardQuery(field, termStr); } QueryPtr MultiFieldQueryParser::getRangeQuery(const String& field, const String& part1, const String& part2, 
bool inclusive) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) clauses.add(newLucene(getRangeQuery(*field, part1, part2, inclusive), BooleanClause::SHOULD)); return getBooleanQuery(clauses, true); } return QueryParser::getRangeQuery(field, part1, part2, inclusive); } QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, AnalyzerPtr analyzer) { if (queries.size() != fields.size()) boost::throw_exception(IllegalArgumentException(L"queries.size() != fields.size()")); BooleanQueryPtr booleanQuery(newLucene()); for (int32_t i = 0; i < fields.size(); ++i) { QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); QueryPtr query(queryParser->parse(queries[i])); if (query && (!MiscUtils::typeOf(query) || !boost::dynamic_pointer_cast(query)->getClauses().empty())) booleanQuery->add(query, BooleanClause::SHOULD); } return booleanQuery; } QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, const String& query, Collection fields, Collection flags, AnalyzerPtr analyzer) { if (fields.size() != flags.size()) boost::throw_exception(IllegalArgumentException(L"fields.size() != flags.size()")); BooleanQueryPtr booleanQuery(newLucene()); for (int32_t i = 0; i < fields.size(); ++i) { QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); QueryPtr q(queryParser->parse(query)); if (q && (!MiscUtils::typeOf(q) || !boost::dynamic_pointer_cast(q)->getClauses().empty())) booleanQuery->add(q, flags[i]); } return booleanQuery; } QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, Collection flags, AnalyzerPtr analyzer) { if (queries.size() != fields.size() || fields.size() != flags.size()) boost::throw_exception(IllegalArgumentException(L"queries, fields, and flags array have have different length")); 
BooleanQueryPtr booleanQuery(newLucene()); for (int32_t i = 0; i < fields.size(); ++i) { QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); QueryPtr query(queryParser->parse(queries[i])); if (query && (!MiscUtils::typeOf(query) || !boost::dynamic_pointer_cast(query)->getClauses().empty())) booleanQuery->add(query, flags[i]); } return booleanQuery; } } LucenePlusPlus-rel_3.0.4/src/core/queryparser/QueryParseError.cpp000066400000000000000000000103111217574114600252260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParseError.h" #include "QueryParserToken.h" #include "StringUtils.h" namespace Lucene { QueryParseError::~QueryParseError() { } String QueryParseError::lexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, const String& errorAfter, wchar_t curChar) { StringStream buffer; buffer << L"Lexical error at line " << errorLine << L", column " << errorColumn + L". 
Encountered:"; if (EOFSeen) buffer << L""; else buffer << L"\"" << addEscapes(String(1, curChar)) << L"\""; buffer << L" (" + (int32_t)curChar << L"), after : \"" << addEscapes(errorAfter) + L"\""; return buffer.str(); } String QueryParseError::parseError(QueryParserTokenPtr currentToken, Collection< Collection > expectedTokenSequences, Collection tokenImage) { StringStream expected; int32_t maxSize = 0; for (int32_t i = 0; i < expectedTokenSequences.size(); ++i) { if (maxSize < expectedTokenSequences[i].size()) maxSize = expectedTokenSequences[i].size(); for (int32_t j = 0; j < expectedTokenSequences[i].size(); ++j) expected << tokenImage[expectedTokenSequences[i][j]] << L" "; if (expectedTokenSequences[i][expectedTokenSequences[i].size() - 1] != 0) expected << L"..."; expected << L"\n "; } StringStream retval; retval << L"Encountered \""; QueryParserTokenPtr token(currentToken->next); for (int32_t i = 0; i < maxSize; ++i) { if (i != 0) retval << L" "; if (token->kind == 0) { retval << tokenImage[0]; break; } retval << L" " << tokenImage[token->kind] << L" \"" << addEscapes(token->image) << L" \""; token = token->next; } retval << L"\" at line " << currentToken->next->beginLine << L", column " << currentToken->next->beginColumn; retval << L".\n"; if (expectedTokenSequences.size() == 1) retval << L"Was expecting:\n "; else retval << L"Was expecting one of:\n "; retval << expected.str(); return retval.str(); } String QueryParseError::addEscapes(const String& str) { StringStream buffer; for (String::const_iterator ch = str.begin(); ch != str.end(); ++ch) { switch (*ch) { case L'\0': continue; case L'\b': buffer << L"\\b"; continue; case L'\t': buffer << L"\\t"; continue; case L'\n': buffer << L"\\n"; continue; case L'\f': buffer << L"\\f"; continue; case L'\r': buffer << L"\\r"; continue; case L'\"': buffer << L"\\\""; continue; case L'\'': buffer << L"\\\'"; continue; case L'\\': buffer << L"\\\\"; continue; default: if (*ch < 0x20 || *ch > 0x7e) { String 
hexChar(L"0000" + StringUtils::toString(*ch, 16)); buffer << L"\\u" + hexChar.substr(hexChar.length() - 4); } else buffer << *ch; continue; } } return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/queryparser/QueryParser.cpp000066400000000000000000001500001217574114600243760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "QueryParser.h" #include "QueryParserTokenManager.h" #include "QueryParserToken.h" #include "QueryParseError.h" #include "MultiTermQuery.h" #include "TermQuery.h" #include "TermRangeQuery.h" #include "FuzzyQuery.h" #include "FastCharStream.h" #include "StringReader.h" #include "BooleanQuery.h" #include "CachingTokenFilter.h" #include "TermAttribute.h" #include "Term.h" #include "PositionIncrementAttribute.h" #include "PhraseQuery.h" #include "MultiPhraseQuery.h" #include "PrefixQuery.h" #include "WildcardQuery.h" #include "MatchAllDocsQuery.h" #include "SimpleAnalyzer.h" #include "DateField.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t QueryParser::CONJ_NONE = 0; const int32_t QueryParser::CONJ_AND = 1; const int32_t QueryParser::CONJ_OR = 2; const int32_t QueryParser::MOD_NONE = 0; const int32_t QueryParser::MOD_NOT = 10; const int32_t QueryParser::MOD_REQ = 11; const int32_t QueryParser::jj_la1_0[] = { 0x300, 0x300, 0x1c00, 0x1c00, 0x3ed3f00, 0x90000, 0x20000, 0x3ed2000, 0x2690000, 0x100000, 0x100000, 0x20000, 0x30000000, 0x4000000, 0x30000000, 0x20000, 0x0, 0x40000000, 0x0, 0x20000, 0x100000, 0x20000, 0x3ed0000 }; const int32_t QueryParser::jj_la1_1[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 
0x0, 0x3, 0x0, 0x0, 0x0, 0x0 }; QueryParser::QueryParser(LuceneVersion::Version matchVersion, const String& field, AnalyzerPtr analyzer) { ConstructParser(newLucene(newLucene(L"")), QueryParserTokenManagerPtr()); this->analyzer = analyzer; this->field = field; this->enablePositionIncrements = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_29); } QueryParser::QueryParser(QueryParserCharStreamPtr stream) { ConstructParser(stream, QueryParserTokenManagerPtr()); } QueryParser::QueryParser(QueryParserTokenManagerPtr tokenMgr) { ConstructParser(QueryParserCharStreamPtr(), tokenMgr); } QueryParser::~QueryParser() { } void QueryParser::ConstructParser(QueryParserCharStreamPtr stream, QueryParserTokenManagerPtr tokenMgr) { _operator = OR_OPERATOR; lowercaseExpandedTerms = true; multiTermRewriteMethod = MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); allowLeadingWildcard = false; enablePositionIncrements = true; phraseSlop = 0; fuzzyMinSim = FuzzyQuery::defaultMinSimilarity(); fuzzyPrefixLength = FuzzyQuery::defaultPrefixLength; locale = std::locale(); dateResolution = DateTools::RESOLUTION_NULL; token_source = tokenMgr ? tokenMgr : newLucene(stream); token = newLucene(); _jj_ntk = -1; jj_la = 0; jj_gen = 0; jj_rescan = false; jj_gc = 0; jj_la1 = Collection::newInstance(23); jj_2_rtns = Collection::newInstance(1); for (int32_t i = 0; i < 23; ++i) jj_la1[i] = -1; for (int32_t i = 0; i < jj_2_rtns.size(); ++i) jj_2_rtns[i] = newInstance(); jj_expentries = Collection< Collection >::newInstance(); jj_kind = -1; jj_lasttokens = Collection::newInstance(100); jj_endpos = 0; } QueryPtr QueryParser::parse(const String& query) { ReInit(newLucene(newLucene(query))); try { // TopLevelQuery is a Query followed by the end-of-input (EOF) QueryPtr res(TopLevelQuery(field)); return res ? 
res : newBooleanQuery(false); } catch (QueryParserError& e) { boost::throw_exception(QueryParserError(L"Cannot parse '" + query + L"': " + e.getError())); } catch (TooManyClausesException&) { boost::throw_exception(QueryParserError(L"Cannot parse '" + query + L"': too many boolean clauses")); } return QueryPtr(); } AnalyzerPtr QueryParser::getAnalyzer() { return analyzer; } String QueryParser::getField() { return field; } double QueryParser::getFuzzyMinSim() { return fuzzyMinSim; } void QueryParser::setFuzzyMinSim(double fuzzyMinSim) { this->fuzzyMinSim = fuzzyMinSim; } int32_t QueryParser::getFuzzyPrefixLength() { return fuzzyPrefixLength; } void QueryParser::setFuzzyPrefixLength(int32_t fuzzyPrefixLength) { this->fuzzyPrefixLength = fuzzyPrefixLength; } void QueryParser::setPhraseSlop(int32_t phraseSlop) { this->phraseSlop = phraseSlop; } int32_t QueryParser::getPhraseSlop() { return phraseSlop; } void QueryParser::setAllowLeadingWildcard(bool allowLeadingWildcard) { this->allowLeadingWildcard = allowLeadingWildcard; } bool QueryParser::getAllowLeadingWildcard() { return allowLeadingWildcard; } void QueryParser::setEnablePositionIncrements(bool enable) { this->enablePositionIncrements = enable; } bool QueryParser::getEnablePositionIncrements() { return enablePositionIncrements; } void QueryParser::setDefaultOperator(Operator op) { this->_operator = op; } QueryParser::Operator QueryParser::getDefaultOperator() { return _operator; } void QueryParser::setLowercaseExpandedTerms(bool lowercaseExpandedTerms) { this->lowercaseExpandedTerms = lowercaseExpandedTerms; } bool QueryParser::getLowercaseExpandedTerms() { return lowercaseExpandedTerms; } void QueryParser::setMultiTermRewriteMethod(RewriteMethodPtr method) { multiTermRewriteMethod = method; } RewriteMethodPtr QueryParser::getMultiTermRewriteMethod() { return multiTermRewriteMethod; } void QueryParser::setLocale(std::locale locale) { this->locale = locale; } std::locale QueryParser::getLocale() { return locale; } 
void QueryParser::setDateResolution(DateTools::Resolution dateResolution) { this->dateResolution = dateResolution; } void QueryParser::setDateResolution(const String& fieldName, DateTools::Resolution dateResolution) { if (fieldName.empty()) boost::throw_exception(IllegalArgumentException(L"Field cannot be empty.")); if (!fieldToDateResolution) { // lazily initialize Map fieldToDateResolution = MapStringResolution::newInstance(); } fieldToDateResolution.put(fieldName, dateResolution); } DateTools::Resolution QueryParser::getDateResolution(const String& fieldName) { if (fieldName.empty()) boost::throw_exception(IllegalArgumentException(L"Field cannot be empty.")); if (!fieldToDateResolution) { // no field specific date resolutions set; return default date resolution instead return this->dateResolution; } MapStringResolution::iterator resolution = fieldToDateResolution.find(fieldName); if (resolution == fieldToDateResolution.end()) { // no date resolutions set for the given field; return default date resolution instead return this->dateResolution; } return resolution->second; } void QueryParser::setRangeCollator(CollatorPtr rc) { rangeCollator = rc; } CollatorPtr QueryParser::getRangeCollator() { return rangeCollator; } void QueryParser::addClause(Collection clauses, int32_t conj, int32_t mods, QueryPtr q) { bool required = false; bool prohibited = false; // If this term is introduced by AND, make the preceding term required, unless it's already prohibited if (!clauses.empty() && conj == CONJ_AND) { BooleanClausePtr c(clauses[clauses.size() - 1]); if (!c->isProhibited()) c->setOccur(BooleanClause::MUST); } if (!clauses.empty() && _operator == AND_OPERATOR && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, unless it's prohibited (that // means we leave -a OR b but +a OR b-->a OR b) notice if the input is a OR b, first term is parsed // as required; without this modification a OR b would parsed as +a OR b BooleanClausePtr 
c(clauses[clauses.size() - 1]); if (!c->isProhibited()) c->setOccur(BooleanClause::SHOULD); } // We might have been passed a null query; the term might have been filtered away by the analyzer. if (!q) return; if (_operator == OR_OPERATOR) { // We set REQUIRED if we're introduced by AND or +; PROHIBITED if introduced by NOT or -; make // sure not to set both. prohibited = (mods == MOD_NOT); required = (mods == MOD_REQ); if (conj == CONJ_AND && !prohibited) required = true; } else { // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED if not PROHIBITED and not // introduced by OR prohibited = (mods == MOD_NOT); required = (!prohibited && conj != CONJ_OR); } if (required && !prohibited) clauses.add(newBooleanClause(q, BooleanClause::MUST)); else if (!required && !prohibited) clauses.add(newBooleanClause(q, BooleanClause::SHOULD)); else if (!required && prohibited) clauses.add(newBooleanClause(q, BooleanClause::MUST_NOT)); else boost::throw_exception(RuntimeException(L"Clause cannot be both required and prohibited")); } QueryPtr QueryParser::getFieldQuery(const String& field, const String& queryText) { TokenStreamPtr source; try { source = analyzer->reusableTokenStream(field, newLucene(queryText)); source->reset(); } catch (IOException&) { source = analyzer->tokenStream(field, newLucene(queryText)); } CachingTokenFilterPtr buffer(newLucene(source)); TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; int32_t numTokens = 0; bool success = false; try { buffer->reset(); success = true; } catch (IOException&) { // success == false if we hit an exception } if (success) { if (buffer->hasAttribute()) termAtt = buffer->getAttribute(); if (buffer->hasAttribute()) posIncrAtt = buffer->getAttribute(); } int32_t positionCount = 0; bool severalTokensAtSamePosition = false; bool hasMoreTokens = false; if (termAtt) { try { hasMoreTokens = buffer->incrementToken(); while (hasMoreTokens) { ++numTokens; int32_t positionIncrement = posIncrAtt ? 
posIncrAtt->getPositionIncrement() : 1; if (positionIncrement != 0) positionCount += positionIncrement; else severalTokensAtSamePosition = true; hasMoreTokens = buffer->incrementToken(); } } catch (IOException&) { // ignore } } try { // rewind the buffer stream buffer->reset(); // close original stream - all tokens buffered source->close(); } catch (IOException&) { // ignore } if (numTokens == 0) return QueryPtr(); else if (numTokens == 1) { String term; try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); } catch (IOException&) { // safe to ignore, because we know the number of tokens } return newTermQuery(newLucene(field, term)); } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query BooleanQueryPtr q(newBooleanQuery(true)); for (int32_t i = 0; i < numTokens; ++i) { String term; try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); } catch (IOException&) { // safe to ignore, because we know the number of tokens } QueryPtr currentQuery(newTermQuery(newLucene(field, term))); q->add(currentQuery, BooleanClause::SHOULD); } return q; } else { // phrase query MultiPhraseQueryPtr mpq(newMultiPhraseQuery()); mpq->setSlop(phraseSlop); Collection multiTerms(Collection::newInstance()); int32_t position = -1; for (int32_t i = 0; i < numTokens; ++i) { String term; int32_t positionIncrement = 1; try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); if (posIncrAtt) positionIncrement = posIncrAtt->getPositionIncrement(); } catch (IOException&) { // safe to ignore, because we know the number of tokens } if (positionIncrement > 0 && !multiTerms.empty()) { if (enablePositionIncrements) mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end()), position); else mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end())); multiTerms.clear(); } position += positionIncrement; multiTerms.add(newLucene(field, 
term)); } if (enablePositionIncrements) mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end()), position); else mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end())); return mpq; } } else { PhraseQueryPtr pq(newPhraseQuery()); pq->setSlop(phraseSlop); int32_t position = -1; for (int32_t i = 0; i < numTokens; ++i) { String term; int32_t positionIncrement = 1; try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); if (posIncrAtt) positionIncrement = posIncrAtt->getPositionIncrement(); } catch (IOException&) { // safe to ignore, because we know the number of tokens } if (enablePositionIncrements) { position += positionIncrement; pq->add(newLucene(field, term), position); } else pq->add(newLucene(field, term)); } return pq; } } } QueryPtr QueryParser::getFieldQuery(const String& field, const String& queryText, int32_t slop) { QueryPtr query(getFieldQuery(field, queryText)); if (MiscUtils::typeOf(query)) boost::dynamic_pointer_cast(query)->setSlop(slop); if (MiscUtils::typeOf(query)) boost::dynamic_pointer_cast(query)->setSlop(slop); return query; } QueryPtr QueryParser::getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) { String date1(part1); String date2(part2); if (lowercaseExpandedTerms) { StringUtils::toLower(date1); StringUtils::toLower(date2); } try { boost::posix_time::ptime d1(DateTools::parseDate(date1, locale)); boost::posix_time::ptime d2; // The user can only specify the date, not the time, so make sure the time is set to // the latest possible time of that date to really include all documents if (inclusive) { d2 = boost::posix_time::ptime(DateTools::parseDate(date2, locale) + boost::posix_time::hours(23) + boost::posix_time::minutes(59) + boost::posix_time::seconds(59) + boost::posix_time::millisec(999)); } else d2 = boost::posix_time::ptime(DateTools::parseDate(date2, locale)); DateTools::Resolution resolution = getDateResolution(field); if 
(resolution == DateTools::RESOLUTION_NULL) { // no default or field specific date resolution has been set, use deprecated // DateField to maintain compatibility with pre-1.9 Lucene versions. date1 = DateField::dateToString(d1); date2 = DateField::dateToString(d2); } else { date1 = DateTools::dateToString(d1, resolution); date2 = DateTools::dateToString(d2, resolution); } } catch (...) { } return newRangeQuery(field, date1, date2, inclusive); } BooleanQueryPtr QueryParser::newBooleanQuery(bool disableCoord) { return newLucene(disableCoord); } BooleanClausePtr QueryParser::newBooleanClause(QueryPtr q, BooleanClause::Occur occur) { return newLucene(q, occur); } QueryPtr QueryParser::newTermQuery(TermPtr term) { return newLucene(term); } PhraseQueryPtr QueryParser::newPhraseQuery() { return newLucene(); } MultiPhraseQueryPtr QueryParser::newMultiPhraseQuery() { return newLucene(); } QueryPtr QueryParser::newPrefixQuery(TermPtr prefix) { PrefixQueryPtr query(newLucene(prefix)); query->setRewriteMethod(multiTermRewriteMethod); return query; } QueryPtr QueryParser::newFuzzyQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength) { // FuzzyQuery doesn't yet allow constant score rewrite return newLucene(term, minimumSimilarity, prefixLength); } QueryPtr QueryParser::newRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) { TermRangeQueryPtr query(newLucene(field, part1, part2, inclusive, inclusive, rangeCollator)); query->setRewriteMethod(multiTermRewriteMethod); return query; } QueryPtr QueryParser::newMatchAllDocsQuery() { return newLucene(); } QueryPtr QueryParser::newWildcardQuery(TermPtr term) { WildcardQueryPtr query(newLucene(term)); query->setRewriteMethod(multiTermRewriteMethod); return query; } QueryPtr QueryParser::getBooleanQuery(Collection clauses) { return getBooleanQuery(clauses, false); } QueryPtr QueryParser::getBooleanQuery(Collection clauses, bool disableCoord) { if (clauses.empty()) return QueryPtr(); // 
all clause words were filtered away by the analyzer. BooleanQueryPtr query(newBooleanQuery(disableCoord)); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) query->add(*clause); return query; } QueryPtr QueryParser::getWildcardQuery(const String& field, const String& termStr) { if (field == L"*" && termStr == L"*") return newMatchAllDocsQuery(); if (!allowLeadingWildcard && (boost::starts_with(termStr, L"*") || boost::starts_with(termStr, L"?"))) boost::throw_exception(QueryParserError(L"'*' or '?' not allowed as first character in WildcardQuery")); String queryTerm(termStr); if (lowercaseExpandedTerms) StringUtils::toLower(queryTerm); TermPtr term(newLucene(field, queryTerm)); return newWildcardQuery(term); } QueryPtr QueryParser::getPrefixQuery(const String& field, const String& termStr) { if (!allowLeadingWildcard && boost::starts_with(termStr, L"*")) boost::throw_exception(QueryParserError(L"'*' not allowed as first character in PrefixQuery")); String queryTerm(termStr); if (lowercaseExpandedTerms) StringUtils::toLower(queryTerm); TermPtr term(newLucene(field, queryTerm)); return newPrefixQuery(term); } QueryPtr QueryParser::getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) { String queryTerm(termStr); if (lowercaseExpandedTerms) StringUtils::toLower(queryTerm); TermPtr term(newLucene(field, queryTerm)); return newFuzzyQuery(term, minSimilarity, fuzzyPrefixLength); } String QueryParser::discardEscapeChar(const String& input) { // Create char array to hold unescaped char sequence CharArray output(CharArray::newInstance(input.length())); // The length of the output can be less than the input due to discarded escape chars. // This variable holds the actual length of the output int32_t length = 0; // We remember whether the last processed character was an escape character bool lastCharWasEscapeChar = false; // The multiplier the current unicode digit must be multiplied with. eg. 
the first digit must // be multiplied with 16^3, the second with 16^2 int32_t codePointMultiplier = 0; // Used to calculate the codepoint of the escaped unicode character int32_t codePoint = 0; for (int32_t i = 0; i < (int32_t)input.length(); ++i) { wchar_t curChar = input[i]; if (codePointMultiplier > 0) { codePoint += hexToInt(curChar) * codePointMultiplier; codePointMultiplier = MiscUtils::unsignedShift(codePointMultiplier, 4); if (codePointMultiplier == 0) { output[length++] = (wchar_t)codePoint; codePoint = 0; } } else if (lastCharWasEscapeChar) { if (curChar == L'u') { // found an escaped unicode character codePointMultiplier = 16 * 16 * 16; } else { // this character was escaped output[length++] = curChar; } lastCharWasEscapeChar = false; } else { if (curChar == L'\\') lastCharWasEscapeChar = true; else output[length++] = curChar; } } if (codePointMultiplier > 0) boost::throw_exception(QueryParserError(L"Truncated unicode escape sequence.")); if (lastCharWasEscapeChar) boost::throw_exception(QueryParserError(L"Term can not end with escape character.")); return String(output.get(), length); } int32_t QueryParser::hexToInt(wchar_t c) { if (L'0' <= c && c <= L'9') return c - L'0'; else if (L'a' <= c && c <= L'f') return c - L'a' + 10; else if (L'A' <= c && c <= L'F') return c - L'A' + 10; else { boost::throw_exception(QueryParserError(L"None-hex character in unicode escape sequence: " + StringUtils::toString(c))); return 0; } } String QueryParser::escape(const String& s) { StringStream buffer; for (int32_t i = 0; i < (int32_t)s.length(); ++i) { wchar_t c = s[i]; // These characters are part of the query syntax and must be escaped if (c == L'\\' || c == L'+' || c == L'-' || c == L'!' || c == L'(' || c == L')' || c == L':' || c == L'^' || c == L'[' || c == L']' || c == L'\"' || c == L'{' || c == L'}' || c == L'~' || c == L'*' || c == L'?' 
|| c == L'|' || c == L'&') buffer << L"\\"; buffer << c; } return buffer.str(); } int QueryParser::main(Collection args) { if (args.empty()) { std::wcout << L"Usage: QueryParser "; return 1; } QueryParserPtr qp(newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene())); QueryPtr q(qp->parse(args[0])); std::wcout << q->toString(L"field"); return 0; } int32_t QueryParser::Conjunction() { int32_t ret = CONJ_NONE; switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case AND: case OR: switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case AND: jj_consume_token(AND); ret = CONJ_AND; break; case OR: jj_consume_token(OR); ret = CONJ_OR; break; default: jj_la1[0] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } break; default: jj_la1[1] = jj_gen; } return ret; } int32_t QueryParser::Modifiers() { int32_t ret = MOD_NONE; switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case NOT: case PLUS: case MINUS: switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case PLUS: jj_consume_token(PLUS); ret = MOD_REQ; break; case MINUS: jj_consume_token(MINUS); ret = MOD_NOT; break; case NOT: jj_consume_token(NOT); ret = MOD_NOT; break; default: jj_la1[2] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } break; default: jj_la1[3] = jj_gen; } return ret; } QueryPtr QueryParser::TopLevelQuery(const String& field) { QueryPtr q(ParseQuery(field)); jj_consume_token(0); return q; } QueryPtr QueryParser::ParseQuery(const String& field) { Collection clauses(Collection::newInstance()); QueryPtr firstQuery; int32_t mods = Modifiers(); QueryPtr q(ParseClause(field)); addClause(clauses, CONJ_NONE, mods, q); if (mods == MOD_NONE) firstQuery = q; for (bool more = true; more; ) { switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { case AND: case OR: case NOT: case PLUS: case MINUS: case LPAREN: case STAR: case QUOTED: case TERM: case PREFIXTERM: case WILDTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: break; default: jj_la1[4] = jj_gen; more = false; continue; } int32_t conj = Conjunction(); mods = Modifiers(); q = ParseClause(field); addClause(clauses, conj, mods, q); } if (clauses.size() == 1 && firstQuery) return firstQuery; else return getBooleanQuery(clauses); } QueryPtr QueryParser::ParseClause(const String& field) { QueryPtr q; QueryParserTokenPtr fieldToken; QueryParserTokenPtr boost; String fieldClause(field); if (jj_2_1(2)) { switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case TERM: fieldToken = jj_consume_token(TERM); jj_consume_token(COLON); fieldClause = discardEscapeChar(fieldToken->image); break; case STAR: jj_consume_token(STAR); jj_consume_token(COLON); fieldClause = L"*"; break; default: jj_la1[5] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case STAR: case QUOTED: case TERM: case PREFIXTERM: case WILDTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: q = ParseTerm(fieldClause); break; case LPAREN: jj_consume_token(LPAREN); q = ParseQuery(fieldClause); jj_consume_token(RPAREN); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); break; default: jj_la1[6] = jj_gen; } break; default: jj_la1[7] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } if (boost) { double f = 1.0; try { if (q) { f = StringUtils::toDouble(boost->image); q->setBoost(f); } } catch (...) 
{ } } return q; } QueryPtr QueryParser::ParseTerm(const String& field) { QueryParserTokenPtr term; QueryParserTokenPtr boost; QueryParserTokenPtr fuzzySlop; QueryParserTokenPtr goop1; QueryParserTokenPtr goop2; bool prefix = false; bool wildcard = false; bool fuzzy = false; QueryPtr q; switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case STAR: case TERM: case PREFIXTERM: case WILDTERM: case NUMBER: switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case TERM: term = jj_consume_token(TERM); break; case STAR: term = jj_consume_token(STAR); wildcard = true; break; case PREFIXTERM: term = jj_consume_token(PREFIXTERM); prefix = true; break; case WILDTERM: term = jj_consume_token(WILDTERM); wildcard = true; break; case NUMBER: term = jj_consume_token(NUMBER); break; default: jj_la1[8] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case FUZZY_SLOP: fuzzySlop = jj_consume_token(FUZZY_SLOP); fuzzy = true; break; default: jj_la1[9] = jj_gen; } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case FUZZY_SLOP: fuzzySlop = jj_consume_token(FUZZY_SLOP); fuzzy = true; break; default: jj_la1[10] = jj_gen; } break; default: jj_la1[11] = jj_gen; } { String termImage(discardEscapeChar(term->image)); if (wildcard) q = getWildcardQuery(field, termImage); else if (prefix) q = getPrefixQuery(field, discardEscapeChar(term->image.substr(0, term->image.length() - 1))); else if (fuzzy) { double fms = fuzzyMinSim; try { fms = StringUtils::toDouble(fuzzySlop->image.substr(1)); } catch (...) { } if (fms < 0.0 || fms > 1.0) boost::throw_exception(QueryParserError(L"Minimum similarity for a FuzzyQuery has to be between 0.0 and 1.0")); q = getFuzzyQuery(field, termImage, fms); } else q = getFieldQuery(field, termImage); } break; case RANGEIN_START: jj_consume_token(RANGEIN_START); switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { case RANGEIN_GOOP: goop1 = jj_consume_token(RANGEIN_GOOP); break; case RANGEIN_QUOTED: goop1 = jj_consume_token(RANGEIN_QUOTED); break; default: jj_la1[12] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEIN_TO: jj_consume_token(RANGEIN_TO); break; default: jj_la1[13] = jj_gen; } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEIN_GOOP: goop2 = jj_consume_token(RANGEIN_GOOP); break; case RANGEIN_QUOTED: goop2 = jj_consume_token(RANGEIN_QUOTED); break; default: jj_la1[14] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } jj_consume_token(RANGEIN_END); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); break; default: jj_la1[15] = jj_gen; } if (goop1->kind == RANGEIN_QUOTED) goop1->image = goop1->image.substr(1, std::max((int32_t)0, (int32_t)goop1->image.length() - 2)); if (goop2->kind == RANGEIN_QUOTED) goop2->image = goop2->image.substr(1, std::max((int32_t)0, (int32_t)goop2->image.length() - 2)); q = getRangeQuery(field, discardEscapeChar(goop1->image), discardEscapeChar(goop2->image), true); break; case RANGEEX_START: jj_consume_token(RANGEEX_START); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEEX_GOOP: goop1 = jj_consume_token(RANGEEX_GOOP); break; case RANGEEX_QUOTED: goop1 = jj_consume_token(RANGEEX_QUOTED); break; default: jj_la1[16] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEEX_TO: jj_consume_token(RANGEEX_TO); break; default: jj_la1[17] = jj_gen; } switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { case RANGEEX_GOOP: goop2 = jj_consume_token(RANGEEX_GOOP); break; case RANGEEX_QUOTED: goop2 = jj_consume_token(RANGEEX_QUOTED); break; default: jj_la1[18] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } jj_consume_token(RANGEEX_END); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); break; default: jj_la1[19] = jj_gen; } if (goop1->kind == RANGEEX_QUOTED) goop1->image = goop1->image.substr(1, std::max((int32_t)0, (int32_t)goop1->image.length() - 2)); if (goop2->kind == RANGEEX_QUOTED) goop2->image = goop2->image.substr(1, std::max((int32_t)0, (int32_t)goop2->image.length() - 2)); q = getRangeQuery(field, discardEscapeChar(goop1->image), discardEscapeChar(goop2->image), false); break; case QUOTED: term = jj_consume_token(QUOTED); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case FUZZY_SLOP: fuzzySlop = jj_consume_token(FUZZY_SLOP); break; default: jj_la1[20] = jj_gen; } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); break; default: jj_la1[21] = jj_gen; } { int32_t s = phraseSlop; if (fuzzySlop) { try { s = StringUtils::toInt(fuzzySlop->image.substr(1)); } catch (...) { } } q = getFieldQuery(field, discardEscapeChar(term->image.substr(1, std::max((int32_t)0, (int32_t)term->image.length() - 2))), s); } break; default: jj_la1[22] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } if (boost) { double f = 1.0; try { f = StringUtils::toDouble(boost->image); } catch (...) 
{ } // avoid boosting null queries, such as those caused by stop words if (q) q->setBoost(f); } return q; } bool QueryParser::jj_2_1(int32_t xla) { jj_la = xla; jj_scanpos = token; jj_lastpos = jj_scanpos; bool _jj_2_1 = false; LuceneException finally; try { _jj_2_1 = !jj_3_1(); } catch (LookaheadSuccess&) { _jj_2_1 = true; } catch (LuceneException& e) { finally = e; } jj_save(0, xla); finally.throwException(); return _jj_2_1; } bool QueryParser::jj_3R_2() { if (jj_scan_token(TERM)) return true; if (jj_scan_token(COLON)) return true; return false; } bool QueryParser::jj_3_1() { QueryParserTokenPtr xsp(jj_scanpos); if (jj_3R_2()) { jj_scanpos = xsp; if (jj_3R_3()) return true; } return false; } bool QueryParser::jj_3R_3() { if (jj_scan_token(STAR)) return true; if (jj_scan_token(COLON)) return true; return false; } void QueryParser::ReInit(QueryParserCharStreamPtr stream) { token_source->ReInit(stream); token = newLucene(); _jj_ntk = -1; jj_gen = 0; for (int32_t i = 0; i < 23; ++i) jj_la1[i] = -1; for (int32_t i = 0; i < jj_2_rtns.size(); ++i) jj_2_rtns[i] = newInstance(); } void QueryParser::ReInit(QueryParserTokenManagerPtr tokenMgr) { token_source = tokenMgr; token = newLucene(); _jj_ntk = -1; jj_gen = 0; for (int32_t i = 0; i < 23; ++i) jj_la1[i] = -1; for (int32_t i = 0; i < jj_2_rtns.size(); ++i) jj_2_rtns[i] = newInstance(); } QueryParserTokenPtr QueryParser::jj_consume_token(int32_t kind) { QueryParserTokenPtr oldToken(token); if (oldToken->next) token = token->next; else { token->next = token_source->getNextToken(); token = token->next; } _jj_ntk = -1; if (token->kind == kind) { ++jj_gen; if (++jj_gc > 100) { jj_gc = 0; for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { JJCallsPtr c(jj_2_rtns[i]); while (c) { if (c->gen < jj_gen) c->first.reset(); c = c->next; } } } return token; } token = oldToken; jj_kind = kind; generateParseException(); return QueryParserTokenPtr(); } bool QueryParser::jj_scan_token(int32_t kind) { if (jj_scanpos == jj_lastpos) { --jj_la; 
if (!jj_scanpos->next) { jj_scanpos->next = token_source->getNextToken(); jj_scanpos = jj_scanpos->next; jj_lastpos = jj_scanpos; } else { jj_scanpos = jj_scanpos->next; jj_lastpos = jj_scanpos; } } else jj_scanpos = jj_scanpos->next; if (jj_rescan) { int32_t i = 0; QueryParserTokenPtr tok(token); while (tok && tok != jj_scanpos) { ++i; tok = tok->next; } if (tok) jj_add_error_token(kind, i); } if (jj_scanpos->kind != kind) return true; if (jj_la == 0 && jj_scanpos == jj_lastpos) boost::throw_exception(LookaheadSuccess()); return false; } QueryParserTokenPtr QueryParser::getNextToken() { if (token->next) token = token->next; else { token->next = token_source->getNextToken(); token = token->next; } _jj_ntk = -1; ++jj_gen; return token; } QueryParserTokenPtr QueryParser::getToken(int32_t index) { QueryParserTokenPtr t(token); for (int32_t i = 0; i < index; ++i) { if (t->next) t = t->next; else { t->next = token_source->getNextToken(); t = t->next; } } return t; } int32_t QueryParser::jj_ntk() { jj_nt = token->next; if (!jj_nt) { token->next = token_source->getNextToken(); _jj_ntk = token->next->kind; return _jj_ntk; } else { _jj_ntk = jj_nt->kind; return _jj_ntk; } } void QueryParser::jj_add_error_token(int32_t kind, int32_t pos) { if (pos >= 100) return; if (pos == jj_endpos + 1) jj_lasttokens[jj_endpos++] = kind; else if (jj_endpos != 0) { jj_expentry = Collection::newInstance(jj_endpos); for (int32_t i = 0; i < jj_endpos; ++i) jj_expentry[i] = jj_lasttokens[i]; for (Collection< Collection >::iterator oldentry = jj_expentries.begin(); oldentry != jj_expentries.end(); ++oldentry) { if (oldentry->size() == jj_expentry.size()) { bool jj_entries_loop = true; for (int32_t i = 0; i < jj_expentry.size(); ++i) { if ((*oldentry)[i] != jj_expentry[i]) { jj_entries_loop = false; break; } } if (!jj_entries_loop) continue; jj_expentries.add(jj_expentry); break; } } if (pos != 0) { jj_endpos = pos; jj_lasttokens[jj_endpos - 1] = kind; } } } void 
QueryParser::generateParseException() { jj_expentries.clear(); Collection la1tokens(Collection::newInstance(34)); if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; } for (int32_t i = 0; i < 23; ++i) { if (jj_la1[i] == jj_gen) { for (int32_t j = 0; j < 32; ++j) { if ((jj_la1_0[i] & (1 << j)) != 0) la1tokens[j] = true; if ((jj_la1_1[i] & (1 << j)) != 0) la1tokens[32 + j] = true; } } } for (int32_t i = 0; i < 34; ++i) { if (la1tokens[i]) { jj_expentry = Collection::newInstance(1); jj_expentry[0] = i; jj_expentries.add(jj_expentry); } } jj_endpos = 0; jj_rescan_token(); jj_add_error_token(0, 0); Collection< Collection > exptokseq(Collection< Collection >::newInstance(jj_expentries.size())); for (int32_t i = 0; i < jj_expentries.size(); ++i) exptokseq[i] = jj_expentries[i]; boost::throw_exception(QueryParserError(QueryParseError::parseError(token, exptokseq, tokenImage))); } void QueryParser::enable_tracing() { } void QueryParser::disable_tracing() { } void QueryParser::jj_rescan_token() { jj_rescan = true; for (int32_t i = 0; i < 1; ++i) { try { JJCallsPtr p(jj_2_rtns[i]); do { if (p->gen > jj_gen) { jj_la = p->arg; jj_scanpos = p->first; jj_lastpos = jj_scanpos; jj_3_1(); } p = p->next; } while (p); } catch (LookaheadSuccess&) { } } jj_rescan = false; } void QueryParser::jj_save(int32_t index, int32_t xla) { JJCallsPtr p(jj_2_rtns[index]); while (p->gen > jj_gen) { if (!p->next) { p->next = newInstance(); p = p->next; break; } p = p->next; } p->gen = jj_gen + xla - jj_la; p->first = token; p->arg = xla; } } LucenePlusPlus-rel_3.0.4/src/core/queryparser/QueryParserCharStream.cpp000066400000000000000000000035621217574114600263620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParserCharStream.h" namespace Lucene { wchar_t QueryParserCharStream::readChar() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getColumn() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getLine() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getEndColumn() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getEndLine() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getBeginColumn() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getBeginLine() { BOOST_ASSERT(false); return 0; // override } void QueryParserCharStream::backup(int32_t amount) { BOOST_ASSERT(false); // override } wchar_t QueryParserCharStream::BeginToken() { BOOST_ASSERT(false); return 0; // override } String QueryParserCharStream::GetImage() { BOOST_ASSERT(false); return L""; // override } CharArray QueryParserCharStream::GetSuffix(int32_t length) { BOOST_ASSERT(false); return CharArray(); // override } void QueryParserCharStream::Done() { BOOST_ASSERT(false); // override } } LucenePlusPlus-rel_3.0.4/src/core/queryparser/QueryParserConstants.cpp000066400000000000000000000027421217574114600263040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParserConstants.h" namespace Lucene { const wchar_t* QueryParserConstants::_tokenImage[] = { L"", L"<_NUM_CHAR>", L"<_ESCAPED_CHAR>", L"<_TERM_START_CHAR>", L"<_TERM_CHAR>", L"<_WHITESPACE>", L"<_QUOTED_CHAR>", L"", L"", L"", L"", L"\"+\"", L"\"-\"", L"\"(\"", L"\")\"", L"\":\"", L"\"*\"", L"\"^\"", L"", L"", L"", L"", L"", L"\"[\"", L"\"{\"", L"", L"\"TO\"", L"\"]\"", L"", L"", L"\"TO\"", L"\"}\"", L"", L"" }; Collection QueryParserConstants::tokenImage = Collection::newInstance(_tokenImage, _tokenImage + SIZEOF_ARRAY(_tokenImage)); QueryParserConstants::QueryParserConstants() { } QueryParserConstants::~QueryParserConstants() { } } LucenePlusPlus-rel_3.0.4/src/core/queryparser/QueryParserToken.cpp000066400000000000000000000017161217574114600254100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParserToken.h" namespace Lucene { QueryParserToken::QueryParserToken(int32_t kind, const String& image) { this->kind = kind; this->image = image; this->beginLine = 0; this->beginColumn = 0; this->endLine = 0; this->endColumn = 0; } QueryParserToken::~QueryParserToken() { } String QueryParserToken::toString() { return image; } QueryParserTokenPtr QueryParserToken::newToken(int32_t ofKind, const String& image) { return newLucene(ofKind, image); } } LucenePlusPlus-rel_3.0.4/src/core/queryparser/QueryParserTokenManager.cpp000066400000000000000000001350651217574114600267100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParserTokenManager.h" #include "QueryParserCharStream.h" #include "QueryParserToken.h" #include "QueryParseError.h" #include "InfoStream.h" #include "StringUtils.h" namespace Lucene { const int64_t QueryParserTokenManager::jjbitVec0[] = {0x1LL, 0x0LL, 0x0LL, 0x0LL}; const int64_t QueryParserTokenManager::jjbitVec1[] = {0xfffffffffffffffeLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL}; const int64_t QueryParserTokenManager::jjbitVec3[] = {0x0LL, 0x0LL, 0xffffffffffffffffLL, 0xffffffffffffffffLL}; const int64_t QueryParserTokenManager::jjbitVec4[] = {0xfffefffffffffffeLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL}; const int32_t QueryParserTokenManager::jjnextStates[] = {15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27, 2, 4, 5, 0, 1}; /// Token literal values. 
const wchar_t* QueryParserTokenManager::jjstrLiteralImages[] = { L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"\53", L"\55", L"\50", L"\51", L"\72", L"\52", L"\136", L"", L"", L"", L"", L"", L"\133", L"\173", L"", L"\124\117", L"\135", L"", L"", L"\124\117", L"\175", L"", L"" }; /// Lexer state names. const wchar_t* QueryParserTokenManager::lexStateNames[] = { L"Boost", L"RangeEx", L"RangeIn", L"DEFAULT" }; /// Lex State array. const int32_t QueryParserTokenManager::jjnewLexState[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1, 3, -1, -1, -1, 3, -1, -1 }; const int64_t QueryParserTokenManager::jjtoToken[] = {0x3ffffff01LL}; const int64_t QueryParserTokenManager::jjtoSkip[] = {0x80LL}; QueryParserTokenManager::QueryParserTokenManager(QueryParserCharStreamPtr stream) { debugStream = newLucene(); jjrounds = IntArray::newInstance(36); jjstateSet = IntArray::newInstance(72); curChar = 0; curLexState = 3; defaultLexState = 3; jjnewStateCnt = 0; jjround = 0; jjmatchedPos = 0; jjmatchedKind = 0; input_stream = stream; } QueryParserTokenManager::QueryParserTokenManager(QueryParserCharStreamPtr stream, int32_t lexState) { debugStream = newLucene(); jjrounds = IntArray::newInstance(36); jjstateSet = IntArray::newInstance(72); input_stream = stream; curChar = 0; curLexState = 3; defaultLexState = 3; jjnewStateCnt = 0; jjround = 0; jjmatchedPos = 0; jjmatchedKind = 0; SwitchTo(lexState); } QueryParserTokenManager::~QueryParserTokenManager() { } void QueryParserTokenManager::setDebugStream(InfoStreamPtr debugStream) { this->debugStream = debugStream; } int32_t QueryParserTokenManager::jjStopStringLiteralDfa_3(int32_t pos, int64_t active0) { return -1; } int32_t QueryParserTokenManager::jjStartNfa_3(int32_t pos, int64_t active0) { return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1); } int32_t QueryParserTokenManager::jjStopAtPos(int32_t pos, int32_t kind) { jjmatchedKind = kind; jjmatchedPos = 
pos; return pos + 1; } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_3() { switch (curChar) { case 40: return jjStopAtPos(0, 13); case 41: return jjStopAtPos(0, 14); case 42: return jjStartNfaWithStates_3(0, 16, 36); case 43: return jjStopAtPos(0, 11); case 45: return jjStopAtPos(0, 12); case 58: return jjStopAtPos(0, 15); case 91: return jjStopAtPos(0, 23); case 94: return jjStopAtPos(0, 17); case 123: return jjStopAtPos(0, 24); default: return jjMoveNfa_3(0, 0); } } int32_t QueryParserTokenManager::jjStartNfaWithStates_3(int32_t pos, int32_t kind, int32_t state) { jjmatchedKind = kind; jjmatchedPos = pos; try { curChar = input_stream->readChar(); } catch (IOException&) { return pos + 1; } return jjMoveNfa_3(state, pos + 1); } int32_t QueryParserTokenManager::jjMoveNfa_3(int32_t startState, int32_t curPos) { int32_t startsAt = 0; jjnewStateCnt = 36; int32_t i = 1; jjstateSet[0] = startState; int32_t kind = 0x7fffffff; while (true) { if (++jjround == 0x7fffffff) ReInitRounds(); if (curChar < 64) { int64_t l = (int64_t)1 << curChar; do { switch (jjstateSet[--i]) { case 36: case 25: if ((0xfbfffcf8ffffd9ffLL & l) == 0) break; if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 0: if ((0xfbffd4f8ffffd9ffLL & l) != 0) { if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); } else if ((0x100002600LL & l) != 0) { if (kind > 7) kind = 7; } else if (curChar == 34) jjCheckNAddStates(0, 2); else if (curChar == 33) { if (kind > 10) kind = 10; } if ((0x7bffd0f8ffffd9ffLL & l) != 0) { if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); } else if (curChar == 42) { if (kind > 21) kind = 21; } if (curChar == 38) jjstateSet[jjnewStateCnt++] = 4; break; case 4: if (curChar == 38 && kind > 8) kind = 8; break; case 5: if (curChar == 38) jjstateSet[jjnewStateCnt++] = 4; break; case 13: if (curChar == 33 && kind > 10) kind = 10; break; case 14: if (curChar == 34) jjCheckNAddStates(0, 2); break; case 15: if ((0xfffffffbffffffffLL & l) != 0) 
jjCheckNAddStates(0, 2); break; case 17: jjCheckNAddStates(0, 2); break; case 18: if (curChar == 34 && kind > 18) kind = 18; break; case 20: if ((0x3ff000000000000LL & l) == 0) break; if (kind > 20) kind = 20; jjAddStates(8, 9); break; case 21: if (curChar == 46) jjCheckNAdd(22); break; case 22: if ((0x3ff000000000000LL & l) == 0) break; if (kind > 20) kind = 20; jjCheckNAdd(22); break; case 23: if (curChar == 42 && kind > 21) kind = 21; break; case 24: if ((0xfbffd4f8ffffd9ffLL & l) == 0) break; if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 27: if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 28: if ((0x7bffd0f8ffffd9ffLL & l) == 0) break; if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); break; case 29: if ((0x7bfff8f8ffffd9ffLL & l) == 0) break; if (kind > 19) kind = 19; jjCheckNAddTwoStates(29, 30); break; case 31: if (kind > 19) kind = 19; jjCheckNAddTwoStates(29, 30); break; case 32: if ((0x7bfff8f8ffffd9ffLL & l) != 0) jjCheckNAddStates(10, 12); break; case 34: jjCheckNAddStates(10, 12); break; default: break; } } while (i != startsAt); } else if (curChar < 128) { int64_t l = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 36: if ((0x97ffffff87ffffffLL & l) != 0) { if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); } else if (curChar == 92) jjCheckNAddTwoStates(27, 27); break; case 0: if ((0x97ffffff87ffffffLL & l) != 0) { if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); } else if (curChar == 92) jjCheckNAddStates(13, 15); else if (curChar == 126) { if (kind > 20) kind = 20; jjstateSet[jjnewStateCnt++] = 20; } if ((0x97ffffff87ffffffLL & l) != 0) { if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); } if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; else if (curChar == 124) jjstateSet[jjnewStateCnt++] = 8; else if (curChar == 79) jjstateSet[jjnewStateCnt++] = 6; else if (curChar == 65) jjstateSet[jjnewStateCnt++] = 2; break; case 1: if (curChar == 68 && kind > 8) kind = 8; break; 
case 2: if (curChar == 78) jjstateSet[jjnewStateCnt++] = 1; break; case 3: if (curChar == 65) jjstateSet[jjnewStateCnt++] = 2; break; case 6: if (curChar == 82 && kind > 9) kind = 9; break; case 7: if (curChar == 79) jjstateSet[jjnewStateCnt++] = 6; break; case 8: if (curChar == 124 && kind > 9) kind = 9; break; case 9: if (curChar == 124) jjstateSet[jjnewStateCnt++] = 8; break; case 10: if (curChar == 84 && kind > 10) kind = 10; break; case 11: if (curChar == 79) jjstateSet[jjnewStateCnt++] = 10; break; case 12: if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; break; case 15: if ((0xffffffffefffffffLL & l) != 0) jjCheckNAddStates(0, 2); break; case 16: if (curChar == 92) jjstateSet[jjnewStateCnt++] = 17; break; case 17: jjCheckNAddStates(0, 2); break; case 19: if (curChar != 126) break; if (kind > 20) kind = 20; jjstateSet[jjnewStateCnt++] = 20; break; case 24: if ((0x97ffffff87ffffffLL & l) == 0) break; if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 25: if ((0x97ffffff87ffffffLL & l) == 0) break; if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 26: if (curChar == 92) jjCheckNAddTwoStates(27, 27); break; case 27: if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 28: if ((0x97ffffff87ffffffLL & l) == 0) break; if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); break; case 29: if ((0x97ffffff87ffffffLL & l) == 0) break; if (kind > 19) kind = 19; jjCheckNAddTwoStates(29, 30); break; case 30: if (curChar == 92) jjCheckNAddTwoStates(31, 31); break; case 31: if (kind > 19) kind = 19; jjCheckNAddTwoStates(29, 30); break; case 32: if ((0x97ffffff87ffffffLL & l) != 0) jjCheckNAddStates(10, 12); break; case 33: if (curChar == 92) jjCheckNAddTwoStates(34, 34); break; case 34: jjCheckNAddStates(10, 12); break; case 35: if (curChar == 92) jjCheckNAddStates(13, 15); break; default: break; } } while (i != startsAt); } else { int32_t hiByte = (int32_t)(curChar >> 8); int32_t i1 = hiByte >> 6; int64_t l1 = (int64_t)1 << 
(hiByte & 077); int32_t i2 = (curChar & 0xff) >> 6; int64_t l2 = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 36: case 25: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { if (kind > 7) kind = 7; } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); } break; case 15: case 17: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(0, 2); break; case 24: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 27: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; if (kind > 22) kind = 22; jjCheckNAddTwoStates(25, 26); break; case 28: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); break; case 29: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 19) kind = 19; jjCheckNAddTwoStates(29, 30); break; case 31: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; if (kind > 19) kind = 19; jjCheckNAddTwoStates(29, 30); break; case 32: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(10, 12); break; case 34: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(10, 12); break; default: break; } } while (i != startsAt); } if (kind != 0x7fffffff) { jjmatchedKind = kind; jjmatchedPos = curPos; kind = 0x7fffffff; } ++curPos; if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream->readChar(); } catch (IOException&) { return curPos; } } } int32_t QueryParserTokenManager::jjStopStringLiteralDfa_1(int32_t pos, int64_t active0) { switch (pos) { case 0: if ((active0 & 0x40000000LL) != 0) { jjmatchedKind = 33; return 6; } return -1; default: return -1; } } int32_t QueryParserTokenManager::jjStartNfa_1(int32_t pos, 
int64_t active0) { return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_1() { switch (curChar) { case 84: return jjMoveStringLiteralDfa1_1(0x40000000LL); case 125: return jjStopAtPos(0, 31); default: return jjMoveNfa_1(0, 0); } } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa1_1(int64_t active0) { try { curChar = input_stream->readChar(); } catch (IOException&) { jjStopStringLiteralDfa_1(0, active0); return 1; } switch (curChar) { case 79: if ((active0 & 0x40000000LL) != 0) return jjStartNfaWithStates_1(1, 30, 6); break; default: break; } return jjStartNfa_1(0, active0); } int32_t QueryParserTokenManager::jjStartNfaWithStates_1(int32_t pos, int32_t kind, int32_t state) { jjmatchedKind = kind; jjmatchedPos = pos; try { curChar = input_stream->readChar(); } catch (IOException&) { return pos + 1; } return jjMoveNfa_1(state, pos + 1); } int32_t QueryParserTokenManager::jjMoveNfa_1(int32_t startState, int32_t curPos) { int32_t startsAt = 0; jjnewStateCnt = 7; int32_t i = 1; jjstateSet[0] = startState; int32_t kind = 0x7fffffff; while (true) { if (++jjround == 0x7fffffff) ReInitRounds(); if (curChar < 64) { int64_t l = (int64_t)1 << curChar; do { switch (jjstateSet[--i]) { case 0: if ((0xfffffffeffffffffLL & l) != 0) { if (kind > 33) kind = 33; jjCheckNAdd(6); } if ((0x100002600LL & l) != 0) { if (kind > 7) kind = 7; } else if (curChar == 34) jjCheckNAddTwoStates(2, 4); break; case 1: if (curChar == 34) jjCheckNAddTwoStates(2, 4); break; case 2: if ((0xfffffffbffffffffLL & l) != 0) jjCheckNAddStates(16, 18); break; case 3: if (curChar == 34) jjCheckNAddStates(16, 18); break; case 5: if (curChar == 34 && kind > 32) kind = 32; break; case 6: if ((0xfffffffeffffffffLL & l) == 0) break; if (kind > 33) kind = 33; jjCheckNAdd(6); break; default: break; } } while (i != startsAt); } else if (curChar < 128) { int64_t l = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 0: case 
6: if ((0xdfffffffffffffffLL & l) == 0) break; if (kind > 33) kind = 33; jjCheckNAdd(6); break; case 2: jjAddStates(16, 18); break; case 4: if (curChar == 92) jjstateSet[jjnewStateCnt++] = 3; break; default: break; } } while (i != startsAt); } else { int32_t hiByte = (int32_t)(curChar >> 8); int32_t i1 = hiByte >> 6; int64_t l1 = (int64_t)1 << (hiByte & 077); int32_t i2 = (curChar & 0xff) >> 6; int64_t l2 = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { if (kind > 7) kind = 7; } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { if (kind > 33) kind = 33; jjCheckNAdd(6); } break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) jjAddStates(16, 18); break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; if (kind > 33) kind = 33; jjCheckNAdd(6); break; default: break; } } while (i != startsAt); } if (kind != 0x7fffffff) { jjmatchedKind = kind; jjmatchedPos = curPos; kind = 0x7fffffff; } ++curPos; if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream->readChar(); } catch (IOException&) { return curPos; } } } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_0() { return jjMoveNfa_0(0, 0); } int32_t QueryParserTokenManager::jjMoveNfa_0(int32_t startState, int32_t curPos) { int32_t startsAt = 0; jjnewStateCnt = 3; int32_t i = 1; jjstateSet[0] = startState; int32_t kind = 0x7fffffff; while (true) { if (++jjround == 0x7fffffff) ReInitRounds(); if (curChar < 64) { int64_t l = (int64_t)1 << curChar; do { switch (jjstateSet[--i]) { case 0: if ((0x3ff000000000000LL & l) == 0) break; if (kind > 25) kind = 25; jjAddStates(19, 20); break; case 1: if (curChar == 46) jjCheckNAdd(2); break; case 2: if ((0x3ff000000000000LL & l) == 0) break; if (kind > 25) kind = 25; jjCheckNAdd(2); break; default: break; } } while (i != startsAt); } else if (curChar < 128) { int64_t l = (int64_t)1 << (curChar & 077); do { jjstateSet[--i]; } while (i != startsAt); } 
else { int32_t hiByte = (int32_t)(curChar >> 8); int32_t i1 = hiByte >> 6; int64_t l1 = (int64_t)1 << (hiByte & 077); int32_t i2 = (curChar & 0xff) >> 6; int64_t l2 = (int64_t)1 << (curChar & 077); do { jjstateSet[--i]; } while (i != startsAt); } if (kind != 0x7fffffff) { jjmatchedKind = kind; jjmatchedPos = curPos; kind = 0x7fffffff; } ++curPos; if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream->readChar(); } catch (IOException&) { return curPos; } } } int32_t QueryParserTokenManager::jjStopStringLiteralDfa_2(int32_t pos, int64_t active0) { switch (pos) { case 0: if ((active0 & 0x4000000LL) != 0) { jjmatchedKind = 29; return 6; } return -1; default: return -1; } } int32_t QueryParserTokenManager::jjStartNfa_2(int32_t pos, int64_t active0) { return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1); } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_2() { switch (curChar) { case 84: return jjMoveStringLiteralDfa1_2(0x4000000LL); case 93: return jjStopAtPos(0, 27); default: return jjMoveNfa_2(0, 0); } } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa1_2(int64_t active0) { try { curChar = input_stream->readChar(); } catch (IOException&) { jjStopStringLiteralDfa_2(0, active0); return 1; } switch (curChar) { case 79: if ((active0 & 0x4000000LL) != 0) return jjStartNfaWithStates_2(1, 26, 6); break; default: break; } return jjStartNfa_2(0, active0); } int32_t QueryParserTokenManager::jjStartNfaWithStates_2(int32_t pos, int32_t kind, int32_t state) { jjmatchedKind = kind; jjmatchedPos = pos; try { curChar = input_stream->readChar(); } catch (IOException&) { return pos + 1; } return jjMoveNfa_2(state, pos + 1); } int32_t QueryParserTokenManager::jjMoveNfa_2(int32_t startState, int32_t curPos) { int32_t startsAt = 0; jjnewStateCnt = 7; int32_t i = 1; jjstateSet[0] = startState; int32_t kind = 0x7fffffff; while (true) { if (++jjround == 0x7fffffff) ReInitRounds(); if (curChar < 64) { 
int64_t l = (int64_t)1 << curChar; do { switch (jjstateSet[--i]) { case 0: if ((0xfffffffeffffffffLL & l) != 0) { if (kind > 29) kind = 29; jjCheckNAdd(6); } if ((0x100002600LL & l) != 0) { if (kind > 7) kind = 7; } else if (curChar == 34) jjCheckNAddTwoStates(2, 4); break; case 1: if (curChar == 34) jjCheckNAddTwoStates(2, 4); break; case 2: if ((0xfffffffbffffffffLL & l) != 0) jjCheckNAddStates(16, 18); break; case 3: if (curChar == 34) jjCheckNAddStates(16, 18); break; case 5: if (curChar == 34 && kind > 28) kind = 28; break; case 6: if ((0xfffffffeffffffffLL & l) == 0) break; if (kind > 29) kind = 29; jjCheckNAdd(6); break; default: break; } } while (i != startsAt); } else if (curChar < 128) { int64_t l = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 0: case 6: if ((0xffffffffdfffffffLL & l) == 0) break; if (kind > 29) kind = 29; jjCheckNAdd(6); break; case 2: jjAddStates(16, 18); break; case 4: if (curChar == 92) jjstateSet[jjnewStateCnt++] = 3; break; default: break; } } while (i != startsAt); } else { int32_t hiByte = (int32_t)(curChar >> 8); int32_t i1 = hiByte >> 6; int64_t l1 = (int64_t)1 << (hiByte & 077); int32_t i2 = (curChar & 0xff) >> 6; int64_t l2 = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { if (kind > 7) kind = 7; } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { if (kind > 29) kind = 29; jjCheckNAdd(6); } break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) jjAddStates(16, 18); break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; if (kind > 29) kind = 29; jjCheckNAdd(6); break; default: break; } } while (i != startsAt); } if (kind != 0x7fffffff) { jjmatchedKind = kind; jjmatchedPos = curPos; kind = 0x7fffffff; } ++curPos; if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream->readChar(); } catch (IOException&) { return curPos; } } } bool QueryParserTokenManager::jjCanMove_0(int32_t 
hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { switch (hiByte) { case 48: return ((jjbitVec0[i2] & l2) != 0); default: return false; } } bool QueryParserTokenManager::jjCanMove_1(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { switch (hiByte) { case 0: return ((jjbitVec3[i2] & l2) != 0); default: if ((jjbitVec1[i1] & l1) != 0) return true; return false; } } bool QueryParserTokenManager::jjCanMove_2(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { switch (hiByte) { case 0: return ((jjbitVec3[i2] & l2) != 0); case 48: return ((jjbitVec1[i2] & l2) != 0); default: if ((jjbitVec4[i1] & l1) != 0) return true; return false; } } void QueryParserTokenManager::ReInit(QueryParserCharStreamPtr stream) { jjmatchedPos = 0; jjnewStateCnt = 0; curLexState = defaultLexState; input_stream = stream; ReInitRounds(); } void QueryParserTokenManager::ReInitRounds() { jjround = 0x80000001; for (int32_t i = 36; i-- > 0;) jjrounds[i] = 0x80000000; } void QueryParserTokenManager::ReInit(QueryParserCharStreamPtr stream, int32_t lexState) { ReInit(stream); SwitchTo(lexState); } void QueryParserTokenManager::SwitchTo(int32_t lexState) { if (lexState >= 4 || lexState < 0) { boost::throw_exception(QueryParserError(L"Error: Ignoring invalid lexical state : " + StringUtils::toString(lexState) + L". State unchanged.")); } else curLexState = lexState; } QueryParserTokenPtr QueryParserTokenManager::jjFillToken() { String im(jjstrLiteralImages[jjmatchedKind]); String curTokenImage(im.empty() ? 
input_stream->GetImage() : im); int32_t beginLine = input_stream->getBeginLine(); int32_t beginColumn = input_stream->getBeginColumn(); int32_t endLine = input_stream->getEndLine(); int32_t endColumn = input_stream->getEndColumn(); QueryParserTokenPtr t(QueryParserToken::newToken(jjmatchedKind, curTokenImage)); t->beginLine = beginLine; t->endLine = endLine; t->beginColumn = beginColumn; t->endColumn = endColumn; return t; } QueryParserTokenPtr QueryParserTokenManager::getNextToken() { QueryParserTokenPtr matchedToken; int32_t curPos = 0; while (true) { try { curChar = input_stream->BeginToken(); } catch (IOException&) { jjmatchedKind = 0; matchedToken = jjFillToken(); return matchedToken; } switch (curLexState) { case 0: jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_0(); break; case 1: jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_1(); break; case 2: jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_2(); break; case 3: jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_3(); break; } if (jjmatchedKind != 0x7fffffff) { if (jjmatchedPos + 1 < curPos) input_stream->backup(curPos - jjmatchedPos - 1); if ((jjtoToken[jjmatchedKind >> 6] & ((int64_t)1 << (jjmatchedKind & 077))) != 0) { matchedToken = jjFillToken(); if (jjnewLexState[jjmatchedKind] != -1) curLexState = jjnewLexState[jjmatchedKind]; return matchedToken; } else { if (jjnewLexState[jjmatchedKind] != -1) curLexState = jjnewLexState[jjmatchedKind]; continue; } } int32_t error_line = input_stream->getEndLine(); int32_t error_column = input_stream->getEndColumn(); String error_after; bool EOFSeen = false; try { input_stream->readChar(); input_stream->backup(1); } catch (IOException&) { EOFSeen = true; error_after = curPos <= 1 ? 
L"" : input_stream->GetImage(); if (curChar == L'\n' || curChar == L'\r') { ++error_line; error_column = 0; } else ++error_column; } if (!EOFSeen) { input_stream->backup(1); error_after = curPos <= 1 ? L"" : input_stream->GetImage(); } boost::throw_exception(QueryParserError(QueryParseError::lexicalError(EOFSeen, curLexState, error_line, error_column, error_after, curChar))); } } void QueryParserTokenManager::jjCheckNAdd(int32_t state) { if (jjrounds[state] != jjround) { jjstateSet[jjnewStateCnt++] = state; jjrounds[state] = jjround; } } void QueryParserTokenManager::jjAddStates(int32_t start, int32_t end) { do { jjstateSet[jjnewStateCnt++] = jjnextStates[start]; } while (start++ != end); } void QueryParserTokenManager::jjCheckNAddTwoStates(int32_t state1, int32_t state2) { jjCheckNAdd(state1); jjCheckNAdd(state2); } void QueryParserTokenManager::jjCheckNAddStates(int32_t start, int32_t end) { do { jjCheckNAdd(jjnextStates[start]); } while (start++ != end); } } LucenePlusPlus-rel_3.0.4/src/core/search/000077500000000000000000000000001217574114600202775ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/search/BooleanClause.cpp000066400000000000000000000036371217574114600235300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BooleanClause.h" #include "Query.h" namespace Lucene { BooleanClause::BooleanClause(QueryPtr query, Occur occur) { this->query = query; this->occur = occur; } BooleanClause::~BooleanClause() { } BooleanClause::Occur BooleanClause::getOccur() { return occur; } void BooleanClause::setOccur(BooleanClause::Occur occur) { this->occur = occur; } QueryPtr BooleanClause::getQuery() { return query; } void BooleanClause::setQuery(QueryPtr query) { this->query = query; } bool BooleanClause::isProhibited() { return (occur == MUST_NOT); } bool BooleanClause::isRequired() { return (occur == MUST); } bool BooleanClause::equals(LuceneObjectPtr other) { BooleanClausePtr otherBooleanClause(boost::dynamic_pointer_cast(other)); if (!otherBooleanClause) return false; return (this->query->equals(otherBooleanClause->query) && this->occur == otherBooleanClause->occur); } int32_t BooleanClause::hashCode() { return query->hashCode() ^ (occur == MUST ? 1 : 0) ^ (occur == MUST_NOT ? 2 : 0); } String BooleanClause::toString() { switch (occur) { case MUST: return L"+" + query->toString(); case MUST_NOT: return L"-" + query->toString(); default: return query->toString(); } } } LucenePlusPlus-rel_3.0.4/src/core/search/BooleanQuery.cpp000066400000000000000000000343741217574114600234230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BooleanQuery.h" #include "_BooleanQuery.h" #include "BooleanScorer.h" #include "BooleanScorer2.h" #include "ComplexExplanation.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { int32_t BooleanQuery::maxClauseCount = 1024; BooleanQuery::BooleanQuery(bool disableCoord) { this->disableCoord = disableCoord; this->clauses = Collection::newInstance(); this->minNrShouldMatch = 0; } BooleanQuery::~BooleanQuery() { } int32_t BooleanQuery::getMaxClauseCount() { return maxClauseCount; } void BooleanQuery::setMaxClauseCount(int32_t maxClauseCount) { if (maxClauseCount < 1) boost::throw_exception(IllegalArgumentException(L"maxClauseCount must be >= 1")); BooleanQuery::maxClauseCount = maxClauseCount; } bool BooleanQuery::isCoordDisabled() { return disableCoord; } SimilarityPtr BooleanQuery::getSimilarity(SearcherPtr searcher) { SimilarityPtr result(Query::getSimilarity(searcher)); if (disableCoord) // disable coord as requested result = newLucene(result); return result; } void BooleanQuery::setMinimumNumberShouldMatch(int32_t min) { this->minNrShouldMatch = min; } int32_t BooleanQuery::getMinimumNumberShouldMatch() { return minNrShouldMatch; } void BooleanQuery::add(QueryPtr query, BooleanClause::Occur occur) { add(newLucene(query, occur)); } void BooleanQuery::add(BooleanClausePtr clause) { if (clauses.size() >= maxClauseCount) boost::throw_exception(TooManyClausesException(L"maxClauseCount is set to " + StringUtils::toString(maxClauseCount))); clauses.add(clause); } Collection BooleanQuery::getClauses() { return clauses; } Collection::iterator BooleanQuery::begin() { return clauses.begin(); } Collection::iterator BooleanQuery::end() { return clauses.end(); } WeightPtr BooleanQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } QueryPtr BooleanQuery::rewrite(IndexReaderPtr reader) { if (minNrShouldMatch == 0 
&& clauses.size() == 1) // optimize 1-clause queries { BooleanClausePtr c(clauses[0]); if (!c->isProhibited()) // just return clause { QueryPtr query(c->getQuery()->rewrite(reader)); // rewrite first if (getBoost() != 1.0) // incorporate boost { if (query == c->getQuery()) // if rewrite was no-op query = boost::dynamic_pointer_cast(query->clone()); // then clone before boost query->setBoost(getBoost() * query->getBoost()); } return query; } } BooleanQueryPtr clone; // recursively rewrite for (int32_t i = 0; i < clauses.size(); ++i) { BooleanClausePtr c(clauses[i]); QueryPtr query(c->getQuery()->rewrite(reader)); if (query != c->getQuery()) // clause rewrote: must clone { if (!clone) clone = boost::dynamic_pointer_cast(this->clone()); clone->clauses[i] = newLucene(query, c->getOccur()); } } if (clone) return clone; // some clauses rewrote else return shared_from_this(); // no clauses rewrote } void BooleanQuery::extractTerms(SetTerm terms) { for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) (*clause)->getQuery()->extractTerms(terms); } LuceneObjectPtr BooleanQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = Query::clone(other ? 
other : newLucene()); BooleanQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->disableCoord = disableCoord; cloneQuery->minNrShouldMatch = minNrShouldMatch; cloneQuery->clauses = Collection::newInstance(clauses.begin(), clauses.end()); return cloneQuery; } String BooleanQuery::toString(const String& field) { String buffer; bool needParens = (getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0); if (needParens) buffer += L"("; for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (clause != clauses.begin()) buffer += L" "; if ((*clause)->isProhibited()) buffer += L"-"; else if ((*clause)->isRequired()) buffer += L"+"; QueryPtr subQuery((*clause)->getQuery()); if (subQuery) { if (boost::dynamic_pointer_cast(subQuery)) // wrap sub-bools in parens { buffer += L"("; buffer += subQuery->toString(field); buffer += L")"; } else buffer += subQuery->toString(field); } else buffer += L"null"; } if (needParens) buffer += L")"; if (getMinimumNumberShouldMatch() > 0) { buffer += L"~"; buffer += StringUtils::toString(getMinimumNumberShouldMatch()); } if (getBoost() != 1.0) buffer += boostString(); return buffer; } bool BooleanQuery::equals(LuceneObjectPtr other) { BooleanQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; return (getBoost() == otherQuery->getBoost() && clauses.equals(otherQuery->clauses, luceneEquals()) && getMinimumNumberShouldMatch() == otherQuery->getMinimumNumberShouldMatch() && disableCoord == otherQuery->disableCoord); } int32_t BooleanQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene) + getMinimumNumberShouldMatch() + (disableCoord ? 
17 : 0); } BooleanWeight::BooleanWeight(BooleanQueryPtr query, SearcherPtr searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); weights = Collection::newInstance(); for (Collection::iterator clause = query->clauses.begin(); clause != query->clauses.end(); ++clause) weights.add((*clause)->getQuery()->createWeight(searcher)); } BooleanWeight::~BooleanWeight() { } QueryPtr BooleanWeight::getQuery() { return query; } double BooleanWeight::getValue() { return query->getBoost(); } double BooleanWeight::sumOfSquaredWeights() { double sum = 0.0; for (int32_t i = 0; i < weights.size(); ++i) { // call sumOfSquaredWeights for all clauses in case of side effects double s = weights[i]->sumOfSquaredWeights(); // sum sub weights if (!query->clauses[i]->isProhibited()) { // only add to sum for non-prohibited clauses sum += s; } } sum *= query->getBoost() * query->getBoost(); // boost each sub-weight return sum; } void BooleanWeight::normalize(double norm) { norm *= query->getBoost(); // incorporate boost for (Collection::iterator w = weights.begin(); w != weights.end(); ++w) { // normalize all clauses, (even if prohibited in case of side affects) (*w)->normalize(norm); } } ExplanationPtr BooleanWeight::explain(IndexReaderPtr reader, int32_t doc) { int32_t minShouldMatch = query->getMinimumNumberShouldMatch(); ComplexExplanationPtr sumExpl(newLucene()); sumExpl->setDescription(L"sum of:"); int32_t coord = 0; int32_t maxCoord = 0; double sum = 0.0; bool fail = false; int32_t shouldMatchCount = 0; Collection::iterator c = query->clauses.begin(); for (Collection::iterator w = weights.begin(); w != weights.end(); ++w, ++c) { if (!(*w)->scorer(reader, true, true)) continue; ExplanationPtr e((*w)->explain(reader, doc)); if (!(*c)->isProhibited()) ++maxCoord; if (e->isMatch()) { if (!(*c)->isProhibited()) { sumExpl->addDetail(e); sum += e->getValue(); ++coord; } else { ExplanationPtr r(newLucene(0.0, L"match on prohibited clause (" + 
(*c)->getQuery()->toString() + L")")); r->addDetail(e); sumExpl->addDetail(r); fail = true; } if ((*c)->getOccur() == BooleanClause::SHOULD) ++shouldMatchCount; } else if ((*c)->isRequired()) { ExplanationPtr r(newLucene(0.0, L"no match on required clause (" + (*c)->getQuery()->toString() + L")")); r->addDetail(e); sumExpl->addDetail(r); fail = true; } } if (fail) { sumExpl->setMatch(false); sumExpl->setValue(0.0); sumExpl->setDescription(L"Failure to meet condition(s) of required/prohibited clause(s)"); return sumExpl; } else if (shouldMatchCount < minShouldMatch) { sumExpl->setMatch(false); sumExpl->setValue(0.0); sumExpl->setDescription(L"Failure to match minimum number of optional clauses: " + StringUtils::toString(minShouldMatch)); return sumExpl; } sumExpl->setMatch(0 < coord); sumExpl->setValue(sum); double coordFactor = similarity->coord(coord, maxCoord); if (coordFactor == 1.0) // coord is no-op return sumExpl; // eliminate wrapper else { ComplexExplanationPtr result(newLucene(sumExpl->isMatch(), sum * coordFactor, L"product of:")); result->addDetail(sumExpl); result->addDetail(newLucene(coordFactor, L"coord(" + StringUtils::toString(coord) + L"/" + StringUtils::toString(maxCoord) + L")")); return result; } } ScorerPtr BooleanWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { Collection required(Collection::newInstance()); Collection prohibited(Collection::newInstance()); Collection optional(Collection::newInstance()); Collection::iterator c = query->clauses.begin(); for (Collection::iterator w = weights.begin(); w != weights.end(); ++w, ++c) { ScorerPtr subScorer((*w)->scorer(reader, true, false)); if (!subScorer) { if ((*c)->isRequired()) return ScorerPtr(); } else if ((*c)->isRequired()) required.add(subScorer); else if ((*c)->isProhibited()) prohibited.add(subScorer); else optional.add(subScorer); } // Check if we can return a BooleanScorer if (!scoreDocsInOrder && topScorer && required.empty() && prohibited.size() < 32) 
return newLucene(similarity, query->minNrShouldMatch, optional, prohibited); if (required.empty() && optional.empty()) { // no required and optional clauses. return ScorerPtr(); } else if (optional.size() < query->minNrShouldMatch) { // either >1 req scorer, or there are 0 req scorers and at least 1 optional scorer. Therefore if there // are not enough optional scorers no documents will be matched by the query return ScorerPtr(); } // Return a BooleanScorer2 return newLucene(similarity, query->minNrShouldMatch, required, prohibited, optional); } bool BooleanWeight::scoresDocsOutOfOrder() { int32_t numProhibited = 0; for (Collection::iterator c = query->clauses.begin(); c != query->clauses.end(); ++c) { if ((*c)->isRequired()) return false; // BS2 (in-order) will be used by scorer() else if ((*c)->isProhibited()) ++numProhibited; } if (numProhibited > 32) // cannot use BS return false; // scorer() will return an out-of-order scorer if requested. return true; } SimilarityDisableCoord::SimilarityDisableCoord(SimilarityPtr delegee) : SimilarityDelegator(delegee) { } SimilarityDisableCoord::~SimilarityDisableCoord() { } double SimilarityDisableCoord::coord(int32_t overlap, int32_t maxOverlap) { return 1.0; // disable coord } } LucenePlusPlus-rel_3.0.4/src/core/search/BooleanScorer.cpp000066400000000000000000000224771217574114600235540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BooleanScorer.h" #include "Similarity.h" namespace Lucene { BooleanScorer::BooleanScorer(SimilarityPtr similarity, int32_t minNrShouldMatch, Collection optionalScorers, Collection prohibitedScorers) : Scorer(similarity) { this->bucketTable = newLucene(); this->maxCoord = 1; this->requiredMask = 0; this->prohibitedMask = 0; this->nextMask = 1; this->minNrShouldMatch = minNrShouldMatch; this->end = 0; this->doc = -1; if (optionalScorers && !optionalScorers.empty()) { for (Collection::iterator scorer = optionalScorers.begin(); scorer != optionalScorers.end(); ++scorer) { ++maxCoord; if ((*scorer)->nextDoc() != NO_MORE_DOCS) scorers = newLucene(*scorer, false, false, bucketTable->newCollector(0), scorers); } } if (prohibitedScorers && !prohibitedScorers.empty()) { for (Collection::iterator scorer = prohibitedScorers.begin(); scorer != prohibitedScorers.end(); ++scorer) { int32_t mask = nextMask; nextMask = nextMask << 1; prohibitedMask |= mask; // update prohibited mask if ((*scorer)->nextDoc() != NO_MORE_DOCS) scorers = newLucene(*scorer, false, true, bucketTable->newCollector(mask), scorers); } } coordFactors = Collection::newInstance(maxCoord); SimilarityPtr sim(getSimilarity()); for (int32_t i = 0; i < maxCoord; ++i) coordFactors[i] = sim->coord(i, maxCoord - 1); } BooleanScorer::~BooleanScorer() { } bool BooleanScorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) { bool more = false; BucketPtr tmp; BucketScorerPtr bs(newLucene()); // The internal loop will set the score and doc before calling collect. 
collector->setScorer(bs); do { bucketTable->first.reset(); while (current) // more queued { // check prohibited & required if ((current->bits & prohibitedMask) == 0 && (current->bits & requiredMask) == requiredMask) { if (current->doc >= max) { tmp = current; current = current->_next.lock(); tmp->_next = bucketTable->first; bucketTable->first = tmp; continue; } if (current->coord >= minNrShouldMatch) { bs->_score = current->score * coordFactors[current->coord]; bs->doc = current->doc; collector->collect(current->doc); } } current = current->_next.lock(); // pop the queue } if (bucketTable->first) { current = bucketTable->first; bucketTable->first = current->_next.lock(); return true; } // refill the queue more = false; end += BucketTable::SIZE; for (SubScorerPtr sub(scorers); sub; sub = sub->next) { int32_t subScorerDocID = sub->scorer->docID(); if (subScorerDocID != NO_MORE_DOCS) { if (sub->scorer->score(sub->collector, end, subScorerDocID)) more = true; } } current = bucketTable->first; } while (current || more); return false; } int32_t BooleanScorer::advance(int32_t target) { boost::throw_exception(UnsupportedOperationException()); return 0; } int32_t BooleanScorer::docID() { return doc; } int32_t BooleanScorer::nextDoc() { bool more = false; do { while (bucketTable->first) // more queued { current = bucketTable->first; bucketTable->first = current->_next.lock(); // pop the queue // check prohibited & required and minNrShouldMatch if ((current->bits & prohibitedMask) == 0 && (current->bits & requiredMask) == requiredMask && current->coord >= minNrShouldMatch) { doc = current->doc; return doc; } } // refill the queue more = false; end += BucketTable::SIZE; for (SubScorerPtr sub(scorers); sub; sub = sub->next) { ScorerPtr scorer(sub->scorer); sub->collector->setScorer(scorer); int32_t doc = scorer->docID(); while (doc < end) { sub->collector->collect(doc); doc = scorer->nextDoc(); } if (doc != NO_MORE_DOCS) more = true; } } while (bucketTable->first || more); doc 
= NO_MORE_DOCS; return doc; } double BooleanScorer::score() { return current->score * coordFactors[current->coord]; } void BooleanScorer::score(CollectorPtr collector) { score(collector, INT_MAX, nextDoc()); } String BooleanScorer::toString() { StringStream buffer; buffer << L"boolean("; for (SubScorerPtr sub(scorers); sub; sub = sub->next) buffer << sub->scorer->toString() << L" "; buffer << L")"; return buffer.str(); } BooleanScorerCollector::BooleanScorerCollector(int32_t mask, BucketTablePtr bucketTable) { this->mask = mask; this->_bucketTable = bucketTable; } BooleanScorerCollector::~BooleanScorerCollector() { } void BooleanScorerCollector::collect(int32_t doc) { BucketTablePtr table(_bucketTable); int32_t i = doc & BucketTable::MASK; BucketPtr bucket(table->buckets[i]); if (!bucket) { bucket = newLucene(); table->buckets[i] = bucket; } if (bucket->doc != doc) // invalid bucket { bucket->doc = doc; // set doc bucket->score = ScorerPtr(_scorer)->score(); // initialize score bucket->bits = mask; // initialize mask bucket->coord = 1; // initialize coord bucket->_next = table->first; // push onto valid list table->first = bucket; } else { bucket->score += ScorerPtr(_scorer)->score(); // increment score bucket->bits |= mask; // add bits in mask ++bucket->coord; // increment coord } } void BooleanScorerCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) { // not needed by this implementation } void BooleanScorerCollector::setScorer(ScorerPtr scorer) { this->_scorer = scorer; } bool BooleanScorerCollector::acceptsDocsOutOfOrder() { return true; } BucketScorer::BucketScorer() : Scorer(SimilarityPtr()) { _score = 0; doc = NO_MORE_DOCS; } BucketScorer::~BucketScorer() { } int32_t BucketScorer::advance(int32_t target) { return NO_MORE_DOCS; } int32_t BucketScorer::docID() { return doc; } int32_t BucketScorer::nextDoc() { return NO_MORE_DOCS; } double BucketScorer::score() { return _score; } Bucket::Bucket() { doc = -1; score = 0; bits = 0; coord = 0; } 
Bucket::~Bucket() { } const int32_t BucketTable::SIZE = 1 << 11; const int32_t BucketTable::MASK = BucketTable::SIZE - 1; BucketTable::BucketTable() { buckets = Collection::newInstance(SIZE); } BucketTable::~BucketTable() { } CollectorPtr BucketTable::newCollector(int32_t mask) { return newLucene(mask, shared_from_this()); } int32_t BucketTable::size() { return SIZE; } SubScorer::SubScorer(ScorerPtr scorer, bool required, bool prohibited, CollectorPtr collector, SubScorerPtr next) { this->scorer = scorer; this->required = required; this->prohibited = prohibited; this->collector = collector; this->next = next; } SubScorer::~SubScorer() { } } LucenePlusPlus-rel_3.0.4/src/core/search/BooleanScorer2.cpp000066400000000000000000000227141217574114600236300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BooleanScorer2.h" #include "ReqOptSumScorer.h" #include "ReqExclScorer.h" #include "Similarity.h" #include "Collector.h" namespace Lucene { BooleanScorer2::BooleanScorer2(SimilarityPtr similarity, int32_t minNrShouldMatch, Collection required, Collection prohibited, Collection optional) : Scorer(similarity) { this->minNrShouldMatch = minNrShouldMatch; this->requiredScorers = required; this->prohibitedScorers = prohibited; this->optionalScorers = optional; this->doc = -1; } BooleanScorer2::~BooleanScorer2() { } void BooleanScorer2::initialize() { if (minNrShouldMatch < 0) boost::throw_exception(IllegalArgumentException(L"Minimum number of optional scorers should not be negative")); coordinator = newLucene(shared_from_this()); coordinator->maxCoord += optionalScorers.size(); coordinator->maxCoord += requiredScorers.size(); coordinator->init(); countingSumScorer = makeCountingSumScorer(); } ScorerPtr BooleanScorer2::countingDisjunctionSumScorer(Collection scorers, int32_t minNrShouldMatch) { // each scorer from the list counted as a single matcher return newLucene(shared_from_this(), scorers, minNrShouldMatch); } ScorerPtr BooleanScorer2::countingConjunctionSumScorer(Collection requiredScorers) { // each scorer from the list counted as a single matcher return newLucene(shared_from_this(), Similarity::getDefault(), requiredScorers); } ScorerPtr BooleanScorer2::dualConjunctionSumScorer(ScorerPtr req1, ScorerPtr req2) { Collection scorers(newCollection(req1, req2)); // All scorers match, so Similarity::getDefault() always has 1 as the coordination factor. // Therefore the sum of the scores of two scorers is used as score. return newLucene(Similarity::getDefault(), scorers); } ScorerPtr BooleanScorer2::makeCountingSumScorer() { return requiredScorers.empty() ? 
makeCountingSumScorerNoReq() : makeCountingSumScorerSomeReq(); } ScorerPtr BooleanScorer2::makeCountingSumScorerNoReq() { // minNrShouldMatch optional scorers are required, but at least 1 int32_t nrOptRequired = minNrShouldMatch < 1 ? 1 : minNrShouldMatch; ScorerPtr requiredCountingSumScorer; if (optionalScorers.size() > nrOptRequired) requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired); else if (optionalScorers.size() == 1) requiredCountingSumScorer = newLucene(optionalScorers[0], coordinator); else requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers); return addProhibitedScorers(requiredCountingSumScorer); } ScorerPtr BooleanScorer2::makeCountingSumScorerSomeReq() { if (optionalScorers.size() == minNrShouldMatch) // all optional scorers also required. { Collection allReq(Collection::newInstance(requiredScorers.begin(), requiredScorers.end())); allReq.addAll(optionalScorers.begin(), optionalScorers.end()); return addProhibitedScorers(countingConjunctionSumScorer(allReq)); } else // optionalScorers.size() > minNrShouldMatch, and at least one required scorer { ScorerPtr requiredCountingSumScorer = requiredScorers.size() == 1 ? newLucene(requiredScorers[0], coordinator) : countingConjunctionSumScorer(requiredScorers); if (minNrShouldMatch > 0) // use a required disjunction scorer over the optional scorers return addProhibitedScorers(dualConjunctionSumScorer(requiredCountingSumScorer, countingDisjunctionSumScorer(optionalScorers, minNrShouldMatch))); else // minNrShouldMatch == 0 return newLucene(addProhibitedScorers(requiredCountingSumScorer), optionalScorers.size() == 1 ? newLucene(optionalScorers[0], coordinator) : countingDisjunctionSumScorer(optionalScorers, 1)); } } ScorerPtr BooleanScorer2::addProhibitedScorers(ScorerPtr requiredCountingSumScorer) { return prohibitedScorers.empty() ? requiredCountingSumScorer : newLucene(requiredCountingSumScorer, (prohibitedScorers.size() == 1 ? 
prohibitedScorers[0] : newLucene(prohibitedScorers))); } void BooleanScorer2::score(CollectorPtr collector) { collector->setScorer(shared_from_this()); while ((doc = countingSumScorer->nextDoc()) != NO_MORE_DOCS) collector->collect(doc); } bool BooleanScorer2::score(CollectorPtr collector, int32_t max, int32_t firstDocID) { doc = firstDocID; collector->setScorer(shared_from_this()); while (doc < max) { collector->collect(doc); doc = countingSumScorer->nextDoc(); } return (doc != NO_MORE_DOCS); } int32_t BooleanScorer2::docID() { return doc; } int32_t BooleanScorer2::nextDoc() { doc = countingSumScorer->nextDoc(); return doc; } double BooleanScorer2::score() { coordinator->nrMatchers = 0; double sum = countingSumScorer->score(); return sum * coordinator->coordFactors[coordinator->nrMatchers]; } int32_t BooleanScorer2::advance(int32_t target) { doc = countingSumScorer->advance(target); return doc; } Coordinator::Coordinator(BooleanScorer2Ptr scorer) { _scorer = scorer; maxCoord = 0; nrMatchers = 0; } Coordinator::~Coordinator() { } void Coordinator::init() { coordFactors = Collection::newInstance(maxCoord + 1); SimilarityPtr sim(BooleanScorer2Ptr(_scorer)->getSimilarity()); for (int32_t i = 0; i <= maxCoord; ++i) coordFactors[i] = sim->coord(i, maxCoord); } SingleMatchScorer::SingleMatchScorer(ScorerPtr scorer, CoordinatorPtr coordinator) : Scorer(scorer->getSimilarity()) { lastScoredDoc = -1; lastDocScore = std::numeric_limits::quiet_NaN(); this->scorer = scorer; this->coordinator = coordinator; } SingleMatchScorer::~SingleMatchScorer() { } double SingleMatchScorer::score() { int32_t doc = docID(); if (doc >= lastScoredDoc) { if (doc > lastScoredDoc) { lastDocScore = scorer->score(); lastScoredDoc = doc; } ++coordinator->nrMatchers; } return lastDocScore; } int32_t SingleMatchScorer::docID() { return scorer->docID(); } int32_t SingleMatchScorer::nextDoc() { return scorer->nextDoc(); } int32_t SingleMatchScorer::advance(int32_t target) { return 
scorer->advance(target); } CountingDisjunctionSumScorer::CountingDisjunctionSumScorer(BooleanScorer2Ptr scorer, Collection subScorers, int32_t minimumNrMatchers) : DisjunctionSumScorer(subScorers, minimumNrMatchers) { _scorer = scorer; lastScoredDoc = -1; lastDocScore = std::numeric_limits::quiet_NaN(); } CountingDisjunctionSumScorer::~CountingDisjunctionSumScorer() { } double CountingDisjunctionSumScorer::score() { int32_t doc = docID(); if (doc >= lastScoredDoc) { if (doc > lastScoredDoc) { lastDocScore = DisjunctionSumScorer::score(); lastScoredDoc = doc; } BooleanScorer2Ptr(_scorer)->coordinator->nrMatchers += DisjunctionSumScorer::_nrMatchers; } return lastDocScore; } CountingConjunctionSumScorer::CountingConjunctionSumScorer(BooleanScorer2Ptr scorer, SimilarityPtr similarity, Collection scorers) : ConjunctionScorer(similarity, scorers) { _scorer = scorer; lastScoredDoc = -1; requiredNrMatchers = scorers.size(); lastDocScore = std::numeric_limits::quiet_NaN(); } CountingConjunctionSumScorer::~CountingConjunctionSumScorer() { } double CountingConjunctionSumScorer::score() { int32_t doc = docID(); if (doc >= lastScoredDoc) { if (doc > lastScoredDoc) { lastDocScore = ConjunctionScorer::score(); lastScoredDoc = doc; } BooleanScorer2Ptr(_scorer)->coordinator->nrMatchers += requiredNrMatchers; } // All scorers match, so Similarity::getDefault() ConjunctionScorer::score() always has 1 as the /// coordination factor. Therefore the sum of the scores of the requiredScorers is used as score. return lastDocScore; } } LucenePlusPlus-rel_3.0.4/src/core/search/CachingSpanFilter.cpp000066400000000000000000000060701217574114600243320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CachingSpanFilter.h" #include "_CachingSpanFilter.h" #include "SpanFilterResult.h" #include "IndexReader.h" namespace Lucene { CachingSpanFilter::CachingSpanFilter(SpanFilterPtr filter, CachingWrapperFilter::DeletesMode deletesMode) { this->filter = filter; if (deletesMode == CachingWrapperFilter::DELETES_DYNAMIC) boost::throw_exception(IllegalArgumentException(L"DeletesMode::DYNAMIC is not supported")); this->cache = newLucene(deletesMode); this->hitCount = 0; this->missCount = 0; } CachingSpanFilter::~CachingSpanFilter() { } DocIdSetPtr CachingSpanFilter::getDocIdSet(IndexReaderPtr reader) { SpanFilterResultPtr result(getCachedResult(reader)); return result ? result->getDocIdSet() : DocIdSetPtr(); } SpanFilterResultPtr CachingSpanFilter::getCachedResult(IndexReaderPtr reader) { LuceneObjectPtr coreKey = reader->getFieldCacheKey(); LuceneObjectPtr delCoreKey = reader->hasDeletions() ? 
reader->getDeletesCacheKey() : coreKey; SpanFilterResultPtr result(boost::dynamic_pointer_cast(cache->get(reader, coreKey, delCoreKey))); if (result) { ++hitCount; return result; } ++missCount; result = filter->bitSpans(reader); cache->put(coreKey, delCoreKey, result); return result; } SpanFilterResultPtr CachingSpanFilter::bitSpans(IndexReaderPtr reader) { return getCachedResult(reader); } String CachingSpanFilter::toString() { return L"CachingSpanFilter(" + filter->toString() + L")"; } bool CachingSpanFilter::equals(LuceneObjectPtr other) { if (SpanFilter::equals(other)) return true; CachingSpanFilterPtr otherCachingSpanFilter(boost::dynamic_pointer_cast(other)); if (!otherCachingSpanFilter) return false; return this->filter->equals(otherCachingSpanFilter->filter); } int32_t CachingSpanFilter::hashCode() { return filter->hashCode() ^ 0x1117bf25; } FilterCacheSpanFilterResult::FilterCacheSpanFilterResult(CachingWrapperFilter::DeletesMode deletesMode) : FilterCache(deletesMode) { } FilterCacheSpanFilterResult::~FilterCacheSpanFilterResult() { } LuceneObjectPtr FilterCacheSpanFilterResult::mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value) { boost::throw_exception(IllegalStateException(L"DeletesMode::DYNAMIC is not supported")); return LuceneObjectPtr(); } } LucenePlusPlus-rel_3.0.4/src/core/search/CachingWrapperFilter.cpp000066400000000000000000000125661217574114600250600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CachingWrapperFilter.h" #include "_CachingWrapperFilter.h" #include "OpenBitSetDISI.h" #include "IndexReader.h" namespace Lucene { CachingWrapperFilter::CachingWrapperFilter(FilterPtr filter, DeletesMode deletesMode) { this->filter = filter; this->cache = newLucene(deletesMode); this->hitCount = 0; this->missCount = 0; } CachingWrapperFilter::~CachingWrapperFilter() { } DocIdSetPtr CachingWrapperFilter::docIdSetToCache(DocIdSetPtr docIdSet, IndexReaderPtr reader) { if (!docIdSet) { // this is better than returning null, as the nonnull result can be cached return DocIdSet::EMPTY_DOCIDSET(); } else if (docIdSet->isCacheable()) return docIdSet; else { DocIdSetIteratorPtr it(docIdSet->iterator()); // null is allowed to be returned by iterator(), in this case we wrap with the empty set, // which is cacheable. return !it ? DocIdSet::EMPTY_DOCIDSET() : newLucene(it, reader->maxDoc()); } } DocIdSetPtr CachingWrapperFilter::getDocIdSet(IndexReaderPtr reader) { LuceneObjectPtr coreKey = reader->getFieldCacheKey(); LuceneObjectPtr delCoreKey = reader->hasDeletions() ? 
reader->getDeletesCacheKey() : coreKey; DocIdSetPtr docIdSet(boost::dynamic_pointer_cast(cache->get(reader, coreKey, delCoreKey))); if (docIdSet) { ++hitCount; return docIdSet; } ++missCount; // cache miss docIdSet = docIdSetToCache(filter->getDocIdSet(reader), reader); if (docIdSet) cache->put(coreKey, delCoreKey, docIdSet); return docIdSet; } String CachingWrapperFilter::toString() { return L"CachingWrapperFilter(" + filter->toString() + L")"; } bool CachingWrapperFilter::equals(LuceneObjectPtr other) { if (Filter::equals(other)) return true; CachingWrapperFilterPtr otherCachingWrapperFilter(boost::dynamic_pointer_cast(other)); if (!otherCachingWrapperFilter) return false; return this->filter->equals(otherCachingWrapperFilter->filter); } int32_t CachingWrapperFilter::hashCode() { return filter->hashCode() ^ 0x1117bf25; } FilterCache::FilterCache(CachingWrapperFilter::DeletesMode deletesMode) { this->deletesMode = deletesMode; } FilterCache::~FilterCache() { } LuceneObjectPtr FilterCache::get(IndexReaderPtr reader, LuceneObjectPtr coreKey, LuceneObjectPtr delCoreKey) { SyncLock syncLock(this); if (!cache) cache = WeakMapObjectObject::newInstance(); LuceneObjectPtr value; if (deletesMode == CachingWrapperFilter::DELETES_IGNORE) { // key on core value = cache.get(coreKey); } else if (deletesMode == CachingWrapperFilter::DELETES_RECACHE) { // key on deletes, if any, else core value = cache.get(delCoreKey); } else { BOOST_ASSERT(deletesMode == CachingWrapperFilter::DELETES_DYNAMIC); // first try for exact match value = cache.get(delCoreKey); if (!value) { // now for core match, but dynamically AND NOT deletions value = cache.get(coreKey); if (value && reader->hasDeletions()) value = mergeDeletes(reader, value); } } return value; } void FilterCache::put(LuceneObjectPtr coreKey, LuceneObjectPtr delCoreKey, LuceneObjectPtr value) { SyncLock syncLock(this); if (deletesMode == CachingWrapperFilter::DELETES_IGNORE) cache.put(coreKey, value); else if (deletesMode == 
CachingWrapperFilter::DELETES_RECACHE) cache.put(delCoreKey, value); else { cache.put(coreKey, value); cache.put(delCoreKey, value); } } FilterCacheDocIdSet::FilterCacheDocIdSet(CachingWrapperFilter::DeletesMode deletesMode) : FilterCache(deletesMode) { } FilterCacheDocIdSet::~FilterCacheDocIdSet() { } LuceneObjectPtr FilterCacheDocIdSet::mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value) { return newLucene(reader, boost::dynamic_pointer_cast(value)); } FilteredCacheDocIdSet::FilteredCacheDocIdSet(IndexReaderPtr reader, DocIdSetPtr innerSet) : FilteredDocIdSet(innerSet) { this->reader = reader; } FilteredCacheDocIdSet::~FilteredCacheDocIdSet() { } bool FilteredCacheDocIdSet::match(int32_t docid) { return !reader->isDeleted(docid); } } LucenePlusPlus-rel_3.0.4/src/core/search/Collector.cpp000066400000000000000000000006771217574114600227430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Collector.h" namespace Lucene { Collector::~Collector() { } } LucenePlusPlus-rel_3.0.4/src/core/search/ComplexExplanation.cpp000066400000000000000000000021361217574114600246170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ComplexExplanation.h" #include "StringUtils.h" namespace Lucene { ComplexExplanation::ComplexExplanation(bool match, double value, const String& description) : Explanation(value, description) { this->match = match; } ComplexExplanation::~ComplexExplanation() { } bool ComplexExplanation::getMatch() { return match; } void ComplexExplanation::setMatch(bool match) { this->match = match; } bool ComplexExplanation::isMatch() { return getMatch(); } String ComplexExplanation::getSummary() { return StringUtils::toString(getValue()) + L" = " + (isMatch() ? L"(MATCH) " : L"(NON-MATCH) ") + getDescription(); } } LucenePlusPlus-rel_3.0.4/src/core/search/ConjunctionScorer.cpp000066400000000000000000000106741217574114600244620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ConjunctionScorer.h" #include "Similarity.h" namespace Lucene { struct lessScorerDocId { inline bool operator()(const ScorerPtr& first, const ScorerPtr& second) const { return (first->docID() < second->docID()); } }; ConjunctionScorer::ConjunctionScorer(SimilarityPtr similarity, Collection scorers) : Scorer(similarity) { this->lastDoc = -1; this->scorers = scorers; this->coord = similarity->coord(scorers.size(), scorers.size()); for (Collection::iterator scorer = scorers.begin(); scorer != scorers.end(); ++scorer) { if ((*scorer)->nextDoc() == NO_MORE_DOCS) { // If even one of the sub-scorers does not have any documents, this scorer should not attempt // to do any more work. lastDoc = NO_MORE_DOCS; return; } } // Sort the array the first time... 
// We don't need to sort the array in any future calls because we know it will already start off // sorted (all scorers on same doc). std::sort(scorers.begin(), scorers.end(), lessScorerDocId()); // NOTE: doNext() must be called before the re-sorting of the array later on. The reason is this: // assume there are 5 scorers, whose first docs are 1, 2, 3, 5, 5 respectively. Sorting (above) leaves // the array as is. Calling doNext() here advances all the first scorers to 5 (or a larger doc ID // they all agree on). // However, if we re-sort before doNext() is called, the order will be 5, 3, 2, 1, 5 and then doNext() // will stop immediately, since the first scorer's docs equals the last one. So the invariant that after // calling doNext() all scorers are on the same doc ID is broken. if (doNext() == NO_MORE_DOCS) { // The scorers did not agree on any document. lastDoc = NO_MORE_DOCS; return; } // If first-time skip distance is any predictor of scorer sparseness, then we should always try to skip // first on those scorers. Keep last scorer in it's last place (it will be the first to be skipped on), // but reverse all of the others so that they will be skipped on in order of original high skip. int32_t end = scorers.size() - 1; int32_t max = end >> 1; for (int32_t i = 0; i < max; ++i) { ScorerPtr tmp(scorers[i]); int32_t idx = end - i - 1; scorers[i] = scorers[idx]; scorers[idx] = tmp; } } ConjunctionScorer::~ConjunctionScorer() { } int32_t ConjunctionScorer::doNext() { int32_t first = 0; int32_t doc = scorers[scorers.size() - 1]->docID(); ScorerPtr firstScorer; while ((firstScorer = scorers[first])->docID() < doc) { doc = firstScorer->advance(doc); first = first == scorers.size() - 1 ? 
0 : first + 1; } return doc; } int32_t ConjunctionScorer::advance(int32_t target) { if (lastDoc == NO_MORE_DOCS) return lastDoc; else if (scorers[(scorers.size() - 1)]->docID() < target) scorers[(scorers.size() - 1)]->advance(target); lastDoc = doNext(); return lastDoc; } int32_t ConjunctionScorer::docID() { return lastDoc; } int32_t ConjunctionScorer::nextDoc() { if (lastDoc == NO_MORE_DOCS) return lastDoc; else if (lastDoc == -1) { lastDoc = scorers[scorers.size() - 1]->docID(); return lastDoc; } scorers[(scorers.size() - 1)]->nextDoc(); lastDoc = doNext(); return lastDoc; } double ConjunctionScorer::score() { double sum = 0.0; for (Collection::iterator scorer = scorers.begin(); scorer != scorers.end(); ++scorer) sum += (*scorer)->score(); return sum * coord; } } LucenePlusPlus-rel_3.0.4/src/core/search/ConstantScoreQuery.cpp000066400000000000000000000130511217574114600246160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ConstantScoreQuery.h" #include "_ConstantScoreQuery.h" #include "Filter.h" #include "ComplexExplanation.h" #include "DocIdSet.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { ConstantScoreQuery::ConstantScoreQuery(FilterPtr filter) { this->filter = filter; } ConstantScoreQuery::~ConstantScoreQuery() { } FilterPtr ConstantScoreQuery::getFilter() { return filter; } QueryPtr ConstantScoreQuery::rewrite(IndexReaderPtr reader) { return shared_from_this(); } void ConstantScoreQuery::extractTerms(SetTerm terms) { // OK to not add any terms when used for MultiSearcher, but may not be OK for highlighting } WeightPtr ConstantScoreQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } String ConstantScoreQuery::toString(const String& field) { return L"ConstantScore(" + filter->toString() + (getBoost() == 1.0 ? L")" : L"^" + StringUtils::toString(getBoost())); } bool ConstantScoreQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; ConstantScoreQueryPtr otherConstantScoreQuery(boost::dynamic_pointer_cast(other)); if (!otherConstantScoreQuery) return false; return (this->getBoost() == otherConstantScoreQuery->getBoost() && this->filter->equals(otherConstantScoreQuery->filter)); } int32_t ConstantScoreQuery::hashCode() { // Simple add is OK since no existing filter hashcode has a float component. return filter->hashCode() + MiscUtils::doubleToIntBits(getBoost()); } LuceneObjectPtr ConstantScoreQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(filter); ConstantScoreQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->filter = filter; return cloneQuery; } ConstantWeight::ConstantWeight(ConstantScoreQueryPtr constantScorer, SearcherPtr searcher) { this->constantScorer = constantScorer; this->similarity = constantScorer->getSimilarity(searcher); this->queryNorm = 0; this->queryWeight = 0; } ConstantWeight::~ConstantWeight() { } QueryPtr ConstantWeight::getQuery() { return constantScorer; } double ConstantWeight::getValue() { return queryWeight; } double ConstantWeight::sumOfSquaredWeights() { queryWeight = constantScorer->getBoost(); return queryWeight * queryWeight; } void ConstantWeight::normalize(double norm) { this->queryNorm = norm; queryWeight *= this->queryNorm; } ScorerPtr ConstantWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(constantScorer, similarity, reader, shared_from_this()); } ExplanationPtr ConstantWeight::explain(IndexReaderPtr reader, int32_t doc) { ConstantScorerPtr cs(newLucene(constantScorer, similarity, reader, shared_from_this())); bool exists = (cs->docIdSetIterator->advance(doc) == doc); ComplexExplanationPtr result(newLucene()); if (exists) { result->setDescription(L"ConstantScoreQuery(" + constantScorer->filter->toString() + L"), product of:"); result->setValue(queryWeight); result->setMatch(true); result->addDetail(newLucene(constantScorer->getBoost(), L"boost")); result->addDetail(newLucene(queryNorm, L"queryNorm")); } else { result->setDescription(L"ConstantScoreQuery(" + constantScorer->filter->toString() + L") doesn't match id " + StringUtils::toString(doc)); result->setValue(0); result->setMatch(false); } return result; } ConstantScorer::ConstantScorer(ConstantScoreQueryPtr constantScorer, SimilarityPtr similarity, IndexReaderPtr reader, WeightPtr w) : Scorer(similarity) { doc = -1; theScore = w->getValue(); DocIdSetPtr docIdSet(constantScorer->filter->getDocIdSet(reader)); if 
(!docIdSet) docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); else { DocIdSetIteratorPtr iter(docIdSet->iterator()); if (!iter) docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); else docIdSetIterator = iter; } } ConstantScorer::~ConstantScorer() { } int32_t ConstantScorer::nextDoc() { return docIdSetIterator->nextDoc(); } int32_t ConstantScorer::docID() { return docIdSetIterator->docID(); } double ConstantScorer::score() { return theScore; } int32_t ConstantScorer::advance(int32_t target) { return docIdSetIterator->advance(target); } } LucenePlusPlus-rel_3.0.4/src/core/search/DefaultSimilarity.cpp000066400000000000000000000037261217574114600244460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DefaultSimilarity.h" #include "FieldInvertState.h" namespace Lucene { DefaultSimilarity::DefaultSimilarity() { discountOverlaps = false; } DefaultSimilarity::~DefaultSimilarity() { } double DefaultSimilarity::computeNorm(const String& fieldName, FieldInvertStatePtr state) { int32_t numTerms; if (discountOverlaps) numTerms = state->getLength() - state->getNumOverlap(); else numTerms = state->getLength(); return (state->getBoost() * lengthNorm(fieldName, numTerms)); } double DefaultSimilarity::lengthNorm(const String& fieldName, int32_t numTokens) { return (double)(1.0 / std::sqrt((double)numTokens)); } double DefaultSimilarity::queryNorm(double sumOfSquaredWeights) { return (double)(1.0 / std::sqrt(sumOfSquaredWeights)); } double DefaultSimilarity::tf(double freq) { return (double)std::sqrt(freq); } double DefaultSimilarity::sloppyFreq(int32_t distance) { return (1.0 / (double)(distance + 1)); } double 
DefaultSimilarity::idf(int32_t docFreq, int32_t numDocs) { return (double)(std::log((double)numDocs / (double)(docFreq + 1)) + 1.0); } double DefaultSimilarity::coord(int32_t overlap, int32_t maxOverlap) { return (double)overlap / (double)maxOverlap; } void DefaultSimilarity::setDiscountOverlaps(bool v) { discountOverlaps = v; } bool DefaultSimilarity::getDiscountOverlaps() { return discountOverlaps; } } LucenePlusPlus-rel_3.0.4/src/core/search/DisjunctionMaxQuery.cpp000066400000000000000000000204451217574114600247750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DisjunctionMaxQuery.h" #include "_DisjunctionMaxQuery.h" #include "BooleanQuery.h" #include "DocIdSetIterator.h" #include "ComplexExplanation.h" #include "Searcher.h" #include "DisjunctionMaxScorer.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DisjunctionMaxQuery::DisjunctionMaxQuery(double tieBreakerMultiplier) { this->tieBreakerMultiplier = tieBreakerMultiplier; this->disjuncts = Collection::newInstance(); } DisjunctionMaxQuery::DisjunctionMaxQuery(Collection disjuncts, double tieBreakerMultiplier) { this->tieBreakerMultiplier = tieBreakerMultiplier; this->disjuncts = Collection::newInstance(); add(disjuncts); } DisjunctionMaxQuery::~DisjunctionMaxQuery() { } void DisjunctionMaxQuery::add(QueryPtr query) { disjuncts.add(query); } void DisjunctionMaxQuery::add(Collection disjuncts) { this->disjuncts.addAll(disjuncts.begin(), disjuncts.end()); } Collection::iterator DisjunctionMaxQuery::begin() { return disjuncts.begin(); } Collection::iterator DisjunctionMaxQuery::end() { return disjuncts.end(); } WeightPtr 
DisjunctionMaxQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } QueryPtr DisjunctionMaxQuery::rewrite(IndexReaderPtr reader) { int32_t numDisjunctions = disjuncts.size(); if (numDisjunctions == 1) { QueryPtr singleton(disjuncts[0]); QueryPtr result(singleton->rewrite(reader)); if (getBoost() != 1.0) { if (result == singleton) result = boost::dynamic_pointer_cast(result->clone()); result->setBoost(getBoost() * result->getBoost()); } return result; } DisjunctionMaxQueryPtr clone; for (int32_t i = 0; i < numDisjunctions; ++i) { QueryPtr clause(disjuncts[i]); QueryPtr rewrite(clause->rewrite(reader)); if (rewrite != clause) { if (!clone) clone = boost::dynamic_pointer_cast(this->clone()); clone->disjuncts[i] = rewrite; } } return clone ? clone : shared_from_this(); } LuceneObjectPtr DisjunctionMaxQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = Query::clone(other ? other : newLucene()); DisjunctionMaxQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->tieBreakerMultiplier = tieBreakerMultiplier; cloneQuery->disjuncts = Collection::newInstance(disjuncts.begin(), disjuncts.end()); return cloneQuery; } void DisjunctionMaxQuery::extractTerms(SetTerm terms) { for (Collection::iterator query = disjuncts.begin(); query != disjuncts.end(); ++query) (*query)->extractTerms(terms); } String DisjunctionMaxQuery::toString(const String& field) { String buffer(L"("); for (Collection::iterator query = disjuncts.begin(); query != disjuncts.end(); ++query) { if (query != disjuncts.begin()) buffer += L" | "; if (boost::dynamic_pointer_cast(*query)) // wrap sub-bools in parens buffer += L"(" + (*query)->toString(field) + L")"; else buffer += (*query)->toString(field); } buffer += L")"; if (tieBreakerMultiplier != 0.0) buffer += L"~" + StringUtils::toString(tieBreakerMultiplier); if (getBoost() != 1.0) buffer += L"^" + StringUtils::toString(getBoost()); return buffer; } bool 
DisjunctionMaxQuery::equals(LuceneObjectPtr other) { if (!Query::equals(other)) return false; DisjunctionMaxQueryPtr otherDisjunctionMaxQuery(boost::dynamic_pointer_cast(other)); if (!otherDisjunctionMaxQuery) return false; return (tieBreakerMultiplier == otherDisjunctionMaxQuery->tieBreakerMultiplier && disjuncts.equals(otherDisjunctionMaxQuery->disjuncts, luceneEquals())); } int32_t DisjunctionMaxQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) + MiscUtils::doubleToIntBits(tieBreakerMultiplier) + MiscUtils::hashCode(disjuncts.begin(), disjuncts.end(), MiscUtils::hashLucene); } DisjunctionMaxWeight::DisjunctionMaxWeight(DisjunctionMaxQueryPtr query, SearcherPtr searcher) { this->query = query; this->similarity = searcher->getSimilarity(); this->weights = Collection::newInstance(); for (Collection::iterator disjunctQuery = query->disjuncts.begin(); disjunctQuery != query->disjuncts.end(); ++disjunctQuery) this->weights.add((*disjunctQuery)->createWeight(searcher)); } DisjunctionMaxWeight::~DisjunctionMaxWeight() { } QueryPtr DisjunctionMaxWeight::getQuery() { return query; } double DisjunctionMaxWeight::getValue() { return query->getBoost(); } double DisjunctionMaxWeight::sumOfSquaredWeights() { double max = 0.0; double sum = 0.0; for (Collection::iterator currentWeight = weights.begin(); currentWeight != weights.end(); ++currentWeight) { double sub = (*currentWeight)->sumOfSquaredWeights(); sum += sub; max = std::max(max, sub); } double boost = query->getBoost(); return (((sum - max) * query->tieBreakerMultiplier * query->tieBreakerMultiplier) + max) * boost * boost; } void DisjunctionMaxWeight::normalize(double norm) { norm *= query->getBoost(); // Incorporate our boost for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) (*wt)->normalize(norm); } ScorerPtr DisjunctionMaxWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { Collection scorers(Collection::newInstance(weights.size())); int32_t idx = 0; 
for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) { ScorerPtr subScorer((*wt)->scorer(reader, true, false)); if (subScorer && subScorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS) scorers[idx++] = subScorer; } if (idx == 0) return ScorerPtr(); // all scorers did not have documents DisjunctionMaxScorerPtr result(newLucene(query->tieBreakerMultiplier, similarity, scorers, idx)); return result; } ExplanationPtr DisjunctionMaxWeight::explain(IndexReaderPtr reader, int32_t doc) { if (query->disjuncts.size() == 1) return weights[0]->explain(reader, doc); ComplexExplanationPtr result(newLucene()); double max = 0.0; double sum = 0.0; result->setDescription(query->tieBreakerMultiplier == 0.0 ? L"max of:" : (L"max plus " + StringUtils::toString(query->tieBreakerMultiplier) + L" times others of:")); for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) { ExplanationPtr e = (*wt)->explain(reader, doc); if (e->isMatch()) { result->setMatch(true); result->addDetail(e); sum += e->getValue(); max = std::max(max, e->getValue()); } } result->setValue(max + (sum - max) * query->tieBreakerMultiplier); return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/DisjunctionMaxScorer.cpp000066400000000000000000000117541217574114600251300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DisjunctionMaxScorer.h" namespace Lucene { DisjunctionMaxScorer::DisjunctionMaxScorer(double tieBreakerMultiplier, SimilarityPtr similarity, Collection subScorers, int32_t numScorers) : Scorer(similarity) { this->doc = -1; this->tieBreakerMultiplier = tieBreakerMultiplier; // The passed subScorers array includes only scorers which have documents (DisjunctionMaxQuery takes care // of that), and their nextDoc() was already called. this->subScorers = subScorers; this->numScorers = numScorers; heapify(); } DisjunctionMaxScorer::~DisjunctionMaxScorer() { } int32_t DisjunctionMaxScorer::nextDoc() { if (numScorers == 0) { doc = NO_MORE_DOCS; return doc; } while (subScorers[0]->docID() == doc) { if (subScorers[0]->nextDoc() != NO_MORE_DOCS) heapAdjust(0); else { heapRemoveRoot(); if (numScorers == 0) { doc = NO_MORE_DOCS; return doc; } } } doc = subScorers[0]->docID(); return doc; } int32_t DisjunctionMaxScorer::docID() { return doc; } double DisjunctionMaxScorer::score() { int32_t doc = subScorers[0]->docID(); Collection sum(newCollection(subScorers[0]->score())); Collection max(Collection::newInstance(sum.begin(), sum.end())); int32_t size = numScorers; scoreAll(1, size, doc, sum, max); scoreAll(2, size, doc, sum, max); return max[0] + (sum[0] - max[0]) * tieBreakerMultiplier; } void DisjunctionMaxScorer::scoreAll(int32_t root, int32_t size, int32_t doc, Collection sum, Collection max) { if (root < size && subScorers[root]->docID() == doc) { double sub = subScorers[root]->score(); sum[0] += sub; max[0] = std::max(max[0], sub); scoreAll((root << 1) + 1, size, doc, sum, max); scoreAll((root << 1) + 2, size, doc, sum, max); } } int32_t DisjunctionMaxScorer::advance(int32_t target) { if (numScorers == 0) { doc = NO_MORE_DOCS; return doc; } while (subScorers[0]->docID() < target) { if (subScorers[0]->advance(target) != NO_MORE_DOCS) heapAdjust(0); else { 
heapRemoveRoot(); if (numScorers == 0) { doc = NO_MORE_DOCS; return doc; } } } doc = subScorers[0]->docID(); return doc; } void DisjunctionMaxScorer::heapify() { for (int32_t i = (numScorers >> 1) - 1; i >= 0; --i) heapAdjust(i); } void DisjunctionMaxScorer::heapAdjust(int32_t root) { ScorerPtr scorer(subScorers[root]); int32_t doc = scorer->docID(); int32_t i = root; while (i <= (numScorers >> 1) - 1) { int32_t lchild = (i << 1) + 1; ScorerPtr lscorer(subScorers[lchild]); int32_t ldoc = lscorer->docID(); int32_t rdoc = INT_MAX; int32_t rchild = (i << 1) + 2; ScorerPtr rscorer; if (rchild < numScorers) { rscorer = subScorers[rchild]; rdoc = rscorer->docID(); } if (ldoc < doc) { if (rdoc < ldoc) { subScorers[i] = rscorer; subScorers[rchild] = scorer; i = rchild; } else { subScorers[i] = lscorer; subScorers[lchild] = scorer; i = lchild; } } else if (rdoc < doc) { subScorers[i] = rscorer; subScorers[rchild] = scorer; i = rchild; } else return; } } void DisjunctionMaxScorer::heapRemoveRoot() { if (numScorers == 1) { subScorers[0].reset(); numScorers = 0; } else { subScorers[0] = subScorers[numScorers - 1]; subScorers[numScorers - 1].reset(); --numScorers; heapAdjust(0); } } } LucenePlusPlus-rel_3.0.4/src/core/search/DisjunctionSumScorer.cpp000066400000000000000000000111211217574114600251330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DisjunctionSumScorer.h" #include "ScorerDocQueue.h" #include "Collector.h" namespace Lucene { DisjunctionSumScorer::DisjunctionSumScorer(Collection subScorers, int32_t minimumNrMatchers) : Scorer(SimilarityPtr()) { this->currentDoc = -1; this->_nrMatchers = -1; this->currentScore = std::numeric_limits::quiet_NaN(); this->nrScorers = subScorers.size(); if (minimumNrMatchers <= 0) boost::throw_exception(IllegalArgumentException(L"Minimum nr of matchers must be positive")); if (nrScorers <= 1) boost::throw_exception(IllegalArgumentException(L"There must be at least 2 subScorers")); this->minimumNrMatchers = minimumNrMatchers; this->subScorers = subScorers; } DisjunctionSumScorer::~DisjunctionSumScorer() { } void DisjunctionSumScorer::initialize() { initScorerDocQueue(); } void DisjunctionSumScorer::initScorerDocQueue() { scorerDocQueue = newLucene(nrScorers); for (Collection::iterator se = subScorers.begin(); se != subScorers.end(); ++se) { if ((*se)->nextDoc() != NO_MORE_DOCS) scorerDocQueue->insert(*se); } } void DisjunctionSumScorer::score(CollectorPtr collector) { collector->setScorer(shared_from_this()); while (nextDoc() != NO_MORE_DOCS) collector->collect(currentDoc); } bool DisjunctionSumScorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) { // firstDocID is ignored since nextDoc() sets 'currentDoc' collector->setScorer(shared_from_this()); while (currentDoc < max) { collector->collect(currentDoc); if (nextDoc() == NO_MORE_DOCS) return false; } return true; } int32_t DisjunctionSumScorer::nextDoc() { if (scorerDocQueue->size() < minimumNrMatchers || !advanceAfterCurrent()) currentDoc = NO_MORE_DOCS; return currentDoc; } bool DisjunctionSumScorer::advanceAfterCurrent() { do // repeat until minimum nr of matchers { currentDoc = scorerDocQueue->topDoc(); currentScore = scorerDocQueue->topScore(); _nrMatchers = 1; do // Until all 
subscorers are after currentDoc { if (!scorerDocQueue->topNextAndAdjustElsePop()) { if (scorerDocQueue->size() == 0) break; // nothing more to advance, check for last match. } if (scorerDocQueue->topDoc() != currentDoc) break; // All remaining subscorers are after currentDoc. currentScore += scorerDocQueue->topScore(); ++_nrMatchers; } while (true); if (_nrMatchers >= minimumNrMatchers) return true; else if (scorerDocQueue->size() < minimumNrMatchers) return false; } while (true); } double DisjunctionSumScorer::score() { return currentScore; } int32_t DisjunctionSumScorer::docID() { return currentDoc; } int32_t DisjunctionSumScorer::nrMatchers() { return _nrMatchers; } int32_t DisjunctionSumScorer::advance(int32_t target) { if (scorerDocQueue->size() < minimumNrMatchers) { currentDoc = NO_MORE_DOCS; return currentDoc; } if (target <= currentDoc) return currentDoc; do { if (scorerDocQueue->topDoc() >= target) { if (!advanceAfterCurrent()) currentDoc = NO_MORE_DOCS; return currentDoc; } else if (!scorerDocQueue->topSkipToAndAdjustElsePop(target)) { if (scorerDocQueue->size() < minimumNrMatchers) { currentDoc = NO_MORE_DOCS; return currentDoc; } } } while (true); } } LucenePlusPlus-rel_3.0.4/src/core/search/DocIdSet.cpp000066400000000000000000000026341217574114600224460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocIdSet.h" #include "_DocIdSet.h" namespace Lucene { DocIdSet::~DocIdSet() { } bool DocIdSet::isCacheable() { return false; } DocIdSetPtr DocIdSet::EMPTY_DOCIDSET() { static DocIdSetPtr _EMPTY_DOCIDSET; if (!_EMPTY_DOCIDSET) { _EMPTY_DOCIDSET = newLucene(); CycleCheck::addStatic(_EMPTY_DOCIDSET); } return _EMPTY_DOCIDSET; } EmptyDocIdSetIterator::~EmptyDocIdSetIterator() { } int32_t EmptyDocIdSetIterator::advance(int32_t target) { return NO_MORE_DOCS; } int32_t EmptyDocIdSetIterator::docID() { return NO_MORE_DOCS; } int32_t EmptyDocIdSetIterator::nextDoc() { return NO_MORE_DOCS; } EmptyDocIdSet::~EmptyDocIdSet() { } DocIdSetIteratorPtr EmptyDocIdSet::iterator() { return newLucene(); } bool EmptyDocIdSet::isCacheable() { return true; } } LucenePlusPlus-rel_3.0.4/src/core/search/DocIdSetIterator.cpp000066400000000000000000000012321217574114600241510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocIdSetIterator.h" namespace Lucene { /// When returned by {@link #nextDoc()}, {@link #advance(int)} and {@link #docID()} it means there /// docs in the iterator. const int32_t DocIdSetIterator::NO_MORE_DOCS = INT_MAX; DocIdSetIterator::~DocIdSetIterator() { } } LucenePlusPlus-rel_3.0.4/src/core/search/ExactPhraseScorer.cpp000066400000000000000000000033601217574114600243720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ExactPhraseScorer.h" #include "PhrasePositions.h" #include "PhraseQueue.h" namespace Lucene { ExactPhraseScorer::ExactPhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) { } ExactPhraseScorer::~ExactPhraseScorer() { } double ExactPhraseScorer::phraseFreq() { // sort list with pq pq->clear(); for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) { pp->firstPosition(); pq->add(pp); // build pq from list } pqToList(); // rebuild list from pq // For counting how many times the exact phrase is found in current document, just count how many // times all PhrasePosition's have exactly the same position. int32_t freq = 0; do { while (first->position < last->position) // scan forward in first { do { if (!first->nextPosition()) return freq; } while (first->position < last->position); firstToLast(); } ++freq; // all equal: a match } while (last->nextPosition()); return freq; } } LucenePlusPlus-rel_3.0.4/src/core/search/Explanation.cpp000066400000000000000000000047001217574114600232660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Explanation.h" #include "StringUtils.h" namespace Lucene { Explanation::Explanation(double value, const String& description) { this->value = value; this->description = description; } Explanation::~Explanation() { } bool Explanation::isMatch() { return (0.0 < getValue()); } double Explanation::getValue() { return value; } void Explanation::setValue(double value) { this->value = value; } String Explanation::getDescription() { return description; } void Explanation::setDescription(const String& description) { this->description = description; } String Explanation::getSummary() { return StringUtils::toString(getValue()) + L" = " + getDescription(); } Collection Explanation::getDetails() { if (!details) return Collection(); return Collection::newInstance(this->details.begin(), this->details.end()); } void Explanation::addDetail(ExplanationPtr detail) { if (!details) details = Collection::newInstance(); details.add(detail); } String Explanation::toString() { return toString(0); } String Explanation::toString(int32_t depth) { String buffer; for (int32_t i = 0; i < depth; ++i) buffer += L" "; buffer += getSummary() + L"\n"; if (details) { for (int32_t i = 0; i < details.size(); ++i) buffer += details[i]->toString(depth + 1); } return buffer; } String Explanation::toHtml() { String buffer(L"
    \n
  • " + getSummary() + L"
    \n"); if (details) { for (int32_t i = 0; i < details.size(); ++i) buffer += details[i]->toHtml(); } buffer += L"
  • \n
\n"; return buffer; } IDFExplanation::~IDFExplanation() { } } LucenePlusPlus-rel_3.0.4/src/core/search/FieldCache.cpp000066400000000000000000000235261217574114600227620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCache.h" #include "_FieldCache.h" #include "FieldCacheImpl.h" #include "NumericUtils.h" #include "StringUtils.h" namespace Lucene { /// Indicator for StringIndex values in the cache. const int32_t FieldCache::STRING_INDEX = -1; FieldCache::~FieldCache() { } FieldCachePtr FieldCache::DEFAULT() { static FieldCacheImplPtr _DEFAULT; if (!_DEFAULT) { _DEFAULT = newLucene(); CycleCheck::addStatic(_DEFAULT); } return _DEFAULT; } ByteParserPtr FieldCache::DEFAULT_BYTE_PARSER() { static DefaultByteParserPtr _DEFAULT_BYTE_PARSER; if (!_DEFAULT_BYTE_PARSER) { _DEFAULT_BYTE_PARSER = newLucene(); CycleCheck::addStatic(_DEFAULT_BYTE_PARSER); } return _DEFAULT_BYTE_PARSER; } IntParserPtr FieldCache::DEFAULT_INT_PARSER() { static DefaultIntParserPtr _DEFAULT_INT_PARSER; if (!_DEFAULT_INT_PARSER) { _DEFAULT_INT_PARSER = newLucene(); CycleCheck::addStatic(_DEFAULT_INT_PARSER); } return _DEFAULT_INT_PARSER; } LongParserPtr FieldCache::DEFAULT_LONG_PARSER() { static DefaultLongParserPtr _DEFAULT_LONG_PARSER; if (!_DEFAULT_LONG_PARSER) { _DEFAULT_LONG_PARSER = newLucene(); CycleCheck::addStatic(_DEFAULT_LONG_PARSER); } return _DEFAULT_LONG_PARSER; } DoubleParserPtr FieldCache::DEFAULT_DOUBLE_PARSER() { static DefaultDoubleParserPtr _DEFAULT_DOUBLE_PARSER; if (!_DEFAULT_DOUBLE_PARSER) { _DEFAULT_DOUBLE_PARSER = newLucene(); CycleCheck::addStatic(_DEFAULT_DOUBLE_PARSER); } return _DEFAULT_DOUBLE_PARSER; } IntParserPtr 
FieldCache::NUMERIC_UTILS_INT_PARSER() { static NumericUtilsIntParserPtr _NUMERIC_UTILS_INT_PARSER; if (!_NUMERIC_UTILS_INT_PARSER) { _NUMERIC_UTILS_INT_PARSER = newLucene(); CycleCheck::addStatic(_NUMERIC_UTILS_INT_PARSER); } return _NUMERIC_UTILS_INT_PARSER; } LongParserPtr FieldCache::NUMERIC_UTILS_LONG_PARSER() { static NumericUtilsLongParserPtr _NUMERIC_UTILS_LONG_PARSER; if (!_NUMERIC_UTILS_LONG_PARSER) { _NUMERIC_UTILS_LONG_PARSER = newLucene(); CycleCheck::addStatic(_NUMERIC_UTILS_LONG_PARSER); } return _NUMERIC_UTILS_LONG_PARSER; } DoubleParserPtr FieldCache::NUMERIC_UTILS_DOUBLE_PARSER() { static NumericUtilsDoubleParserPtr _NUMERIC_UTILS_DOUBLE_PARSER; if (!_NUMERIC_UTILS_DOUBLE_PARSER) { _NUMERIC_UTILS_DOUBLE_PARSER = newLucene(); CycleCheck::addStatic(_NUMERIC_UTILS_DOUBLE_PARSER); } return _NUMERIC_UTILS_DOUBLE_PARSER; } Collection FieldCache::getBytes(IndexReaderPtr reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getBytes(IndexReaderPtr reader, const String& field, ByteParserPtr parser) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getInts(IndexReaderPtr reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getInts(IndexReaderPtr reader, const String& field, IntParserPtr parser) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getLongs(IndexReaderPtr reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getLongs(IndexReaderPtr reader, const String& field, LongParserPtr parser) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getDoubles(IndexReaderPtr reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getDoubles(IndexReaderPtr reader, const String& field, DoubleParserPtr parser) { BOOST_ASSERT(false); return Collection(); // 
override } Collection FieldCache::getStrings(IndexReaderPtr reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } StringIndexPtr FieldCache::getStringIndex(IndexReaderPtr reader, const String& field) { BOOST_ASSERT(false); return StringIndexPtr(); // override } void FieldCache::setInfoStream(InfoStreamPtr stream) { BOOST_ASSERT(false); // override } InfoStreamPtr FieldCache::getInfoStream() { BOOST_ASSERT(false); return InfoStreamPtr(); // override } CreationPlaceholder::~CreationPlaceholder() { } StringIndex::StringIndex(Collection values, Collection lookup) { this->order = values; this->lookup = lookup; } StringIndex::~StringIndex() { } int32_t StringIndex::binarySearchLookup(const String& key) { Collection::iterator search = std::lower_bound(lookup.begin(), lookup.end(), key); int32_t keyPos = std::distance(lookup.begin(), search); return (search == lookup.end() || key < *search) ? -(keyPos + 1) : keyPos; } Parser::~Parser() { } ByteParser::~ByteParser() { } uint8_t ByteParser::parseByte(const String& string) { return 0; // override } DefaultByteParser::~DefaultByteParser() { } uint8_t DefaultByteParser::parseByte(const String& string) { return (uint8_t)StringUtils::toInt(string); } String DefaultByteParser::toString() { return FieldCache::_getClassName() + L".DEFAULT_BYTE_PARSER"; } IntParser::~IntParser() { } int32_t IntParser::parseInt(const String& string) { return 0; // override } DefaultIntParser::~DefaultIntParser() { } int32_t DefaultIntParser::parseInt(const String& string) { return StringUtils::toInt(string); } String DefaultIntParser::toString() { return FieldCache::_getClassName() + L".DEFAULT_INT_PARSER"; } NumericUtilsIntParser::~NumericUtilsIntParser() { } int32_t NumericUtilsIntParser::parseInt(const String& string) { int32_t shift = string[0] - NumericUtils::SHIFT_START_INT; if (shift > 0 && shift <= 31) boost::throw_exception(StopFillCacheException()); return NumericUtils::prefixCodedToInt(string); } String 
NumericUtilsIntParser::toString() { return FieldCache::_getClassName() + L".NUMERIC_UTILS_INT_PARSER"; } LongParser::~LongParser() { } int64_t LongParser::parseLong(const String& string) { return 0; // override } DefaultLongParser::~DefaultLongParser() { } int64_t DefaultLongParser::parseLong(const String& string) { return StringUtils::toLong(string); } String DefaultLongParser::toString() { return FieldCache::_getClassName() + L".DEFAULT_LONG_PARSER"; } NumericUtilsLongParser::~NumericUtilsLongParser() { } int64_t NumericUtilsLongParser::parseLong(const String& string) { int32_t shift = string[0] - NumericUtils::SHIFT_START_LONG; if (shift > 0 && shift <= 63) boost::throw_exception(StopFillCacheException()); return NumericUtils::prefixCodedToLong(string); } String NumericUtilsLongParser::toString() { return FieldCache::_getClassName() + L".NUMERIC_UTILS_LONG_PARSER"; } DoubleParser::~DoubleParser() { } double DoubleParser::parseDouble(const String& string) { return 0; // override } DefaultDoubleParser::~DefaultDoubleParser() { } double DefaultDoubleParser::parseDouble(const String& string) { return StringUtils::toDouble(string); } String DefaultDoubleParser::toString() { return FieldCache::_getClassName() + L".DEFAULT_DOUBLE_PARSER"; } NumericUtilsDoubleParser::~NumericUtilsDoubleParser() { } double NumericUtilsDoubleParser::parseDouble(const String& string) { int32_t shift = string[0] - NumericUtils::SHIFT_START_LONG; if (shift > 0 && shift <= 63) boost::throw_exception(StopFillCacheException()); return NumericUtils::sortableLongToDouble(NumericUtils::prefixCodedToLong(string)); } String NumericUtilsDoubleParser::toString() { return FieldCache::_getClassName() + L".NUMERIC_UTILS_DOUBLE_PARSER"; } FieldCacheEntry::~FieldCacheEntry() { } String FieldCacheEntry::toString() { StringStream buffer; buffer << L"'" << getReaderKey()->toString() << L"'=>" << getFieldName() << L"'," << getCacheType(); return buffer.str(); } } 
LucenePlusPlus-rel_3.0.4/src/core/search/FieldCacheImpl.cpp000066400000000000000000000472751217574114600236130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheImpl.h" #include "FieldCacheSanityChecker.h" #include "IndexReader.h" #include "InfoStream.h" #include "TermEnum.h" #include "TermDocs.h" #include "Term.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { FieldCacheImpl::FieldCacheImpl() { } FieldCacheImpl::~FieldCacheImpl() { } void FieldCacheImpl::initialize() { caches = MapStringCache::newInstance(); caches.put(CACHE_BYTE, newLucene(shared_from_this())); caches.put(CACHE_INT, newLucene(shared_from_this())); caches.put(CACHE_LONG, newLucene(shared_from_this())); caches.put(CACHE_DOUBLE, newLucene(shared_from_this())); caches.put(CACHE_STRING, newLucene(shared_from_this())); caches.put(CACHE_STRING_INDEX, newLucene(shared_from_this())); } void FieldCacheImpl::purgeAllCaches() { initialize(); } void FieldCacheImpl::purge(IndexReaderPtr r) { for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) cache->second->purge(r); } Collection FieldCacheImpl::getCacheEntries() { Collection result(Collection::newInstance()); for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) { for (WeakMapLuceneObjectMapEntryAny::iterator key = cache->second->readerCache.begin(); key != cache->second->readerCache.end(); ++key) { LuceneObjectPtr readerKey(key->first.lock()); // we've now materialized a hard ref if (readerKey) { for (MapEntryAny::iterator mapEntry = key->second.begin(); mapEntry != key->second.end(); ++mapEntry) 
result.add(newLucene(readerKey, mapEntry->first->field, cache->first, mapEntry->first->custom, mapEntry->second)); } } } return result; } Collection FieldCacheImpl::getBytes(IndexReaderPtr reader, const String& field) { return getBytes(reader, field, ByteParserPtr()); } Collection FieldCacheImpl::getBytes(IndexReaderPtr reader, const String& field, ByteParserPtr parser) { return VariantUtils::get< Collection >(caches.get(CACHE_BYTE)->get(reader, newLucene(field, parser))); } Collection FieldCacheImpl::getInts(IndexReaderPtr reader, const String& field) { return getInts(reader, field, IntParserPtr()); } Collection FieldCacheImpl::getInts(IndexReaderPtr reader, const String& field, IntParserPtr parser) { return VariantUtils::get< Collection >(caches.get(CACHE_INT)->get(reader, newLucene(field, parser))); } Collection FieldCacheImpl::getLongs(IndexReaderPtr reader, const String& field) { return getLongs(reader, field, LongParserPtr()); } Collection FieldCacheImpl::getLongs(IndexReaderPtr reader, const String& field, LongParserPtr parser) { return VariantUtils::get< Collection >(caches.get(CACHE_LONG)->get(reader, newLucene(field, parser))); } Collection FieldCacheImpl::getDoubles(IndexReaderPtr reader, const String& field) { return getDoubles(reader, field, DoubleParserPtr()); } Collection FieldCacheImpl::getDoubles(IndexReaderPtr reader, const String& field, DoubleParserPtr parser) { return VariantUtils::get< Collection >(caches.get(CACHE_DOUBLE)->get(reader, newLucene(field, parser))); } Collection FieldCacheImpl::getStrings(IndexReaderPtr reader, const String& field) { return VariantUtils::get< Collection >(caches.get(CACHE_STRING)->get(reader, newLucene(field, ParserPtr()))); } StringIndexPtr FieldCacheImpl::getStringIndex(IndexReaderPtr reader, const String& field) { return VariantUtils::get< StringIndexPtr >(caches.get(CACHE_STRING_INDEX)->get(reader, newLucene(field, ParserPtr()))); } void FieldCacheImpl::setInfoStream(InfoStreamPtr stream) { infoStream = 
stream; } InfoStreamPtr FieldCacheImpl::getInfoStream() { return infoStream; } Entry::Entry(const String& field, boost::any custom) { this->field = field; this->custom = custom; } Entry::~Entry() { } bool Entry::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; EntryPtr otherEntry(boost::dynamic_pointer_cast(other)); if (otherEntry) { if (otherEntry->field == field) return VariantUtils::equalsType(custom, otherEntry->custom); } return false; } int32_t Entry::hashCode() { return StringUtils::hashCode(field) ^ VariantUtils::hashCode(custom); } Cache::Cache(FieldCachePtr wrapper) { this->_wrapper = wrapper; this->readerCache = WeakMapLuceneObjectMapEntryAny::newInstance(); } Cache::~Cache() { } void Cache::purge(IndexReaderPtr r) { LuceneObjectPtr readerKey(r->getFieldCacheKey()); SyncLock cacheLock(&readerCache); readerCache.remove(readerKey); } boost::any Cache::get(IndexReaderPtr reader, EntryPtr key) { MapEntryAny innerCache; boost::any value; LuceneObjectPtr readerKey(reader->getFieldCacheKey()); { SyncLock cacheLock(&readerCache); innerCache = readerCache.get(readerKey); if (!innerCache) { innerCache = MapEntryAny::newInstance(); readerCache.put(readerKey, innerCache); } else if (innerCache.contains(key)) value = innerCache[key]; if (VariantUtils::isNull(value)) { value = newLucene(); innerCache.put(key, value); } } if (VariantUtils::typeOf(value)) { CreationPlaceholderPtr progress(VariantUtils::get(value)); SyncLock valueLock(progress); if (VariantUtils::isNull(progress->value)) { progress->value = createValue(reader, key); { SyncLock cacheLock(&readerCache); innerCache.put(key, progress->value); } FieldCachePtr wrapper(_wrapper); // Only check if key.custom (the parser) is non-null; else, we check twice for a single // call to FieldCache.getXXX if (!VariantUtils::isNull(key->custom) && wrapper) { InfoStreamPtr infoStream(wrapper->getInfoStream()); if (infoStream) printNewInsanity(infoStream, progress->value); } } return 
progress->value; } return value; } void Cache::printNewInsanity(InfoStreamPtr infoStream, boost::any value) { Collection insanities(FieldCacheSanityChecker::checkSanity(FieldCachePtr(_wrapper))); for (Collection::iterator insanity = insanities.begin(); insanity != insanities.end(); ++insanity) { Collection entries((*insanity)->getCacheEntries()); for (Collection::iterator entry = entries.begin(); entry != entries.end(); ++entry) { if (VariantUtils::equalsType((*entry)->getValue(), value)) { // OK this insanity involves our entry *infoStream << L"WARNING: new FieldCache insanity created\nDetails: " + (*insanity)->toString() << L"\n"; break; } } } } ByteCache::ByteCache(FieldCachePtr wrapper) : Cache(wrapper) { } ByteCache::~ByteCache() { } boost::any ByteCache::createValue(IndexReaderPtr reader, EntryPtr key) { EntryPtr entry(key); String field(entry->field); ByteParserPtr parser(VariantUtils::get(entry->custom)); if (!parser) return FieldCachePtr(_wrapper)->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER()); Collection retArray(Collection::newInstance(reader->maxDoc())); TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) break; uint8_t termval = parser->parseByte(term->text()); termDocs->seek(termEnum); while (termDocs->next()) retArray[termDocs->doc()] = termval; } while (termEnum->next()); } catch (StopFillCacheException&) { } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); return retArray; } IntCache::IntCache(FieldCachePtr wrapper) : Cache(wrapper) { } IntCache::~IntCache() { } boost::any IntCache::createValue(IndexReaderPtr reader, EntryPtr key) { EntryPtr entry(key); String field(entry->field); IntParserPtr parser(VariantUtils::get(entry->custom)); if (!parser) { FieldCachePtr wrapper(_wrapper); boost::any ints; try { ints = 
wrapper->getInts(reader, field, FieldCache::DEFAULT_INT_PARSER()); } catch (NumberFormatException&) { ints = wrapper->getInts(reader, field, FieldCache::NUMERIC_UTILS_INT_PARSER()); } return ints; } Collection retArray; TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) break; int32_t termval = parser->parseInt(term->text()); if (!retArray) // late init retArray = Collection::newInstance(reader->maxDoc()); termDocs->seek(termEnum); while (termDocs->next()) retArray[termDocs->doc()] = termval; } while (termEnum->next()); } catch (StopFillCacheException&) { } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); if (!retArray) // no values retArray = Collection::newInstance(reader->maxDoc()); return retArray; } LongCache::LongCache(FieldCachePtr wrapper) : Cache(wrapper) { } LongCache::~LongCache() { } boost::any LongCache::createValue(IndexReaderPtr reader, EntryPtr key) { EntryPtr entry(key); String field(entry->field); LongParserPtr parser(VariantUtils::get(entry->custom)); if (!parser) { FieldCachePtr wrapper(_wrapper); boost::any longs; try { longs = wrapper->getLongs(reader, field, FieldCache::DEFAULT_LONG_PARSER()); } catch (NumberFormatException&) { longs = wrapper->getLongs(reader, field, FieldCache::NUMERIC_UTILS_LONG_PARSER()); } return longs; } Collection retArray; TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) break; int64_t termval = parser->parseLong(term->text()); if (!retArray) // late init retArray = Collection::newInstance(reader->maxDoc()); termDocs->seek(termEnum); while (termDocs->next()) retArray[termDocs->doc()] = termval; } while (termEnum->next()); } catch (StopFillCacheException&) { } 
catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); if (!retArray) // no values retArray = Collection::newInstance(reader->maxDoc()); return retArray; } DoubleCache::DoubleCache(FieldCachePtr wrapper) : Cache(wrapper) { } DoubleCache::~DoubleCache() { } boost::any DoubleCache::createValue(IndexReaderPtr reader, EntryPtr key) { EntryPtr entry(key); String field(entry->field); DoubleParserPtr parser(VariantUtils::get(entry->custom)); if (!parser) { FieldCachePtr wrapper(_wrapper); boost::any doubles; try { doubles = wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER()); } catch (NumberFormatException&) { doubles = wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER()); } return doubles; } Collection retArray; TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) break; double termval = parser->parseDouble(term->text()); if (!retArray) // late init retArray = Collection::newInstance(reader->maxDoc()); termDocs->seek(termEnum); while (termDocs->next()) retArray[termDocs->doc()] = termval; } while (termEnum->next()); } catch (StopFillCacheException&) { } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); if (!retArray) // no values retArray = Collection::newInstance(reader->maxDoc()); return retArray; } StringCache::StringCache(FieldCachePtr wrapper) : Cache(wrapper) { } StringCache::~StringCache() { } boost::any StringCache::createValue(IndexReaderPtr reader, EntryPtr key) { EntryPtr entry(key); String field(entry->field); Collection retArray(Collection::newInstance(reader->maxDoc())); TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || 
term->field() != field) break; String termval(term->text()); termDocs->seek(termEnum); while (termDocs->next()) retArray[termDocs->doc()] = termval; } while (termEnum->next()); } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); return retArray; } StringIndexCache::StringIndexCache(FieldCachePtr wrapper) : Cache(wrapper) { } StringIndexCache::~StringIndexCache() { } boost::any StringIndexCache::createValue(IndexReaderPtr reader, EntryPtr key) { EntryPtr entry(key); String field(entry->field); Collection retArray(Collection::newInstance(reader->maxDoc())); Collection mterms(Collection::newInstance(reader->maxDoc() + 1)); TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); int32_t t = 0; // current term number // an entry for documents that have no terms in this field should a document with no terms be at // top or bottom? This puts them at the top - if it is changed, FieldDocSortedHitQueue needs to // change as well. 
mterms[t++] = L""; LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field || t >= mterms.size() ) break; // store term text mterms[t] = term->text(); termDocs->seek(termEnum); while (termDocs->next()) retArray[termDocs->doc()] = t; ++t; } while (termEnum->next()); } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); if (t == 0) { // if there are no terms, make the term array have a single null entry mterms = Collection::newInstance(1); } else if (t < mterms.size()) { // if there are less terms than documents, trim off the dead array space mterms.resize(t); } return newLucene(retArray, mterms); } FieldCacheEntryImpl::FieldCacheEntryImpl(LuceneObjectPtr readerKey, const String& fieldName, int32_t cacheType, boost::any custom, boost::any value) { this->readerKey = readerKey; this->fieldName = fieldName; this->cacheType = cacheType; this->custom = custom; this->value = value; } FieldCacheEntryImpl::~FieldCacheEntryImpl() { } LuceneObjectPtr FieldCacheEntryImpl::getReaderKey() { return readerKey; } String FieldCacheEntryImpl::getFieldName() { return fieldName; } int32_t FieldCacheEntryImpl::getCacheType() { return cacheType; } boost::any FieldCacheEntryImpl::getCustom() { return custom; } boost::any FieldCacheEntryImpl::getValue() { return value; } } LucenePlusPlus-rel_3.0.4/src/core/search/FieldCacheRangeFilter.cpp000066400000000000000000000372211217574114600251020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheRangeFilter.h" #include "_FieldCacheRangeFilter.h" #include "FieldCache.h" #include "IndexReader.h" #include "TermDocs.h" #include "NumericUtils.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { FieldCacheRangeFilter::FieldCacheRangeFilter(const String& field, ParserPtr parser, bool includeLower, bool includeUpper) { this->field = field; this->parser = parser; this->includeLower = includeLower; this->includeUpper = includeUpper; } FieldCacheRangeFilter::~FieldCacheRangeFilter() { } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newStringRange(const String& field, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper) { return newLucene(field, ParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newByteRange(const String& field, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) { return newByteRange(field, ByteParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newByteRange(const String& field, ByteParserPtr parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) { return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newIntRange(const String& field, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) { return newIntRange(field, IntParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newIntRange(const String& field, IntParserPtr parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) { return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newLongRange(const String& field, 
int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) { return newLongRange(field, LongParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newLongRange(const String& field, LongParserPtr parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) { return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newDoubleRange(const String& field, double lowerVal, double upperVal, bool includeLower, bool includeUpper) { return newDoubleRange(field, DoubleParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newDoubleRange(const String& field, DoubleParserPtr parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper) { return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); } String FieldCacheRangeFilter::getField() { return field; } bool FieldCacheRangeFilter::includesLower() { return includeLower; } bool FieldCacheRangeFilter::includesUpper() { return includeUpper; } ParserPtr FieldCacheRangeFilter::getParser() { return parser; } FieldCacheRangeFilterString::FieldCacheRangeFilterString(const String& field, ParserPtr parser, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilter(field, parser, includeLower, includeUpper) { this->lowerVal = lowerVal; this->upperVal = upperVal; } FieldCacheRangeFilterString::~FieldCacheRangeFilterString() { } DocIdSetPtr FieldCacheRangeFilterString::getDocIdSet(IndexReaderPtr reader) { StringIndexPtr fcsi(FieldCache::DEFAULT()->getStringIndex(reader, field)); int32_t lowerPoint = fcsi->binarySearchLookup(lowerVal); int32_t upperPoint = fcsi->binarySearchLookup(upperVal); int32_t inclusiveLowerPoint = 0; int32_t inclusiveUpperPoint = 0; // Hints: // * binarySearchLookup returns 0, if value was null. 
// * the value is <0 if no exact hit was found, the returned value is (-(insertion point) - 1) if (lowerPoint == 0) { BOOST_ASSERT(lowerVal.empty()); inclusiveLowerPoint = 1; } else if (includeLower && lowerPoint > 0) inclusiveLowerPoint = lowerPoint; else if (lowerPoint > 0) inclusiveLowerPoint = lowerPoint + 1; else inclusiveLowerPoint = std::max((int32_t)1, -lowerPoint - 1); if (upperPoint == 0) { BOOST_ASSERT(upperVal.empty()); inclusiveUpperPoint = INT_MAX; } else if (includeUpper && upperPoint > 0) inclusiveUpperPoint = upperPoint; else if (upperPoint > 0) inclusiveUpperPoint = upperPoint - 1; else inclusiveUpperPoint = -upperPoint - 2; if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet::EMPTY_DOCIDSET(); BOOST_ASSERT(inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0); // for this DocIdSet, we never need to use TermDocs, because deleted docs have an order of 0 // (null entry in StringIndex) return newLucene(reader, false, fcsi, inclusiveLowerPoint, inclusiveUpperPoint); } String FieldCacheRangeFilterString::toString() { StringStream buffer; buffer << field << L":" << (includeLower ? L"[" : L"{"); buffer << lowerVal << L" TO " << lowerVal; buffer << (includeLower ? L"]" : L"}"); return buffer.str(); } bool FieldCacheRangeFilterString::equals(LuceneObjectPtr other) { if (Filter::equals(other)) return true; FieldCacheRangeFilterStringPtr otherFilter(boost::dynamic_pointer_cast(other)); if (!otherFilter) return false; if (field != otherFilter->field || includeLower != otherFilter->includeLower || includeUpper != otherFilter->includeUpper) return false; if (lowerVal != otherFilter->lowerVal || upperVal != otherFilter->upperVal) return false; if (parser ? !parser->equals(otherFilter->parser) : otherFilter->parser) return false; return true; } int32_t FieldCacheRangeFilterString::hashCode() { int32_t code = StringUtils::hashCode(field); code ^= lowerVal.empty() ? 
550356204 : StringUtils::hashCode(lowerVal); code = (code << 1) | MiscUtils::unsignedShift(code, 31); // rotate to distinguish lower from upper code ^= upperVal.empty() ? -1674416163 : StringUtils::hashCode(upperVal); code ^= parser ? parser->hashCode() : -1572457324; code ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 1721088258 : 1948649653); return code; } FieldCacheRangeFilterByte::FieldCacheRangeFilterByte(const String& field, ParserPtr parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, UCHAR_MAX, includeLower, includeUpper) { } FieldCacheRangeFilterByte::~FieldCacheRangeFilterByte() { } Collection FieldCacheRangeFilterByte::getValues(IndexReaderPtr reader) { return FieldCache::DEFAULT()->getBytes(reader, field, boost::static_pointer_cast(parser)); } FieldCacheRangeFilterInt::FieldCacheRangeFilterInt(const String& field, ParserPtr parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, INT_MAX, includeLower, includeUpper) { } FieldCacheRangeFilterInt::~FieldCacheRangeFilterInt() { } Collection FieldCacheRangeFilterInt::getValues(IndexReaderPtr reader) { return FieldCache::DEFAULT()->getInts(reader, field, boost::static_pointer_cast(parser)); } FieldCacheRangeFilterLong::FieldCacheRangeFilterLong(const String& field, ParserPtr parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, std::numeric_limits::max(), includeLower, includeUpper) { } FieldCacheRangeFilterLong::~FieldCacheRangeFilterLong() { } Collection FieldCacheRangeFilterLong::getValues(IndexReaderPtr reader) { return FieldCache::DEFAULT()->getLongs(reader, field, boost::static_pointer_cast(parser)); } FieldCacheRangeFilterDouble::FieldCacheRangeFilterDouble(const String& field, ParserPtr parser, double 
lowerVal, double upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, std::numeric_limits::infinity(), includeLower, includeUpper) { } FieldCacheRangeFilterDouble::~FieldCacheRangeFilterDouble() { } DocIdSetPtr FieldCacheRangeFilterDouble::getDocIdSet(IndexReaderPtr reader) { if (!includeLower && lowerVal > 0.0 && MiscUtils::isInfinite(lowerVal)) return DocIdSet::EMPTY_DOCIDSET(); int64_t lower = NumericUtils::doubleToSortableLong(lowerVal); double inclusiveLowerPoint = NumericUtils::sortableLongToDouble(includeLower ? lower : (lower + 1)); if (!includeUpper && upperVal < 0.0 && MiscUtils::isInfinite(upperVal)) return DocIdSet::EMPTY_DOCIDSET(); int64_t upper = NumericUtils::doubleToSortableLong(upperVal); double inclusiveUpperPoint = NumericUtils::sortableLongToDouble(includeUpper ? upper : (upper - 1)); if (inclusiveLowerPoint > inclusiveUpperPoint) return DocIdSet::EMPTY_DOCIDSET(); // we only request the usage of termDocs, if the range contains 0 return newLucene< FieldCacheDocIdSetNumeric >(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0), getValues(reader), inclusiveLowerPoint, inclusiveUpperPoint); } Collection FieldCacheRangeFilterDouble::getValues(IndexReaderPtr reader) { return FieldCache::DEFAULT()->getDoubles(reader, field, boost::static_pointer_cast(parser)); } FieldCacheDocIdSet::FieldCacheDocIdSet(IndexReaderPtr reader, bool mayUseTermDocs) { this->reader = reader; this->mayUseTermDocs = mayUseTermDocs; } FieldCacheDocIdSet::~FieldCacheDocIdSet() { } bool FieldCacheDocIdSet::isCacheable() { return !(mayUseTermDocs && reader->hasDeletions()); } DocIdSetIteratorPtr FieldCacheDocIdSet::iterator() { // Synchronization needed because deleted docs BitVector can change after call to hasDeletions until // TermDocs creation. We only use an iterator with termDocs, when this was requested (eg. 
range // contains 0) and the index has deletions TermDocsPtr termDocs; { SyncLock instancesLock(reader); termDocs = isCacheable() ? TermDocsPtr() : reader->termDocs(TermPtr()); } if (termDocs) { // a DocIdSetIterator using TermDocs to iterate valid docIds return newLucene(shared_from_this(), termDocs); } else { // a DocIdSetIterator generating docIds by incrementing a variable - this one can be used if there // are no deletions are on the index return newLucene(shared_from_this()); } } FieldCacheDocIdSetString::FieldCacheDocIdSetString(IndexReaderPtr reader, bool mayUseTermDocs, StringIndexPtr fcsi, int32_t inclusiveLowerPoint, int32_t inclusiveUpperPoint) : FieldCacheDocIdSet(reader, mayUseTermDocs) { this->fcsi = fcsi; this->inclusiveLowerPoint = inclusiveLowerPoint; this->inclusiveUpperPoint = inclusiveUpperPoint; } FieldCacheDocIdSetString::~FieldCacheDocIdSetString() { } bool FieldCacheDocIdSetString::matchDoc(int32_t doc) { if (doc < 0 || doc >= fcsi->order.size()) boost::throw_exception(IndexOutOfBoundsException()); return (fcsi->order[doc] >= inclusiveLowerPoint && fcsi->order[doc] <= inclusiveUpperPoint); } FieldDocIdSetIteratorTermDocs::FieldDocIdSetIteratorTermDocs(FieldCacheDocIdSetPtr cacheDocIdSet, TermDocsPtr termDocs) { this->_cacheDocIdSet = cacheDocIdSet; this->termDocs = termDocs; this->doc = -1; } FieldDocIdSetIteratorTermDocs::~FieldDocIdSetIteratorTermDocs() { } int32_t FieldDocIdSetIteratorTermDocs::docID() { return doc; } int32_t FieldDocIdSetIteratorTermDocs::nextDoc() { FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); do { if (!termDocs->next()) { doc = NO_MORE_DOCS; return doc; } } while (!cacheDocIdSet->matchDoc(doc = termDocs->doc())); return doc; } int32_t FieldDocIdSetIteratorTermDocs::advance(int32_t target) { FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); if (!termDocs->skipTo(target)) { doc = NO_MORE_DOCS; return doc; } while (!cacheDocIdSet->matchDoc(doc = termDocs->doc())) { if (!termDocs->next()) { doc = NO_MORE_DOCS; 
return doc; } } return doc; } FieldDocIdSetIteratorIncrement::FieldDocIdSetIteratorIncrement(FieldCacheDocIdSetPtr cacheDocIdSet) { this->_cacheDocIdSet = cacheDocIdSet; this->doc = -1; } FieldDocIdSetIteratorIncrement::~FieldDocIdSetIteratorIncrement() { } int32_t FieldDocIdSetIteratorIncrement::docID() { return doc; } int32_t FieldDocIdSetIteratorIncrement::nextDoc() { FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); try { do { ++doc; } while (!cacheDocIdSet->matchDoc(doc)); return doc; } catch (IndexOutOfBoundsException&) { doc = NO_MORE_DOCS; return doc; } } int32_t FieldDocIdSetIteratorIncrement::advance(int32_t target) { FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); try { doc = target; while (!cacheDocIdSet->matchDoc(doc)) ++doc; return doc; } catch (IndexOutOfBoundsException&) { doc = NO_MORE_DOCS; return doc; } } } LucenePlusPlus-rel_3.0.4/src/core/search/FieldCacheTermsFilter.cpp000066400000000000000000000066061217574114600251430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheTermsFilter.h" #include "_FieldCacheTermsFilter.h" #include "FieldCache.h" #include "OpenBitSet.h" namespace Lucene { FieldCacheTermsFilter::FieldCacheTermsFilter(const String& field, Collection terms) { this->field = field; this->terms = terms; } FieldCacheTermsFilter::~FieldCacheTermsFilter() { } FieldCachePtr FieldCacheTermsFilter::getFieldCache() { return FieldCache::DEFAULT(); } DocIdSetPtr FieldCacheTermsFilter::getDocIdSet(IndexReaderPtr reader) { return newLucene(terms, getFieldCache()->getStringIndex(reader, field)); } FieldCacheTermsFilterDocIdSet::FieldCacheTermsFilterDocIdSet(Collection terms, StringIndexPtr fcsi) { this->fcsi = fcsi; openBitSet = newLucene(this->fcsi->lookup.size()); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { int32_t termNumber = this->fcsi->binarySearchLookup(*term); if (termNumber > 0) openBitSet->set(termNumber); } } FieldCacheTermsFilterDocIdSet::~FieldCacheTermsFilterDocIdSet() { } DocIdSetIteratorPtr FieldCacheTermsFilterDocIdSet::iterator() { return newLucene(fcsi, openBitSet); } bool FieldCacheTermsFilterDocIdSet::isCacheable() { return true; } FieldCacheTermsFilterDocIdSetIterator::FieldCacheTermsFilterDocIdSetIterator(StringIndexPtr fcsi, OpenBitSetPtr openBitSet) { this->fcsi = fcsi; this->openBitSet = openBitSet; this->doc = -1; } FieldCacheTermsFilterDocIdSetIterator::~FieldCacheTermsFilterDocIdSetIterator() { } int32_t FieldCacheTermsFilterDocIdSetIterator::docID() { return doc; } int32_t FieldCacheTermsFilterDocIdSetIterator::nextDoc() { try { if (++doc >= fcsi->order.size()) boost::throw_exception(IndexOutOfBoundsException()); while (!openBitSet->fastGet(fcsi->order[doc])) { if (++doc >= fcsi->order.size()) boost::throw_exception(IndexOutOfBoundsException()); } } catch (IndexOutOfBoundsException&) { doc = NO_MORE_DOCS; } return doc; } int32_t 
FieldCacheTermsFilterDocIdSetIterator::advance(int32_t target) { try { doc = target; if (doc < 0 || doc >= fcsi->order.size()) boost::throw_exception(IndexOutOfBoundsException()); while (!openBitSet->fastGet(fcsi->order[doc])) { if (++doc >= fcsi->order.size()) boost::throw_exception(IndexOutOfBoundsException()); } } catch (IndexOutOfBoundsException&) { doc = NO_MORE_DOCS; } return doc; } } LucenePlusPlus-rel_3.0.4/src/core/search/FieldComparator.cpp000066400000000000000000000264371217574114600240720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldComparator.h" #include "FieldCache.h" #include "ScoreCachingWrappingScorer.h" #include "Collator.h" namespace Lucene { FieldComparator::~FieldComparator() { } void FieldComparator::setScorer(ScorerPtr scorer) { // Empty implementation since most comparators don't need the score. // This can be overridden by those that need it. 
} ByteComparator::ByteComparator(int32_t numHits, const String& field, ParserPtr parser) : NumericComparator(numHits, field) { this->parser = boost::static_pointer_cast(parser); } ByteComparator::~ByteComparator() { } void ByteComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getBytes(reader, field, parser); } DocComparator::DocComparator(int32_t numHits) : NumericComparator(numHits) { this->docBase = 0; } DocComparator::~DocComparator() { } int32_t DocComparator::compareBottom(int32_t doc) { // No overflow risk because docIDs are non-negative return (bottom - (docBase + doc)); } void DocComparator::copy(int32_t slot, int32_t doc) { values[slot] = docBase + doc; } void DocComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) { this->docBase = docBase; } DoubleComparator::DoubleComparator(int32_t numHits, const String& field, ParserPtr parser) : NumericComparator(numHits, field) { this->parser = boost::static_pointer_cast(parser); } DoubleComparator::~DoubleComparator() { } int32_t DoubleComparator::compare(int32_t slot1, int32_t slot2) { double v1 = values[slot1]; double v2 = values[slot2]; return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); } int32_t DoubleComparator::compareBottom(int32_t doc) { double v2 = currentReaderValues[doc]; return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); } void DoubleComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getDoubles(reader, field, parser); } IntComparator::IntComparator(int32_t numHits, const String& field, ParserPtr parser) : NumericComparator(numHits, field) { this->parser = boost::static_pointer_cast(parser); } IntComparator::~IntComparator() { } int32_t IntComparator::compare(int32_t slot1, int32_t slot2) { int32_t v1 = values[slot1]; int32_t v2 = values[slot2]; return v1 > v2 ? 1 : (v1 < v2 ? 
-1 : 0); } int32_t IntComparator::compareBottom(int32_t doc) { int32_t v2 = currentReaderValues[doc]; return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); } void IntComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getInts(reader, field, parser); } LongComparator::LongComparator(int32_t numHits, const String& field, ParserPtr parser) : NumericComparator(numHits, field) { this->parser = boost::static_pointer_cast(parser); } LongComparator::~LongComparator() { } int32_t LongComparator::compare(int32_t slot1, int32_t slot2) { int64_t v1 = values[slot1]; int64_t v2 = values[slot2]; return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); } int32_t LongComparator::compareBottom(int32_t doc) { int64_t v2 = currentReaderValues[doc]; return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); } void LongComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getLongs(reader, field, parser); } RelevanceComparator::RelevanceComparator(int32_t numHits) : NumericComparator(numHits) { } RelevanceComparator::~RelevanceComparator() { } int32_t RelevanceComparator::compare(int32_t slot1, int32_t slot2) { double score1 = values[slot1]; double score2 = values[slot2]; return score1 > score2 ? -1 : (score1 < score2 ? 1 : 0); } int32_t RelevanceComparator::compareBottom(int32_t doc) { double score = scorer->score(); return bottom > score ? -1 : (bottom < score ? 
1 : 0); } void RelevanceComparator::copy(int32_t slot, int32_t doc) { values[slot] = scorer->score(); } void RelevanceComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) { } void RelevanceComparator::setScorer(ScorerPtr scorer) { this->scorer = newLucene(scorer); } StringComparatorLocale::StringComparatorLocale(int32_t numHits, const String& field, const std::locale& locale) : collator(newLucene(locale)) { this->values = Collection::newInstance(numHits); this->field = field; } StringComparatorLocale::~StringComparatorLocale() { } int32_t StringComparatorLocale::compare(int32_t slot1, int32_t slot2) { return collator->compare(values[slot1], values[slot2]); } int32_t StringComparatorLocale::compareBottom(int32_t doc) { return collator->compare(bottom, currentReaderValues[doc]); } void StringComparatorLocale::copy(int32_t slot, int32_t doc) { values[slot] = currentReaderValues[doc]; } void StringComparatorLocale::setNextReader(IndexReaderPtr reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getStrings(reader, field); } void StringComparatorLocale::setBottom(int32_t slot) { bottom = values[slot]; } ComparableValue StringComparatorLocale::value(int32_t slot) { return values[slot]; } StringOrdValComparator::StringOrdValComparator(int32_t numHits, const String& field, int32_t sortPos, bool reversed) { this->ords = Collection::newInstance(numHits); this->values = Collection::newInstance(numHits); this->readerGen = Collection::newInstance(numHits); this->sortPos = sortPos; this->reversed = reversed; this->field = field; this->currentReaderGen = -1; this->bottomSlot = -1; this->bottomOrd = 0; } StringOrdValComparator::~StringOrdValComparator() { } int32_t StringOrdValComparator::compare(int32_t slot1, int32_t slot2) { if (readerGen[slot1] == readerGen[slot2]) { int32_t cmp = ords[slot1] - ords[slot2]; if (cmp != 0) return cmp; } return values[slot1].compare(values[slot2]); } int32_t StringOrdValComparator::compareBottom(int32_t doc) { 
BOOST_ASSERT(bottomSlot != -1); int32_t order = this->order[doc]; int32_t cmp = bottomOrd - order; if (cmp != 0) return cmp; return bottomValue.compare(lookup[order]); } void StringOrdValComparator::convert(int32_t slot) { readerGen[slot] = currentReaderGen; int32_t index = 0; String value(values[slot]); if (value.empty()) { ords[slot] = 0; return; } if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) { // Since we are the primary sort, the entries in the queue are bounded by bottomOrd BOOST_ASSERT(bottomOrd < lookup.size()); if (reversed) index = binarySearch(lookup, value, bottomOrd, lookup.size() - 1); else index = binarySearch(lookup, value, 0, bottomOrd); } else { // Full binary search index = binarySearch(lookup, value, 0, lookup.size() - 1); } if (index < 0) index = -index - 2; ords[slot] = index; } int32_t StringOrdValComparator::binarySearch(Collection lookup, const String& key, int32_t low, int32_t high) { Collection::iterator search = std::lower_bound(lookup.begin() + low, lookup.begin() + high, key); int32_t keyPos = std::distance(lookup.begin(), search); return (search == lookup.end() || key < *search) ? 
-(keyPos + 1) : keyPos; } void StringOrdValComparator::copy(int32_t slot, int32_t doc) { int32_t ord = order[doc]; ords[slot] = ord; BOOST_ASSERT(ord >= 0); values[slot] = lookup[ord]; readerGen[slot] = currentReaderGen; } void StringOrdValComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) { StringIndexPtr currentReaderValues(FieldCache::DEFAULT()->getStringIndex(reader, field)); ++currentReaderGen; order = currentReaderValues->order; lookup = currentReaderValues->lookup; BOOST_ASSERT(!lookup.empty()); if (bottomSlot != -1) { convert(bottomSlot); bottomOrd = ords[bottomSlot]; } } void StringOrdValComparator::setBottom(int32_t slot) { bottomSlot = slot; if (readerGen[slot] != currentReaderGen) convert(bottomSlot); bottomOrd = ords[slot]; BOOST_ASSERT(bottomOrd >= 0); BOOST_ASSERT(bottomOrd < lookup.size()); bottomValue = values[slot]; } ComparableValue StringOrdValComparator::value(int32_t slot) { return values[slot]; } Collection StringOrdValComparator::getValues() { return values; } int32_t StringOrdValComparator::getBottomSlot() { return bottomSlot; } String StringOrdValComparator::getField() { return field; } StringValComparator::StringValComparator(int32_t numHits, const String& field) { this->values = Collection::newInstance(numHits); this->field = field; } StringValComparator::~StringValComparator() { } int32_t StringValComparator::compare(int32_t slot1, int32_t slot2) { return values[slot1].compare(values[slot2]); } int32_t StringValComparator::compareBottom(int32_t doc) { return bottom.compare(currentReaderValues[doc]); } void StringValComparator::copy(int32_t slot, int32_t doc) { values[slot] = currentReaderValues[doc]; } void StringValComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getStrings(reader, field); } void StringValComparator::setBottom(int32_t slot) { bottom = values[slot]; } ComparableValue StringValComparator::value(int32_t slot) { return values[slot]; } } 
LucenePlusPlus-rel_3.0.4/src/core/search/FieldComparatorSource.cpp000066400000000000000000000007431217574114600252430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldComparatorSource.h" namespace Lucene { FieldComparatorSource::~FieldComparatorSource() { } } LucenePlusPlus-rel_3.0.4/src/core/search/FieldDoc.cpp000066400000000000000000000017631217574114600224630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldDoc.h" namespace Lucene { FieldDoc::FieldDoc(int32_t doc, double score, Collection fields) : ScoreDoc(doc, score) { this->fields = fields; } FieldDoc::~FieldDoc() { } String FieldDoc::toString() { StringStream buffer; buffer << ScoreDoc::toString() << L"["; for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if (field != fields.begin()) buffer << L", "; buffer << *field; } buffer << L"]"; return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/search/FieldDocSortedHitQueue.cpp000066400000000000000000000051241217574114600253110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldDocSortedHitQueue.h" #include "FieldDoc.h" #include "SortField.h" #include "Collator.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { FieldDocSortedHitQueue::FieldDocSortedHitQueue(int32_t size) : PriorityQueue(size) { } FieldDocSortedHitQueue::~FieldDocSortedHitQueue() { } void FieldDocSortedHitQueue::setFields(Collection fields) { this->fields = fields; this->collators = hasCollators(fields); } Collection FieldDocSortedHitQueue::getFields() { return fields; } Collection FieldDocSortedHitQueue::hasCollators(Collection fields) { if (!fields) return Collection(); Collection ret(Collection::newInstance(fields.size())); for (int32_t i = 0; i < fields.size(); ++i) { localePtr locale(fields[i]->getLocale()); if (locale) ret[i] = newInstance(*locale); } return ret; } bool FieldDocSortedHitQueue::lessThan(const FieldDocPtr& first, const FieldDocPtr& second) { int32_t n = fields.size(); int32_t c = 0; for (int32_t i = 0; i < n && c == 0; ++i) { int32_t type = fields[i]->getType(); if (type == SortField::STRING) { String s1(VariantUtils::get(first->fields[i])); String s2(VariantUtils::get(second->fields[i])); if (!fields[i]->getLocale()) c = s1.compare(s2); else c = collators[i]->compare(s1, s2); } else { c = VariantUtils::compareTo(first->fields[i], second->fields[i]); if (type == SortField::SCORE) c = -c; } // reverse sort if (fields[i]->getReverse()) c = -c; } // avoid random sort order that could lead to duplicates if (c == 0) return (first->doc > second->doc); return (c > 0); } } LucenePlusPlus-rel_3.0.4/src/core/search/FieldValueHitQueue.cpp000066400000000000000000000127371217574114600245070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldValueHitQueue.h" #include "_FieldValueHitQueue.h" #include "FieldComparator.h" #include "FieldDoc.h" #include "SortField.h" namespace Lucene { FieldValueHitQueue::FieldValueHitQueue(Collection fields, int32_t size) : HitQueueBase(size) { // When we get here, fields.size() is guaranteed to be > 0, therefore no need to check it again. // All these are required by this class's API - need to return arrays. Therefore even in the case // of a single comparator, create an array anyway. this->fields = fields; int32_t numComparators = fields.size(); comparators = Collection::newInstance(numComparators); reverseMul = Collection::newInstance(numComparators); } FieldValueHitQueue::~FieldValueHitQueue() { } FieldValueHitQueuePtr FieldValueHitQueue::create(Collection fields, int32_t size) { if (fields.empty()) boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); if (fields.size() == 1) return newLucene(fields, size); else return newLucene(fields, size); } Collection FieldValueHitQueue::getComparators() { return comparators; } Collection FieldValueHitQueue::getReverseMul() { return reverseMul; } FieldDocPtr FieldValueHitQueue::fillFields(FieldValueHitQueueEntryPtr entry) { int32_t n = comparators.size(); Collection fields(Collection::newInstance(n)); for (int32_t i = 0; i < n; ++i) fields[i] = comparators[i]->value(entry->slot); return newLucene(entry->doc, entry->score, fields); } Collection FieldValueHitQueue::getFields() { return fields; } FieldValueHitQueueEntry::FieldValueHitQueueEntry(int32_t slot, int32_t doc, double score) : ScoreDoc(doc, score) { this->slot = slot; } FieldValueHitQueueEntry::~FieldValueHitQueueEntry() { } String FieldValueHitQueueEntry::toString() { StringStream buffer; buffer << 
L"slot:" << slot << L" " << ScoreDoc::toString(); return buffer.str(); } OneComparatorFieldValueHitQueue::OneComparatorFieldValueHitQueue(Collection fields, int32_t size) : FieldValueHitQueue(fields, size) { if (fields.empty()) boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); SortFieldPtr field(fields[0]); comparator = field->getComparator(size, 0); oneReverseMul = field->reverse ? -1 : 1; comparators[0] = comparator; reverseMul[0] = oneReverseMul; } OneComparatorFieldValueHitQueue::~OneComparatorFieldValueHitQueue() { } bool OneComparatorFieldValueHitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { FieldValueHitQueueEntryPtr firstEntry(boost::static_pointer_cast(first)); FieldValueHitQueueEntryPtr secondEntry(boost::static_pointer_cast(second)); BOOST_ASSERT(firstEntry != secondEntry); BOOST_ASSERT(firstEntry->slot != secondEntry->slot); int32_t c = oneReverseMul * comparator->compare(firstEntry->slot, secondEntry->slot); // avoid random sort order that could lead to duplicates return c != 0 ? (c > 0) : (firstEntry->doc > secondEntry->doc); } MultiComparatorsFieldValueHitQueue::MultiComparatorsFieldValueHitQueue(Collection fields, int32_t size) : FieldValueHitQueue(fields, size) { int32_t numComparators = comparators.size(); for (int32_t i = 0; i < numComparators; ++i) { SortFieldPtr field(fields[i]); reverseMul[i] = field->reverse ? 
-1 : 1; comparators[i] = field->getComparator(size, i); } } MultiComparatorsFieldValueHitQueue::~MultiComparatorsFieldValueHitQueue() { } bool MultiComparatorsFieldValueHitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { FieldValueHitQueueEntryPtr firstEntry(boost::static_pointer_cast(first)); FieldValueHitQueueEntryPtr secondEntry(boost::static_pointer_cast(second)); BOOST_ASSERT(firstEntry != secondEntry); BOOST_ASSERT(firstEntry->slot != secondEntry->slot); int32_t numComparators = comparators.size(); for (int32_t i = 0; i < numComparators; ++i) { int32_t c = reverseMul[i] * comparators[i]->compare(firstEntry->slot, secondEntry->slot); if (c != 0) return (c > 0); // Short circuit } // avoid random sort order that could lead to duplicates return (firstEntry->doc > secondEntry->doc); } } LucenePlusPlus-rel_3.0.4/src/core/search/Filter.cpp000066400000000000000000000006661217574114600222400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Filter.h" namespace Lucene { Filter::~Filter() { } } LucenePlusPlus-rel_3.0.4/src/core/search/FilterManager.cpp000066400000000000000000000075211217574114600235300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilterManager.h" #include "_FilterManager.h" #include "Filter.h" #include "MiscUtils.h" namespace Lucene { /// The default maximum number of Filters in the cache const int32_t FilterManager::DEFAULT_CACHE_CLEAN_SIZE = 100; /// The default frequency of cache cleanup const int64_t FilterManager::DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10; FilterManager::FilterManager() { } FilterManager::~FilterManager() { } void FilterManager::initialize() { cache = MapIntFilterItem::newInstance(); cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings filterCleaner = newLucene(shared_from_this()); filterCleaner->start(); } FilterManagerPtr FilterManager::getInstance() { static FilterManagerPtr manager; if (!manager) { manager = newLucene(); CycleCheck::addStatic(manager); } return manager; } void FilterManager::setCacheSize(int32_t cacheCleanSize) { this->cacheCleanSize = cacheCleanSize; } void FilterManager::setCleanThreadSleepTime(int64_t cleanSleepTime) { this->cleanSleepTime = cleanSleepTime; } FilterPtr FilterManager::getFilter(FilterPtr filter) { SyncLock parentLock(&cache); FilterItemPtr fi(cache.get(filter->hashCode())); if (fi) { fi->timestamp = MiscUtils::currentTimeMillis(); return fi->filter; } cache.put(filter->hashCode(), newLucene(filter)); return filter; } FilterItem::FilterItem(FilterPtr filter) { this->filter = filter; this->timestamp = MiscUtils::currentTimeMillis(); } FilterItem::~FilterItem() { } FilterCleaner::FilterCleaner(FilterManagerPtr manager) { _manager = manager; running = true; } FilterCleaner::~FilterCleaner() { } void FilterCleaner::run() { while (running) { FilterManagerPtr manager(_manager); // sort items from oldest to newest we delete the oldest filters if (manager->cache.size() > manager->cacheCleanSize) { // empty the temporary set 
sortedFilterItems.clear(); { SyncLock parentLock(&manager->cache); for (MapIntFilterItem::iterator item = manager->cache.begin(); item != manager->cache.end(); ++item) sortedFilterItems.put(item->second->timestamp, item->first); int32_t numToDelete = (int32_t)((double)(sortedFilterItems.size() - manager->cacheCleanSize) * 1.5); int32_t counter = 0; // loop over the set and delete all of the cache entries not used in a while for (MapLongInt::iterator item = sortedFilterItems.begin(); item != sortedFilterItems.end() && counter++ < numToDelete; ++item) manager->cache.remove(item->second); } // empty the set so we don't tie up the memory sortedFilterItems.clear(); } // take a nap LuceneThread::threadSleep(manager->cleanSleepTime); } } } LucenePlusPlus-rel_3.0.4/src/core/search/FilteredDocIdSet.cpp000066400000000000000000000024351217574114600241240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilteredDocIdSet.h" #include "_FilteredDocIdSet.h" namespace Lucene { FilteredDocIdSet::FilteredDocIdSet(DocIdSetPtr innerSet) { this->innerSet = innerSet; } FilteredDocIdSet::~FilteredDocIdSet() { } bool FilteredDocIdSet::isCacheable() { return innerSet->isCacheable(); } DocIdSetIteratorPtr FilteredDocIdSet::iterator() { return newLucene(shared_from_this(), innerSet->iterator()); } DefaultFilteredDocIdSetIterator::DefaultFilteredDocIdSetIterator(FilteredDocIdSetPtr filtered, DocIdSetIteratorPtr innerIter) : FilteredDocIdSetIterator(innerIter) { this->filtered = filtered; } DefaultFilteredDocIdSetIterator::~DefaultFilteredDocIdSetIterator() { } bool DefaultFilteredDocIdSetIterator::match(int32_t docid) { return filtered->match(docid); } } LucenePlusPlus-rel_3.0.4/src/core/search/FilteredDocIdSetIterator.cpp000066400000000000000000000030351217574114600256330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilteredDocIdSetIterator.h" namespace Lucene { FilteredDocIdSetIterator::FilteredDocIdSetIterator(DocIdSetIteratorPtr innerIter) { if (!innerIter) boost::throw_exception(IllegalArgumentException(L"null iterator")); this->innerIter = innerIter; this->doc = -1; } FilteredDocIdSetIterator::~FilteredDocIdSetIterator() { } int32_t FilteredDocIdSetIterator::docID() { return doc; } int32_t FilteredDocIdSetIterator::nextDoc() { while ((doc = innerIter->nextDoc()) != NO_MORE_DOCS) { if (match(doc)) return doc; } return doc; } int32_t FilteredDocIdSetIterator::advance(int32_t target) { doc = innerIter->advance(target); if (doc != NO_MORE_DOCS) { if (match(doc)) return doc; else { while ((doc = innerIter->nextDoc()) != NO_MORE_DOCS) { if (match(doc)) return doc; } return doc; } } return doc; } } LucenePlusPlus-rel_3.0.4/src/core/search/FilteredQuery.cpp000066400000000000000000000154141217574114600235740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilteredQuery.h" #include "_FilteredQuery.h" #include "Explanation.h" #include "Filter.h" #include "DocIdSet.h" #include "MiscUtils.h" namespace Lucene { FilteredQuery::FilteredQuery(QueryPtr query, FilterPtr filter) { this->query = query; this->filter = filter; } FilteredQuery::~FilteredQuery() { } WeightPtr FilteredQuery::createWeight(SearcherPtr searcher) { WeightPtr weight(query->createWeight(searcher)); SimilarityPtr similarity(query->getSimilarity(searcher)); return newLucene(shared_from_this(), weight, similarity); } QueryPtr FilteredQuery::rewrite(IndexReaderPtr reader) { QueryPtr rewritten(query->rewrite(reader)); if (rewritten != query) { FilteredQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone())); cloneQuery->query = rewritten; return cloneQuery; } else return shared_from_this(); } QueryPtr FilteredQuery::getQuery() { return query; } FilterPtr FilteredQuery::getFilter() { return filter; } void FilteredQuery::extractTerms(SetTerm terms) { getQuery()->extractTerms(terms); } String FilteredQuery::toString(const String& field) { StringStream buffer; buffer << L"filtered(" << query->toString(field) << L")->" << filter->toString() << boostString(); return buffer.str(); } bool FilteredQuery::equals(LuceneObjectPtr other) { FilteredQueryPtr otherFilteredQuery(boost::dynamic_pointer_cast(other)); if (!otherFilteredQuery) return false; return (Query::equals(other) && query->equals(otherFilteredQuery->query) && filter->equals(otherFilteredQuery->filter)); } int32_t FilteredQuery::hashCode() { return query->hashCode() ^ filter->hashCode() + MiscUtils::doubleToIntBits(getBoost()); } LuceneObjectPtr FilteredQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(query, filter); FilteredQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->query = query; cloneQuery->filter = filter; return cloneQuery; } FilteredQueryWeight::FilteredQueryWeight(FilteredQueryPtr query, WeightPtr weight, SimilarityPtr similarity) { this->query = query; this->weight = weight; this->similarity = similarity; value = 0.0; } FilteredQueryWeight::~FilteredQueryWeight() { } double FilteredQueryWeight::getValue() { return value; } double FilteredQueryWeight::sumOfSquaredWeights() { return weight->sumOfSquaredWeights() * query->getBoost() * query->getBoost(); } void FilteredQueryWeight::normalize(double norm) { weight->normalize(norm); value = weight->getValue() * query->getBoost(); } ExplanationPtr FilteredQueryWeight::explain(IndexReaderPtr reader, int32_t doc) { ExplanationPtr inner(weight->explain(reader, doc)); if (query->getBoost() !=1) { ExplanationPtr preBoost(inner); inner = newLucene(inner->getValue() * query->getBoost(), L"product of:"); inner->addDetail(newLucene(query->getBoost(), L"boost")); inner->addDetail(preBoost); } FilterPtr f(query->filter); DocIdSetPtr docIdSet(f->getDocIdSet(reader)); DocIdSetIteratorPtr docIdSetIterator(!docIdSet ? 
DocIdSet::EMPTY_DOCIDSET()->iterator() : docIdSet->iterator()); if (!docIdSetIterator) docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); if (docIdSetIterator->advance(doc) == doc) return inner; else { ExplanationPtr result(newLucene(0.0, L"failure to match filter: " + f->toString())); result->addDetail(inner); return result; } } QueryPtr FilteredQueryWeight::getQuery() { return query; } ScorerPtr FilteredQueryWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { ScorerPtr scorer(weight->scorer(reader, true, false)); if (!scorer) return ScorerPtr(); DocIdSetPtr docIdSet(query->filter->getDocIdSet(reader)); if (!docIdSet) return ScorerPtr(); DocIdSetIteratorPtr docIdSetIterator(docIdSet->iterator()); if (!docIdSetIterator) return ScorerPtr(); return newLucene(shared_from_this(), scorer, docIdSetIterator, similarity); } FilteredQueryWeightScorer::FilteredQueryWeightScorer(FilteredQueryWeightPtr weight, ScorerPtr scorer, DocIdSetIteratorPtr docIdSetIterator, SimilarityPtr similarity) : Scorer(similarity) { this->weight = weight; this->scorer = scorer; this->docIdSetIterator = docIdSetIterator; doc = -1; } FilteredQueryWeightScorer::~FilteredQueryWeightScorer() { } int32_t FilteredQueryWeightScorer::advanceToCommon(int32_t scorerDoc, int32_t disiDoc) { while (scorerDoc != disiDoc) { if (scorerDoc < disiDoc) scorerDoc = scorer->advance(disiDoc); else disiDoc = docIdSetIterator->advance(scorerDoc); } return scorerDoc; } int32_t FilteredQueryWeightScorer::nextDoc() { int32_t disiDoc = docIdSetIterator->nextDoc(); int32_t scorerDoc = scorer->nextDoc(); doc = (scorerDoc != NO_MORE_DOCS && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS) ? 
scorer->docID() : NO_MORE_DOCS; return doc; } int32_t FilteredQueryWeightScorer::docID() { return doc; } int32_t FilteredQueryWeightScorer::advance(int32_t target) { int32_t disiDoc = docIdSetIterator->advance(target); int32_t scorerDoc = scorer->advance(target); doc = (scorerDoc != NO_MORE_DOCS && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS) ? scorer->docID() : NO_MORE_DOCS; return doc; } double FilteredQueryWeightScorer::score() { return weight->query->getBoost() * scorer->score(); } } LucenePlusPlus-rel_3.0.4/src/core/search/FilteredTermEnum.cpp000066400000000000000000000034601217574114600242210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilteredTermEnum.h" namespace Lucene { FilteredTermEnum::~FilteredTermEnum() { } void FilteredTermEnum::setEnum(TermEnumPtr actualEnum) { this->actualEnum = actualEnum; // Find the first term that matches TermPtr term(actualEnum->term()); if (term && termCompare(term)) currentTerm = term; else next(); } int32_t FilteredTermEnum::docFreq() { if (!currentTerm) return -1; BOOST_ASSERT(actualEnum); return actualEnum->docFreq(); } bool FilteredTermEnum::next() { if (!actualEnum) return false; // the actual enumerator is not initialized currentTerm.reset(); while (!currentTerm) { if (endEnum()) return false; if (actualEnum->next()) { TermPtr term(actualEnum->term()); if (termCompare(term)) { currentTerm = term; return true; } } else return false; } currentTerm.reset(); return false; } TermPtr FilteredTermEnum::term() { return currentTerm; } void FilteredTermEnum::close() { if (actualEnum) actualEnum->close(); currentTerm.reset(); actualEnum.reset(); } } 
LucenePlusPlus-rel_3.0.4/src/core/search/FuzzyQuery.cpp000066400000000000000000000157031217574114600231660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FuzzyQuery.h" #include "_FuzzyQuery.h" #include "FuzzyTermEnum.h" #include "Term.h" #include "TermQuery.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "MiscUtils.h" namespace Lucene { const int32_t FuzzyQuery::defaultPrefixLength = 0; FuzzyQuery::FuzzyQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength) { ConstructQuery(term, minimumSimilarity, prefixLength); } FuzzyQuery::FuzzyQuery(TermPtr term, double minimumSimilarity) { ConstructQuery(term, minimumSimilarity, defaultPrefixLength); } FuzzyQuery::FuzzyQuery(TermPtr term) { ConstructQuery(term, defaultMinSimilarity(), defaultPrefixLength); } FuzzyQuery::~FuzzyQuery() { } void FuzzyQuery::ConstructQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength) { this->term = term; if (minimumSimilarity >= 1.0) boost::throw_exception(IllegalArgumentException(L"minimumSimilarity >= 1")); else if (minimumSimilarity < 0.0) boost::throw_exception(IllegalArgumentException(L"minimumSimilarity < 0")); if (prefixLength < 0) boost::throw_exception(IllegalArgumentException(L"prefixLength < 0")); this->termLongEnough = ((int32_t)term->text().length() > (int32_t)(1.0 / (1.0 - minimumSimilarity))); this->minimumSimilarity = minimumSimilarity; this->prefixLength = prefixLength; rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE(); } double FuzzyQuery::defaultMinSimilarity() { const double _defaultMinSimilarity = 0.5; return _defaultMinSimilarity; } double FuzzyQuery::getMinSimilarity() { return 
minimumSimilarity; } int32_t FuzzyQuery::getPrefixLength() { return prefixLength; } FilteredTermEnumPtr FuzzyQuery::getEnum(IndexReaderPtr reader) { return newLucene(reader, getTerm(), minimumSimilarity, prefixLength); } TermPtr FuzzyQuery::getTerm() { return term; } void FuzzyQuery::setRewriteMethod(RewriteMethodPtr method) { boost::throw_exception(UnsupportedOperationException(L"FuzzyQuery cannot change rewrite method")); } QueryPtr FuzzyQuery::rewrite(IndexReaderPtr reader) { if (!termLongEnough) // can only match if it's exact return newLucene(term); int32_t maxSize = BooleanQuery::getMaxClauseCount(); ScoreTermQueuePtr stQueue(newLucene(1024)); FilteredTermEnumPtr enumerator(getEnum(reader)); LuceneException finally; try { ScoreTermPtr st = newLucene(); do { TermPtr t(enumerator->term()); if (!t) break; double score = enumerator->difference(); // ignore uncompetitive hits if (stQueue->size() >= maxSize && score <= stQueue->top()->score) continue; // add new entry in PQ st->term = t; st->score = score; stQueue->add(st); // possibly drop entries from queue st = (stQueue->size() > maxSize) ? stQueue->pop() : newLucene(); } while (enumerator->next()); } catch (LuceneException& e) { finally = e; } enumerator->close(); finally.throwException(); BooleanQueryPtr query(newLucene(true)); int32_t size = stQueue->size(); for (int32_t i = 0; i < size; ++i) { ScoreTermPtr st(stQueue->pop()); TermQueryPtr tq(newLucene(st->term)); // found a match tq->setBoost(getBoost() * st->score); // set the boost query->add(tq, BooleanClause::SHOULD); // add to query } return query; } LuceneObjectPtr FuzzyQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? 
other : newLucene(term)); FuzzyQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->minimumSimilarity = minimumSimilarity; cloneQuery->prefixLength = prefixLength; cloneQuery->termLongEnough = termLongEnough; cloneQuery->term = term; return cloneQuery; } String FuzzyQuery::toString(const String& field) { StringStream buffer; if (term->field() != field) buffer << term->field() << L":"; buffer << term->text() << L"~" << minimumSimilarity << boostString(); return buffer.str(); } int32_t FuzzyQuery::hashCode() { int32_t prime = 31; int32_t result = MultiTermQuery::hashCode(); result = prime * result + MiscUtils::doubleToIntBits(minimumSimilarity); result = prime * result + prefixLength; result = prime * result + (term ? term->hashCode() : 0); return result; } bool FuzzyQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!MultiTermQuery::equals(other)) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; FuzzyQueryPtr otherFuzzyQuery(boost::dynamic_pointer_cast(other)); if (!otherFuzzyQuery) return false; if (MiscUtils::doubleToIntBits(minimumSimilarity) != MiscUtils::doubleToIntBits(otherFuzzyQuery->minimumSimilarity)) return false; if (prefixLength != otherFuzzyQuery->prefixLength) return false; if (!term) { if (otherFuzzyQuery->term) return false; } else if (!term->equals(otherFuzzyQuery->term)) return false; return true; } ScoreTerm::~ScoreTerm() { } int32_t ScoreTerm::compareTo(ScoreTermPtr other) { if (this->score == other->score) return other->term->compareTo(this->term); else return this->score < other->score ? -1 : (this->score > other->score ? 
1 : 0); } ScoreTermQueue::ScoreTermQueue(int32_t size) : PriorityQueue(size) { } ScoreTermQueue::~ScoreTermQueue() { } bool ScoreTermQueue::lessThan(const ScoreTermPtr& first, const ScoreTermPtr& second) { return (first->compareTo(second) < 0); } } LucenePlusPlus-rel_3.0.4/src/core/search/FuzzyTermEnum.cpp000066400000000000000000000153561217574114600236210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "FuzzyTermEnum.h" #include "FuzzyQuery.h" #include "Term.h" #include "IndexReader.h" namespace Lucene { FuzzyTermEnum::FuzzyTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity, int32_t prefixLength) { ConstructTermEnum(reader, term, minSimilarity, prefixLength); } FuzzyTermEnum::FuzzyTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity) { ConstructTermEnum(reader, term, minSimilarity, FuzzyQuery::defaultPrefixLength); } FuzzyTermEnum::FuzzyTermEnum(IndexReaderPtr reader, TermPtr term) { ConstructTermEnum(reader, term, FuzzyQuery::defaultMinSimilarity(), FuzzyQuery::defaultPrefixLength); } FuzzyTermEnum::~FuzzyTermEnum() { } void FuzzyTermEnum::ConstructTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity, int32_t prefixLength) { if (minSimilarity >= 1.0) boost::throw_exception(IllegalArgumentException(L"minimumSimilarity cannot be greater than or equal to 1")); else if (minSimilarity < 0.0) boost::throw_exception(IllegalArgumentException(L"minimumSimilarity cannot be less than 0")); if (prefixLength < 0) boost::throw_exception(IllegalArgumentException(L"prefixLength cannot be less than 0")); this->minimumSimilarity = minSimilarity; this->scale_factor = 1.0 / (1.0 - 
minimumSimilarity); this->searchTerm = term; this->field = searchTerm->field(); this->_endEnum = false; this->_similarity = 0.0; // The prefix could be longer than the word. // It's kind of silly though. It means we must match the entire word. int32_t fullSearchTermLength = searchTerm->text().length(); int32_t realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength; this->text = searchTerm->text().substr(realPrefixLength); this->prefix = searchTerm->text().substr(0, realPrefixLength); this->p = Collection::newInstance(this->text.length() + 1); this->d = Collection::newInstance(this->text.length() + 1); setEnum(reader->terms(newLucene(searchTerm->field(), prefix))); } bool FuzzyTermEnum::termCompare(TermPtr term) { if (field == term->field() && boost::starts_with(term->text(), prefix)) { String target(term->text().substr(prefix.length())); this->_similarity = similarity(target); return (_similarity > minimumSimilarity); } _endEnum = true; return false; } double FuzzyTermEnum::difference() { return (_similarity - minimumSimilarity) * scale_factor; } bool FuzzyTermEnum::endEnum() { return _endEnum; } double FuzzyTermEnum::similarity(const String& target) { int32_t m = target.length(); int32_t n = text.length(); if (n == 0) { // We don't have anything to compare. That means if we just add the letters for m we get the new word return prefix.empty() ? 0.0 : 1.0 - ((double)m / (double)prefix.length()); } if (m == 0) return prefix.empty() ? 0.0 : 1.0 - ((double)n / (double)prefix.length()); int32_t maxDistance = calculateMaxDistance(m); if (maxDistance < std::abs(m - n)) { // Just adding the characters of m to n or vice-versa results in too many edits for example "pre" length // is 3 and "prefixes" length is 8. We can see that given this optimal circumstance, the edit distance // cannot be less than 5. which is 8-3 or more precisely std::abs(3 - 8). 
if our maximum edit distance // is 4, then we can discard this word without looking at it. return 0.0; } // init matrix d for (int32_t i = 0; i <= n; ++i) p[i] = i; // start computing edit distance for (int32_t j = 1; j <= m; ++j) // iterates through target { int32_t bestPossibleEditDistance = m; wchar_t t_j = target[j - 1]; // jth character of t d[0] = j; for (int32_t i = 1; i <= n; ++i) // iterates through text { // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1) if (t_j != text[i - 1]) d[i] = std::min(std::min(d[i - 1], p[i]), p[i - 1]) + 1; else d[i] = std::min(std::min(d[i - 1] + 1, p[i] + 1), p[i - 1]); bestPossibleEditDistance = std::min(bestPossibleEditDistance, d[i]); } // After calculating row i, the best possible edit distance can be found by found by finding the smallest // value in a given column. If the bestPossibleEditDistance is greater than the max distance, abort. if (j > maxDistance && bestPossibleEditDistance > maxDistance) // equal is okay, but not greater { // The closest the target can be to the text is just too far away. // This target is leaving the party early. return 0.0; } // copy current distance counts to 'previous row' distance counts: swap p and d std::swap(p, d); } // Our last action in the above loop was to switch d and p, so p now actually has the most recent cost counts // This will return less than 0.0 when the edit distance is greater than the number of characters in the shorter // word. 
But this was the formula that was previously used in FuzzyTermEnum, so it has not been changed (even // though minimumSimilarity must be greater than 0.0) return 1.0 - ((double)p[n] / (double)(prefix.length() + std::min(n, m))); } int32_t FuzzyTermEnum::calculateMaxDistance(int32_t m) { return (int32_t)((1.0 - minimumSimilarity) * (double)(std::min((int32_t)text.length(), m) + prefix.length())); } void FuzzyTermEnum::close() { p.reset(); d.reset(); searchTerm.reset(); FilteredTermEnum::close(); // call FilteredTermEnum::close() and let the garbage collector do its work. } } LucenePlusPlus-rel_3.0.4/src/core/search/HitQueue.cpp000066400000000000000000000023521217574114600225360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "HitQueue.h" #include "ScoreDoc.h" namespace Lucene { HitQueue::HitQueue(int32_t size, bool prePopulate) : HitQueueBase(size) { this->prePopulate = prePopulate; } HitQueue::~HitQueue() { } bool HitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { if (first->score == second->score) return (first->doc > second->doc); else return (first->score < second->score); } ScoreDocPtr HitQueue::getSentinelObject() { // Always set the doc Id to MAX_VALUE so that it won't be favored by lessThan. This generally should // not happen since if score is not NEG_INF, TopScoreDocCollector will always add the object to the queue. return !prePopulate ? 
ScoreDocPtr() : newLucene(INT_MAX, -std::numeric_limits::infinity()); } } LucenePlusPlus-rel_3.0.4/src/core/search/HitQueueBase.cpp000066400000000000000000000040501217574114600233260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "HitQueueBase.h" #include "ScoreDoc.h" namespace Lucene { HitQueueBase::HitQueueBase(int32_t size) { queueSize = size; } HitQueueBase::~HitQueueBase() { } void HitQueueBase::initialize() { queue = newLucene(shared_from_this(), queueSize); } ScoreDocPtr HitQueueBase::add(ScoreDocPtr scoreDoc) { return queue->add(scoreDoc); } ScoreDocPtr HitQueueBase::addOverflow(ScoreDocPtr scoreDoc) { return queue->addOverflow(scoreDoc); } ScoreDocPtr HitQueueBase::top() { return queue->top(); } ScoreDocPtr HitQueueBase::pop() { return queue->pop(); } ScoreDocPtr HitQueueBase::updateTop() { return queue->updateTop(); } int32_t HitQueueBase::size() { return queue->size(); } bool HitQueueBase::empty() { return queue->empty(); } void HitQueueBase::clear() { queue->clear(); } ScoreDocPtr HitQueueBase::getSentinelObject() { return ScoreDocPtr(); } PriorityQueueScoreDocs::PriorityQueueScoreDocs(HitQueueBasePtr hitQueue, int32_t size) : PriorityQueue(size) { _hitQueue = hitQueue; } PriorityQueueScoreDocs::~PriorityQueueScoreDocs() { } bool PriorityQueueScoreDocs::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { return HitQueueBasePtr(_hitQueue)->lessThan(first, second); } ScoreDocPtr PriorityQueueScoreDocs::getSentinelObject() { return HitQueueBasePtr(_hitQueue)->getSentinelObject(); } } 
LucenePlusPlus-rel_3.0.4/src/core/search/IndexSearcher.cpp000066400000000000000000000156051217574114600235360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexSearcher.h" #include "IndexReader.h" #include "TopScoreDocCollector.h" #include "TopFieldDocs.h" #include "TopFieldCollector.h" #include "Weight.h" #include "DocIdSet.h" #include "Scorer.h" #include "Filter.h" #include "Query.h" #include "ReaderUtil.h" namespace Lucene { IndexSearcher::IndexSearcher(DirectoryPtr path, bool readOnly) { ConstructSearcher(IndexReader::open(path, readOnly), true); } IndexSearcher::IndexSearcher(IndexReaderPtr reader) { ConstructSearcher(reader, false); } IndexSearcher::IndexSearcher(IndexReaderPtr reader, Collection subReaders, Collection docStarts) { this->fieldSortDoTrackScores = false; this->fieldSortDoMaxScore = false; this->reader = reader; this->subReaders = subReaders; this->docStarts = docStarts; closeReader = false; } IndexSearcher::~IndexSearcher() { } void IndexSearcher::ConstructSearcher(IndexReaderPtr reader, bool closeReader) { this->fieldSortDoTrackScores = false; this->fieldSortDoMaxScore = false; this->reader = reader; this->closeReader = closeReader; Collection subReadersList(Collection::newInstance()); gatherSubReaders(subReadersList, reader); subReaders = subReadersList; docStarts = Collection::newInstance(subReaders.size()); int32_t maxDoc = 0; for (int32_t i = 0; i < subReaders.size(); ++i) { docStarts[i] = maxDoc; maxDoc += subReaders[i]->maxDoc(); } } void IndexSearcher::gatherSubReaders(Collection allSubReaders, IndexReaderPtr reader) { ReaderUtil::gatherSubReaders(allSubReaders, reader); } IndexReaderPtr 
IndexSearcher::getIndexReader() { return reader; } void IndexSearcher::close() { if (closeReader) reader->close(); } int32_t IndexSearcher::docFreq(TermPtr term) { return reader->docFreq(term); } DocumentPtr IndexSearcher::doc(int32_t n) { return reader->document(n); } DocumentPtr IndexSearcher::doc(int32_t n, FieldSelectorPtr fieldSelector) { return reader->document(n, fieldSelector); } int32_t IndexSearcher::maxDoc() { return reader->maxDoc(); } TopDocsPtr IndexSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n) { if (n <= 0) boost::throw_exception(IllegalArgumentException(L"n must be > 0")); TopScoreDocCollectorPtr collector(TopScoreDocCollector::create(std::min(n, reader->maxDoc()), !weight->scoresDocsOutOfOrder())); search(weight, filter, collector); return collector->topDocs(); } TopFieldDocsPtr IndexSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) { return search(weight, filter, n, sort, true); } TopFieldDocsPtr IndexSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort, bool fillFields) { TopFieldCollectorPtr collector(TopFieldCollector::create(sort, std::min(n, reader->maxDoc()), fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight->scoresDocsOutOfOrder())); search(weight, filter, collector); return boost::dynamic_pointer_cast(collector->topDocs()); } void IndexSearcher::search(WeightPtr weight, FilterPtr filter, CollectorPtr results) { if (!filter) { for (int32_t i = 0; i < subReaders.size(); ++i) // search each subreader { results->setNextReader(subReaders[i], docStarts[i]); ScorerPtr scorer(weight->scorer(subReaders[i], !results->acceptsDocsOutOfOrder(), true)); if (scorer) scorer->score(results); } } else { for (int32_t i = 0; i < subReaders.size(); ++i) // search each subreader { results->setNextReader(subReaders[i], docStarts[i]); searchWithFilter(subReaders[i], weight, filter, results); } } } void IndexSearcher::searchWithFilter(IndexReaderPtr reader, WeightPtr weight, 
FilterPtr filter, CollectorPtr collector) { BOOST_ASSERT(filter); ScorerPtr scorer(weight->scorer(reader, true, false)); if (!scorer) return; int32_t docID = scorer->docID(); BOOST_ASSERT(docID == -1 || docID == DocIdSetIterator::NO_MORE_DOCS); DocIdSetPtr filterDocIdSet(filter->getDocIdSet(reader)); if (!filterDocIdSet) { // this means the filter does not accept any documents. return; } DocIdSetIteratorPtr filterIter(filterDocIdSet->iterator()); if (!filterIter) { // this means the filter does not accept any documents. return; } int32_t filterDoc = filterIter->nextDoc(); int32_t scorerDoc = scorer->advance(filterDoc); collector->setScorer(scorer); while (true) { if (scorerDoc == filterDoc) { // Check if scorer has exhausted, only before collecting. if (scorerDoc == DocIdSetIterator::NO_MORE_DOCS) break; collector->collect(scorerDoc); filterDoc = filterIter->nextDoc(); scorerDoc = scorer->advance(filterDoc); } else if (scorerDoc > filterDoc) filterDoc = filterIter->advance(scorerDoc); else scorerDoc = scorer->advance(filterDoc); } } QueryPtr IndexSearcher::rewrite(QueryPtr original) { QueryPtr query(original); for (QueryPtr rewrittenQuery(query->rewrite(reader)); rewrittenQuery != query; rewrittenQuery = query->rewrite(reader)) query = rewrittenQuery; return query; } ExplanationPtr IndexSearcher::explain(WeightPtr weight, int32_t doc) { int32_t n = ReaderUtil::subIndex(doc, docStarts); int32_t deBasedDoc = doc - docStarts[n]; return weight->explain(subReaders[n], deBasedDoc); } void IndexSearcher::setDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore) { fieldSortDoTrackScores = doTrackScores; fieldSortDoMaxScore = doMaxScore; } } LucenePlusPlus-rel_3.0.4/src/core/search/MatchAllDocsQuery.cpp000066400000000000000000000105501217574114600243300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MatchAllDocsQuery.h" #include "_MatchAllDocsQuery.h" #include "IndexReader.h" #include "Similarity.h" #include "TermDocs.h" #include "ComplexExplanation.h" #include "Searcher.h" #include "MiscUtils.h" namespace Lucene { MatchAllDocsQuery::MatchAllDocsQuery(const String& normsField) { this->normsField = normsField; } MatchAllDocsQuery::~MatchAllDocsQuery() { } WeightPtr MatchAllDocsQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } void MatchAllDocsQuery::extractTerms(SetTerm terms) { } String MatchAllDocsQuery::toString(const String& field) { StringStream buffer; buffer << L"*:*" << boostString(); return buffer.str(); } bool MatchAllDocsQuery::equals(LuceneObjectPtr other) { return Query::equals(other); } int32_t MatchAllDocsQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ 0x1aa71190; } LuceneObjectPtr MatchAllDocsQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(); MatchAllDocsQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->normsField = normsField; return cloneQuery; } MatchAllDocsWeight::MatchAllDocsWeight(MatchAllDocsQueryPtr query, SearcherPtr searcher) { this->query = query; this->similarity = searcher->getSimilarity(); this->queryWeight = 0.0; this->queryNorm = 0.0; } MatchAllDocsWeight::~MatchAllDocsWeight() { } String MatchAllDocsWeight::toString() { StringStream buffer; buffer << L"weight(" << queryWeight << L", " << queryNorm << L")"; return buffer.str(); } QueryPtr MatchAllDocsWeight::getQuery() { return query; } double MatchAllDocsWeight::getValue() { return queryWeight; } double MatchAllDocsWeight::sumOfSquaredWeights() { queryWeight = getQuery()->getBoost(); return queryWeight * queryWeight; } void MatchAllDocsWeight::normalize(double norm) { this->queryNorm = norm; queryWeight *= this->queryNorm; } ScorerPtr MatchAllDocsWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(query, reader, similarity, shared_from_this(), !query->normsField.empty() ? reader->norms(query->normsField) : ByteArray()); } ExplanationPtr MatchAllDocsWeight::explain(IndexReaderPtr reader, int32_t doc) { // explain query weight ExplanationPtr queryExpl(newLucene(true, getValue(), L"MatchAllDocsQuery, product of:")); if (getQuery()->getBoost() != 1.0) queryExpl->addDetail(newLucene(getQuery()->getBoost(), L"boost")); queryExpl->addDetail(newLucene(queryNorm, L"queryNorm")); return queryExpl; } MatchAllScorer::MatchAllScorer(MatchAllDocsQueryPtr query, IndexReaderPtr reader, SimilarityPtr similarity, WeightPtr weight, ByteArray norms) : Scorer(similarity) { this->query = query; this->termDocs = reader->termDocs(TermPtr()); this->_score = weight->getValue(); this->norms = norms; this->doc = -1; } MatchAllScorer::~MatchAllScorer() { } int32_t MatchAllScorer::docID() { return doc; } int32_t MatchAllScorer::nextDoc() { doc = termDocs->next() ? 
termDocs->doc() : NO_MORE_DOCS; return doc; } double MatchAllScorer::score() { return norms ? _score * Similarity::decodeNorm(norms[docID()]) : _score; } int32_t MatchAllScorer::advance(int32_t target) { doc = termDocs->skipTo(target) ? termDocs->doc() : NO_MORE_DOCS; return doc; } } LucenePlusPlus-rel_3.0.4/src/core/search/MultiPhraseQuery.cpp000066400000000000000000000273341217574114600242770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiPhraseQuery.h" #include "_MultiPhraseQuery.h" #include "Searcher.h" #include "Term.h" #include "TermQuery.h" #include "MultipleTermPositions.h" #include "ExactPhraseScorer.h" #include "SloppyPhraseScorer.h" #include "Similarity.h" #include "IndexReader.h" #include "ComplexExplanation.h" #include "BooleanQuery.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { MultiPhraseQuery::MultiPhraseQuery() { termArrays = Collection< Collection >::newInstance(); positions = Collection::newInstance(); slop = 0; } MultiPhraseQuery::~MultiPhraseQuery() { } void MultiPhraseQuery::setSlop(int32_t s) { slop = s; } int32_t MultiPhraseQuery::getSlop() { return slop; } void MultiPhraseQuery::add(TermPtr term) { add(newCollection(term)); } void MultiPhraseQuery::add(Collection terms) { int32_t position = 0; if (!positions.empty()) position = positions[positions.size() - 1] + 1; add(terms, position); } void MultiPhraseQuery::add(Collection terms, int32_t position) { if (termArrays.empty()) field = terms[0]->field(); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { if ((*term)->field() != field) boost::throw_exception(IllegalArgumentException(L"All 
phrase terms must be in the same field (" + field + L"): " + (*term)->toString())); } termArrays.add(terms); positions.add(position); } Collection< Collection > MultiPhraseQuery::getTermArrays() { return termArrays; } Collection MultiPhraseQuery::getPositions() { return positions; } void MultiPhraseQuery::extractTerms(SetTerm terms) { for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) { for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) terms.add(*term); } } QueryPtr MultiPhraseQuery::rewrite(IndexReaderPtr reader) { if (termArrays.size() == 1) // optimize one-term case { Collection terms(termArrays[0]); BooleanQueryPtr boq(newLucene(true)); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) boq->add(newLucene(*term), BooleanClause::SHOULD); boq->setBoost(getBoost()); return boq; } else return shared_from_this(); } WeightPtr MultiPhraseQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } String MultiPhraseQuery::toString(const String& field) { StringStream buffer; if (this->field != field) buffer << this->field << L":"; buffer << L"\""; for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) { if (arr != termArrays.begin()) buffer << L" "; if (arr->size() > 1) { buffer << L"("; for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) { if (term != arr->begin()) buffer << L" "; buffer << (*term)->text(); } buffer << L")"; } else if (!arr->empty()) buffer << (*arr)[0]->text(); } buffer << L"\""; if (slop != 0) buffer << L"~" << slop; buffer << boostString(); return buffer.str(); } bool MultiPhraseQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; MultiPhraseQueryPtr otherMultiPhraseQuery(boost::dynamic_pointer_cast(other)); if (!otherMultiPhraseQuery) return false; return (getBoost() == otherMultiPhraseQuery->getBoost() && slop == 
otherMultiPhraseQuery->slop && termArraysEquals(termArrays, otherMultiPhraseQuery->termArrays) && positions.equals(otherMultiPhraseQuery->positions)); } int32_t MultiPhraseQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ slop ^ termArraysHashCode() ^ MiscUtils::hashCode(positions.begin(), positions.end(), MiscUtils::hashNumeric) ^ 0x4ac65113; } int32_t MultiPhraseQuery::termArraysHashCode() { int32_t hashCode = 1; for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) hashCode = 31 * hashCode + MiscUtils::hashCode(arr->begin(), arr->end(), MiscUtils::hashLucene); return hashCode; } struct equalTermArrays { inline bool operator()(const Collection& first, const Collection& second) const { if (first.size() != second.size()) return false; return first.equals(second, luceneEquals()); } }; bool MultiPhraseQuery::termArraysEquals(Collection< Collection > first, Collection< Collection > second) { return first.equals(second, equalTermArrays()); } LuceneObjectPtr MultiPhraseQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(); MultiPhraseQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->field = field; cloneQuery->termArrays = termArrays; cloneQuery->positions = positions; cloneQuery->slop = slop; return cloneQuery; } MultiPhraseWeight::MultiPhraseWeight(MultiPhraseQueryPtr query, SearcherPtr searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); this->value = 0.0; this->idf = 0.0; this->queryNorm = 0.0; this->queryWeight = 0.0; // compute idf int32_t maxDoc = searcher->maxDoc(); for (Collection< Collection >::iterator arr = query->termArrays.begin(); arr != query->termArrays.end(); ++arr) { for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) idf += this->similarity->idf(searcher->docFreq(*term), maxDoc); } } MultiPhraseWeight::~MultiPhraseWeight() { } QueryPtr MultiPhraseWeight::getQuery() { return query; } double MultiPhraseWeight::getValue() { return value; } double MultiPhraseWeight::sumOfSquaredWeights() { queryWeight = idf * getQuery()->getBoost(); // compute query weight return queryWeight * queryWeight; // square it } void MultiPhraseWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } ScorerPtr MultiPhraseWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { if (query->termArrays.empty()) // optimize zero-term case return ScorerPtr(); Collection tps(Collection::newInstance(query->termArrays.size())); for (int32_t i = 0; i < tps.size(); ++i) { Collection terms(query->termArrays[i]); TermPositionsPtr p; if (terms.size() > 1) p = newLucene(reader, terms); else p = reader->termPositions(terms[0]); if (!p) return ScorerPtr(); tps[i] = p; } if (query->slop == 0) // optimize exact case return newLucene(shared_from_this(), tps, query->getPositions(), similarity, reader->norms(query->field)); else return newLucene(shared_from_this(), tps, query->getPositions(), 
similarity, query->slop, reader->norms(query->field)); } ExplanationPtr MultiPhraseWeight::explain(IndexReaderPtr reader, int32_t doc) { ComplexExplanationPtr result(newLucene()); result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); ExplanationPtr idfExpl(newLucene(idf, L"idf(" + query->toString() + L")")); // explain query weight ExplanationPtr queryExpl(newLucene()); queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); if (query->getBoost() != 1.0) queryExpl->addDetail(boostExpl); queryExpl->addDetail(idfExpl); ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); queryExpl->addDetail(queryNormExpl); queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); result->addDetail(queryExpl); // explain field weight ComplexExplanationPtr fieldExpl(newLucene()); fieldExpl->setDescription(L"fieldWeight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast(scorer(reader, true, false))); if (!phraseScorer) return newLucene(0.0, L"no matching docs"); ExplanationPtr tfExplanation(newLucene()); int32_t d = phraseScorer->advance(doc); double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0; tfExplanation->setValue(similarity->tf(phraseFreq)); tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); fieldExpl->addDetail(tfExplanation); fieldExpl->addDetail(idfExpl); ExplanationPtr fieldNormExpl(newLucene()); ByteArray fieldNorms(reader->norms(query->field)); double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; fieldNormExpl->setValue(fieldNorm); fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")"); fieldExpl->addDetail(fieldNormExpl); fieldExpl->setMatch(tfExplanation->isMatch()); fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); result->addDetail(fieldExpl); result->setMatch(fieldExpl->getMatch()); // combine them result->setValue(queryExpl->getValue() * fieldExpl->getValue()); if (queryExpl->getValue() == 1.0) return fieldExpl; return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/MultiSearcher.cpp000066400000000000000000000322541217574114600235600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiSearcher.h" #include "_MultiSearcher.h" #include "Term.h" #include "ReaderUtil.h" #include "HitQueue.h" #include "Query.h" #include "FieldDocSortedHitQueue.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "SortField.h" #include "TopFieldDocs.h" #include "FieldDoc.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { MultiSearcher::MultiSearcher(Collection searchables) { this->searchables = searchables; this->_maxDoc = 0; this->starts = Collection::newInstance(searchables.size() + 1); // build starts array for (int32_t i = 0; i < searchables.size(); ++i) { starts[i] = _maxDoc; _maxDoc += searchables[i]->maxDoc(); // compute maxDocs } starts[searchables.size()] = _maxDoc; } MultiSearcher::~MultiSearcher() { } Collection MultiSearcher::getSearchables() { return searchables; } Collection MultiSearcher::getStarts() { return starts; } void 
MultiSearcher::close() { for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) (*searchable)->close(); } int32_t MultiSearcher::docFreq(TermPtr term) { int32_t docFreq = 0; for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) docFreq += (*searchable)->docFreq(term); return docFreq; } DocumentPtr MultiSearcher::doc(int32_t n) { int32_t i = subSearcher(n); // find searcher index return searchables[i]->doc(n - starts[i]); // dispatch to searcher } DocumentPtr MultiSearcher::doc(int32_t n, FieldSelectorPtr fieldSelector) { int32_t i = subSearcher(n); // find searcher index return searchables[i]->doc(n - starts[i], fieldSelector); // dispatch to searcher } int32_t MultiSearcher::subSearcher(int32_t n) { return ReaderUtil::subIndex(n, starts); } int32_t MultiSearcher::subDoc(int32_t n) { return n - starts[subSearcher(n)]; } int32_t MultiSearcher::maxDoc() { return _maxDoc; } TopDocsPtr MultiSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n) { HitQueuePtr hq(newLucene(n, false)); int32_t totalHits = 0; for (int32_t i = 0; i < searchables.size(); ++i) // search each searcher { TopDocsPtr docs(newLucene(SynchronizePtr(), searchables[i], weight, filter, n, hq, i, starts)->call()); totalHits += docs->totalHits; // update totalHits } Collection scoreDocs(Collection::newInstance(hq->size())); for (int32_t i = hq->size() - 1; i >= 0; --i) // put docs in array scoreDocs[i] = hq->pop(); double maxScore = totalHits == 0 ? 
-std::numeric_limits::infinity() : scoreDocs[0]->score; return newLucene(totalHits, scoreDocs, maxScore); } TopFieldDocsPtr MultiSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) { FieldDocSortedHitQueuePtr hq(newLucene(n)); int32_t totalHits = 0; double maxScore = -std::numeric_limits::infinity(); for (int32_t i = 0; i < searchables.size(); ++i) // search each searcher { TopFieldDocsPtr docs(newLucene(SynchronizePtr(), searchables[i], weight, filter, n, hq, sort, i, starts)->call()); totalHits += docs->totalHits; // update totalHits maxScore = std::max(maxScore, docs->maxScore); } Collection scoreDocs(Collection::newInstance(hq->size())); for (int32_t i = hq->size() - 1; i >= 0; --i) // put docs in array scoreDocs[i] = hq->pop(); return newLucene(totalHits, scoreDocs, hq->getFields(), maxScore); } void MultiSearcher::search(WeightPtr weight, FilterPtr filter, CollectorPtr results) { for (int32_t i = 0; i < searchables.size(); ++i) { int32_t start = starts[i]; CollectorPtr hc = newLucene(results, start); searchables[i]->search(weight, filter, hc); } } QueryPtr MultiSearcher::rewrite(QueryPtr query) { Collection queries(Collection::newInstance(searchables.size())); for (int32_t i = 0; i < searchables.size(); ++i) queries[i] = searchables[i]->rewrite(query); return queries[0]->combine(queries); } ExplanationPtr MultiSearcher::explain(WeightPtr weight, int32_t doc) { int32_t i = subSearcher(doc); // find searcher index return searchables[i]->explain(weight, doc - starts[i]); // dispatch to searcher } WeightPtr MultiSearcher::createWeight(QueryPtr query) { // step 1 QueryPtr rewrittenQuery(rewrite(query)); // step 2 SetTerm terms(SetTerm::newInstance()); rewrittenQuery->extractTerms(terms); // step3 Collection allTermsArray(Collection::newInstance(terms.begin(), terms.end())); Collection aggregatedDfs(Collection::newInstance(terms.size())); for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); 
++searchable) { Collection dfs((*searchable)->docFreqs(allTermsArray)); for (int32_t j = 0; j < aggregatedDfs.size(); ++j) aggregatedDfs[j] += dfs[j]; } MapTermInt dfMap(MapTermInt::newInstance()); for (int32_t i = 0; i < allTermsArray.size(); ++i) dfMap.put(allTermsArray[i], aggregatedDfs[i]); // step4 int32_t numDocs = maxDoc(); CachedDfSourcePtr cacheSim(newLucene(dfMap, numDocs, getSimilarity())); return rewrittenQuery->weight(cacheSim); } CachedDfSource::CachedDfSource(MapTermInt dfMap, int32_t maxDoc, SimilarityPtr similarity) { this->dfMap = dfMap; this->_maxDoc = maxDoc; setSimilarity(similarity); } CachedDfSource::~CachedDfSource() { } int32_t CachedDfSource::docFreq(TermPtr term) { MapTermInt::iterator df = dfMap.find(term); if (df == dfMap.end()) boost::throw_exception(IllegalArgumentException(L"df for term " + term->text() + L" not available")); return df->second; } Collection CachedDfSource::docFreqs(Collection terms) { Collection result(Collection::newInstance(terms.size())); for (int32_t i = 0; i < terms.size(); ++i) result[i] = docFreq(terms[i]); return result; } int32_t CachedDfSource::maxDoc() { return _maxDoc; } QueryPtr CachedDfSource::rewrite(QueryPtr query) { // This is a bit of a hack. We know that a query which creates a Weight based on this Dummy-Searcher is // always already rewritten (see preparedWeight()). Therefore we just return the unmodified query here. 
return query; } void CachedDfSource::close() { boost::throw_exception(UnsupportedOperationException()); } DocumentPtr CachedDfSource::doc(int32_t n) { boost::throw_exception(UnsupportedOperationException()); return DocumentPtr(); } DocumentPtr CachedDfSource::doc(int32_t n, FieldSelectorPtr fieldSelector) { boost::throw_exception(UnsupportedOperationException()); return DocumentPtr(); } ExplanationPtr CachedDfSource::explain(WeightPtr weight, int32_t doc) { boost::throw_exception(UnsupportedOperationException()); return ExplanationPtr(); } void CachedDfSource::search(WeightPtr weight, FilterPtr filter, CollectorPtr results) { boost::throw_exception(UnsupportedOperationException()); } TopDocsPtr CachedDfSource::search(WeightPtr weight, FilterPtr filter, int32_t n) { boost::throw_exception(UnsupportedOperationException()); return TopDocsPtr(); } TopFieldDocsPtr CachedDfSource::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) { boost::throw_exception(UnsupportedOperationException()); return TopFieldDocsPtr(); } MultiSearcherCallableNoSort::MultiSearcherCallableNoSort(SynchronizePtr lock, SearchablePtr searchable, WeightPtr weight, FilterPtr filter, int32_t nDocs, HitQueuePtr hq, int32_t i, Collection starts) { this->lock = lock; this->searchable = searchable; this->weight = weight; this->filter = filter; this->nDocs = nDocs; this->hq = hq; this->i = i; this->starts = starts; } MultiSearcherCallableNoSort::~MultiSearcherCallableNoSort() { } TopDocsPtr MultiSearcherCallableNoSort::call() { TopDocsPtr docs(searchable->search(weight, filter, nDocs)); Collection scoreDocs(docs->scoreDocs); for (int32_t j = 0; j < scoreDocs.size(); ++j) // merge scoreDocs into hq { ScoreDocPtr scoreDoc(scoreDocs[j]); scoreDoc->doc += starts[i]; // convert doc SyncLock syncLock(lock); if (scoreDoc == hq->addOverflow(scoreDoc)) break; } return docs; } MultiSearcherCallableWithSort::MultiSearcherCallableWithSort(SynchronizePtr lock, SearchablePtr searchable, WeightPtr 
weight, FilterPtr filter, int32_t nDocs, FieldDocSortedHitQueuePtr hq, SortPtr sort, int32_t i, Collection starts) { this->lock = lock; this->searchable = searchable; this->weight = weight; this->filter = filter; this->nDocs = nDocs; this->hq = hq; this->i = i; this->starts = starts; this->sort = sort; } MultiSearcherCallableWithSort::~MultiSearcherCallableWithSort() { } TopFieldDocsPtr MultiSearcherCallableWithSort::call() { TopFieldDocsPtr docs(searchable->search(weight, filter, nDocs, sort)); // If one of the Sort fields is FIELD_DOC, need to fix its values, so that it will break ties by doc Id // properly. Otherwise, it will compare to 'relative' doc Ids, that belong to two different searchables. for (int32_t j = 0; j < docs->fields.size(); ++j) { if (docs->fields[j]->getType() == SortField::DOC) { // iterate over the score docs and change their fields value for (int32_t j2 = 0; j2 < docs->scoreDocs.size(); ++j2) { FieldDocPtr fd(boost::dynamic_pointer_cast(docs->scoreDocs[j2])); fd->fields[j] = VariantUtils::get(fd->fields[j]) + starts[i]; } break; } } { SyncLock syncLock(lock); hq->setFields(docs->fields); } Collection scoreDocs(docs->scoreDocs); for (int32_t j = 0; j < scoreDocs.size(); ++j) // merge scoreDocs into hq { FieldDocPtr fieldDoc(boost::dynamic_pointer_cast(scoreDocs[j])); fieldDoc->doc += starts[i]; // convert doc SyncLock syncLock(lock); if (fieldDoc == hq->addOverflow(fieldDoc)) break; } return docs; } MultiSearcherCollector::MultiSearcherCollector(CollectorPtr collector, int32_t start) { this->collector = collector; this->start = start; } MultiSearcherCollector::~MultiSearcherCollector() { } void MultiSearcherCollector::setScorer(ScorerPtr scorer) { collector->setScorer(scorer); } void MultiSearcherCollector::collect(int32_t doc) { collector->collect(doc); } void MultiSearcherCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) { collector->setNextReader(reader, start + docBase); } bool 
MultiSearcherCollector::acceptsDocsOutOfOrder() { return collector->acceptsDocsOutOfOrder(); } } LucenePlusPlus-rel_3.0.4/src/core/search/MultiTermQuery.cpp000066400000000000000000000272151217574114600237620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiTermQuery.h" #include "_MultiTermQuery.h" #include "ConstantScoreQuery.h" #include "MultiTermQueryWrapperFilter.h" #include "QueryWrapperFilter.h" #include "BooleanQuery.h" #include "Term.h" #include "TermQuery.h" #include "TermDocs.h" #include "FilteredTermEnum.h" #include "IndexReader.h" #include "MiscUtils.h" namespace Lucene { MultiTermQuery::MultiTermQuery() { numberOfTerms = 0; rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); } MultiTermQuery::~MultiTermQuery() { } RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE() { static RewriteMethodPtr _CONSTANT_SCORE_FILTER_REWRITE; if (!_CONSTANT_SCORE_FILTER_REWRITE) { _CONSTANT_SCORE_FILTER_REWRITE = newLucene(); CycleCheck::addStatic(_CONSTANT_SCORE_FILTER_REWRITE); } return _CONSTANT_SCORE_FILTER_REWRITE; } RewriteMethodPtr MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE() { static RewriteMethodPtr _SCORING_BOOLEAN_QUERY_REWRITE; if (!_SCORING_BOOLEAN_QUERY_REWRITE) { _SCORING_BOOLEAN_QUERY_REWRITE = newLucene(); CycleCheck::addStatic(_SCORING_BOOLEAN_QUERY_REWRITE); } return _SCORING_BOOLEAN_QUERY_REWRITE; } RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE() { static RewriteMethodPtr _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; if (!_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE) { _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = newLucene(); 
CycleCheck::addStatic(_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); } return _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; } RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT() { static RewriteMethodPtr _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; if (!_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT) { _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = newLucene(); CycleCheck::addStatic(_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); } return _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; } int32_t MultiTermQuery::getTotalNumberOfTerms() { return numberOfTerms; } void MultiTermQuery::clearTotalNumberOfTerms() { numberOfTerms = 0; } void MultiTermQuery::incTotalNumberOfTerms(int32_t inc) { numberOfTerms += inc; } QueryPtr MultiTermQuery::rewrite(IndexReaderPtr reader) { return rewriteMethod->rewrite(reader, shared_from_this()); } RewriteMethodPtr MultiTermQuery::getRewriteMethod() { return rewriteMethod; } void MultiTermQuery::setRewriteMethod(RewriteMethodPtr method) { rewriteMethod = method; } LuceneObjectPtr MultiTermQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = Query::clone(other); MultiTermQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->rewriteMethod = rewriteMethod; cloneQuery->numberOfTerms = numberOfTerms; return cloneQuery; } int32_t MultiTermQuery::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + MiscUtils::doubleToIntBits(getBoost()); result = prime * result; result += rewriteMethod->hashCode(); return result; } bool MultiTermQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!other) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; MultiTermQueryPtr otherMultiTermQuery(boost::dynamic_pointer_cast(other)); if (!otherMultiTermQuery) return false; if (MiscUtils::doubleToIntBits(getBoost()) != MiscUtils::doubleToIntBits(otherMultiTermQuery->getBoost())) return false; if (!rewriteMethod->equals(otherMultiTermQuery->rewriteMethod)) return false; return true; } 
RewriteMethod::~RewriteMethod() { } ConstantScoreFilterRewrite::~ConstantScoreFilterRewrite() { } QueryPtr ConstantScoreFilterRewrite::rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) { QueryPtr result(newLucene(newLucene(query))); result->setBoost(query->getBoost()); return result; } ScoringBooleanQueryRewrite::~ScoringBooleanQueryRewrite() { } QueryPtr ScoringBooleanQueryRewrite::rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) { FilteredTermEnumPtr enumerator(query->getEnum(reader)); BooleanQueryPtr result(newLucene(true)); int32_t count = 0; LuceneException finally; try { do { TermPtr t(enumerator->term()); if (t) { TermQueryPtr tq(newLucene(t)); // found a match tq->setBoost(query->getBoost() * enumerator->difference()); // set the boost result->add(tq, BooleanClause::SHOULD); // add to query ++count; } } while (enumerator->next()); } catch (LuceneException& e) { finally = e; } enumerator->close(); finally.throwException(); query->incTotalNumberOfTerms(count); return result; } ConstantScoreBooleanQueryRewrite::~ConstantScoreBooleanQueryRewrite() { } QueryPtr ConstantScoreBooleanQueryRewrite::rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) { // strip the scores off QueryPtr result(newLucene(newLucene(ScoringBooleanQueryRewrite::rewrite(reader, query)))); result->setBoost(query->getBoost()); return result; } // Defaults derived from rough tests with a 20.0 million doc Wikipedia index. 
With more than 350 terms // in the query, the filter method is fastest const int32_t ConstantScoreAutoRewrite::DEFAULT_TERM_COUNT_CUTOFF = 350; // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest const double ConstantScoreAutoRewrite::DEFAULT_DOC_COUNT_PERCENT = 0.1; ConstantScoreAutoRewrite::ConstantScoreAutoRewrite() { termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF; docCountPercent = DEFAULT_DOC_COUNT_PERCENT; } ConstantScoreAutoRewrite::~ConstantScoreAutoRewrite() { } void ConstantScoreAutoRewrite::setTermCountCutoff(int32_t count) { termCountCutoff = count; } int32_t ConstantScoreAutoRewrite::getTermCountCutoff() { return termCountCutoff; } void ConstantScoreAutoRewrite::setDocCountPercent(double percent) { docCountPercent = percent; } double ConstantScoreAutoRewrite::getDocCountPercent() { return docCountPercent; } QueryPtr ConstantScoreAutoRewrite::rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) { // Get the enum and start visiting terms. If we exhaust the enum before hitting either of the // cutoffs, we use ConstantBooleanQueryRewrite; else ConstantFilterRewrite Collection pendingTerms(Collection::newInstance()); int32_t docCountCutoff = (int32_t)((docCountPercent / 100.0) * (double)reader->maxDoc()); int32_t termCountLimit = std::min(BooleanQuery::getMaxClauseCount(), termCountCutoff); int32_t docVisitCount = 0; FilteredTermEnumPtr enumerator(query->getEnum(reader)); QueryPtr result; LuceneException finally; try { while (true) { TermPtr t(enumerator->term()); if (t) { pendingTerms.add(t); // Loading the TermInfo from the terms dict here should not be costly, because 1) the // query/filter will load the TermInfo when it runs, and 2) the terms dict has a cache docVisitCount += reader->docFreq(t); } if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { // Too many terms -- make a filter. 
result = newLucene(newLucene(query)); result->setBoost(query->getBoost()); break; } else if (!enumerator->next()) { // Enumeration is done, and we hit a small enough number of terms and docs - // just make a BooleanQuery, now BooleanQueryPtr bq(newLucene(true)); for (Collection::iterator term = pendingTerms.begin(); term != pendingTerms.end(); ++ term) { TermQueryPtr tq(newLucene(*term)); bq->add(tq, BooleanClause::SHOULD); } // Strip scores result = newLucene(newLucene(bq)); result->setBoost(query->getBoost()); query->incTotalNumberOfTerms(pendingTerms.size()); break; } } } catch (LuceneException& e) { finally = e; } enumerator->close(); finally.throwException(); return result; } int32_t ConstantScoreAutoRewrite::hashCode() { int32_t prime = 1279; return (int32_t)(prime * termCountCutoff + MiscUtils::doubleToLongBits(docCountPercent)); } bool ConstantScoreAutoRewrite::equals(LuceneObjectPtr other) { if (RewriteMethod::equals(other)) return true; if (!other) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; ConstantScoreAutoRewritePtr otherConstantScoreAutoRewrite(boost::dynamic_pointer_cast(other)); if (!otherConstantScoreAutoRewrite) return false; if (termCountCutoff != otherConstantScoreAutoRewrite->termCountCutoff) return false; if (MiscUtils::doubleToLongBits(docCountPercent) != MiscUtils::doubleToLongBits(otherConstantScoreAutoRewrite->docCountPercent)) return false; return true; } ConstantScoreAutoRewriteDefault::~ConstantScoreAutoRewriteDefault() { } void ConstantScoreAutoRewriteDefault::setTermCountCutoff(int32_t count) { boost::throw_exception(UnsupportedOperationException(L"Please create a private instance")); } void ConstantScoreAutoRewriteDefault::setDocCountPercent(double percent) { boost::throw_exception(UnsupportedOperationException(L"Please create a private instance")); } } 
LucenePlusPlus-rel_3.0.4/src/core/search/MultiTermQueryWrapperFilter.cpp000066400000000000000000000073761217574114600264770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiTermQueryWrapperFilter.h" #include "MultiTermQuery.h" #include "IndexReader.h" #include "TermEnum.h" #include "TermDocs.h" #include "Term.h" #include "FilteredTermEnum.h" #include "DocIdSet.h" #include "OpenBitSet.h" #include "MiscUtils.h" namespace Lucene { MultiTermQueryWrapperFilter::MultiTermQueryWrapperFilter(MultiTermQueryPtr query) { this->query = query; } MultiTermQueryWrapperFilter::~MultiTermQueryWrapperFilter() { } String MultiTermQueryWrapperFilter::toString() { // query->toString should be ok for the filter, too, if the query boost is 1.0 return query->toString(); } bool MultiTermQueryWrapperFilter::equals(LuceneObjectPtr other) { if (Filter::equals(other)) return true; if (!other) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; MultiTermQueryWrapperFilterPtr otherMultiTermQueryWrapperFilter(boost::dynamic_pointer_cast(other)); if (otherMultiTermQueryWrapperFilter) return query->equals(otherMultiTermQueryWrapperFilter->query); return false; } int32_t MultiTermQueryWrapperFilter::hashCode() { return query->hashCode(); } int32_t MultiTermQueryWrapperFilter::getTotalNumberOfTerms() { return query->getTotalNumberOfTerms(); } void MultiTermQueryWrapperFilter::clearTotalNumberOfTerms() { query->clearTotalNumberOfTerms(); } DocIdSetPtr MultiTermQueryWrapperFilter::getDocIdSet(IndexReaderPtr reader) { TermEnumPtr enumerator(query->getEnum(reader)); OpenBitSetPtr bitSet; LuceneException finally; try { // if 
current term in enum is null, the enum is empty -> shortcut if (!enumerator->term()) return DocIdSet::EMPTY_DOCIDSET(); // else fill into a OpenBitSet bitSet = newLucene(reader->maxDoc()); Collection docs(Collection::newInstance(32)); Collection freqs(Collection::newInstance(32)); TermDocsPtr termDocs(reader->termDocs()); try { int32_t termCount = 0; do { TermPtr term(enumerator->term()); if (!term) break; ++termCount; termDocs->seek(term); while (true) { int32_t count = termDocs->read(docs, freqs); if (count != 0) { for (int32_t i = 0; i < count; ++i) bitSet->set(docs[i]); } else break; } } while (enumerator->next()); query->incTotalNumberOfTerms(termCount); } catch (LuceneException& e) { finally = e; } termDocs->close(); } catch (LuceneException& e) { finally = e; } enumerator->close(); finally.throwException(); return bitSet; } } LucenePlusPlus-rel_3.0.4/src/core/search/NumericRangeFilter.cpp000066400000000000000000000066611217574114600245410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericRangeFilter.h" #include "NumericRangeQuery.h" namespace Lucene { NumericRangeFilter::NumericRangeFilter(NumericRangeQueryPtr query) : MultiTermQueryWrapperFilter(query) { } NumericRangeFilter::~NumericRangeFilter() { } NumericRangeFilterPtr NumericRangeFilter::newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { return newLucene(NumericRangeQuery::newNumericRange(field, precisionStep, min, max, 
minInclusive, maxInclusive)); } NumericRangeFilterPtr NumericRangeFilter::newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { return newLucene(NumericRangeQuery::newNumericRange(field, min, max, minInclusive, maxInclusive)); } String NumericRangeFilter::getField() { return boost::static_pointer_cast(query)->field; } bool NumericRangeFilter::includesMin() { return boost::static_pointer_cast(query)->minInclusive; } bool NumericRangeFilter::includesMax() { return boost::static_pointer_cast(query)->maxInclusive; } NumericValue NumericRangeFilter::getMin() { return boost::static_pointer_cast(query)->min; } NumericValue NumericRangeFilter::getMax() { return boost::static_pointer_cast(query)->min; } } LucenePlusPlus-rel_3.0.4/src/core/search/NumericRangeQuery.cpp000066400000000000000000000334711217574114600244200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericRangeQuery.h" #include "_NumericRangeQuery.h" #include "Term.h" #include "IndexReader.h" #include "MiscUtils.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { NumericRangeQuery::NumericRangeQuery(const String& field, int32_t precisionStep, int32_t valSize, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { BOOST_ASSERT(valSize == 32 || valSize == 64); if (precisionStep < 1) boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); this->field = field; this->precisionStep = precisionStep; this->valSize = valSize; this->min = min; this->max = max; this->minInclusive = minInclusive; this->maxInclusive = maxInclusive; // For bigger precisionSteps this query likely hits too many terms, so set to CONSTANT_SCORE_FILTER // right off (especially as the FilteredTermEnum is costly if wasted only for AUTO tests because it // creates new enums from IndexReader for each sub-range) switch (valSize) { case 64: setRewriteMethod(precisionStep > 6 ? CONSTANT_SCORE_FILTER_REWRITE() : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); break; case 32: setRewriteMethod(precisionStep > 8 ? 
CONSTANT_SCORE_FILTER_REWRITE() : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); break; default: // should never happen boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } // shortcut if upper bound == lower bound if (!VariantUtils::isNull(min) && min == max) setRewriteMethod(CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); } NumericRangeQuery::~NumericRangeQuery() { } NumericRangeQueryPtr NumericRangeQuery::newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { if (!VariantUtils::equalsType(min, max)) 
boost::throw_exception(IllegalArgumentException(L"min/max must be of the same type")); int32_t valSize = VariantUtils::typeOf(min) ? 32 : 64; return newLucene(field, precisionStep, valSize, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, NumericUtils::PRECISION_STEP_DEFAULT, min, max, minInclusive, maxInclusive); } FilteredTermEnumPtr NumericRangeQuery::getEnum(IndexReaderPtr reader) { return newLucene(shared_from_this(), reader); } String NumericRangeQuery::getField() { return field; } bool NumericRangeQuery::includesMin() { return minInclusive; } bool NumericRangeQuery::includesMax() { return maxInclusive; } NumericValue NumericRangeQuery::getMin() { return min; } NumericValue NumericRangeQuery::getMax() { return min; } LuceneObjectPtr NumericRangeQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(field, precisionStep, valSize, min, max, minInclusive, maxInclusive)); NumericRangeQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->field = field; cloneQuery->precisionStep = precisionStep; cloneQuery->valSize = valSize; cloneQuery->min = min; cloneQuery->max = max; cloneQuery->minInclusive = minInclusive; cloneQuery->maxInclusive = maxInclusive; return cloneQuery; } String NumericRangeQuery::toString(const String& field) { StringStream buffer; if (this->field != field) buffer << this->field << L":"; buffer << (minInclusive ? L"[" : L"{"); if (VariantUtils::isNull(min)) buffer << L"*"; else buffer << min; buffer << L" TO "; if (VariantUtils::isNull(max)) buffer << L"*"; else buffer << max; buffer << (maxInclusive ? 
L"]" : L"}"); buffer << boostString(); return buffer.str(); } bool NumericRangeQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!MultiTermQuery::equals(other)) return false; NumericRangeQueryPtr otherNumericRangeQuery(boost::dynamic_pointer_cast(other)); if (!otherNumericRangeQuery) return false; return (field == otherNumericRangeQuery->field && min == otherNumericRangeQuery->min && max == otherNumericRangeQuery->max && minInclusive == otherNumericRangeQuery->minInclusive && maxInclusive == otherNumericRangeQuery->maxInclusive && precisionStep == otherNumericRangeQuery->precisionStep); } int32_t NumericRangeQuery::hashCode() { int32_t hash = MultiTermQuery::hashCode(); hash += StringUtils::hashCode(field) ^ 0x4565fd66 + precisionStep ^ 0x64365465; if (!VariantUtils::isNull(min)) hash += VariantUtils::hashCode(min) ^ 0x14fa55fb; if (!VariantUtils::isNull(max)) hash += VariantUtils::hashCode(max) ^ 0x733fa5fe; return hash + (MiscUtils::hashCode(minInclusive) ^ 0x14fa55fb) + (MiscUtils::hashCode(maxInclusive) ^ 0x733fa5fe); } NumericRangeTermEnum::NumericRangeTermEnum(NumericRangeQueryPtr query, IndexReaderPtr reader) { this->_query = query; this->reader = reader; this->rangeBounds = Collection::newInstance(); this->termTemplate = newLucene(query->field); switch (query->valSize) { case 64: { // lower int64_t minBound = std::numeric_limits::min(); if (VariantUtils::typeOf(query->min)) minBound = VariantUtils::get(query->min); else if (VariantUtils::typeOf(query->min)) minBound = NumericUtils::doubleToSortableLong(VariantUtils::get(query->min)); if (!query->minInclusive && !VariantUtils::isNull(query->min)) { if (minBound == std::numeric_limits::max()) break; ++minBound; } // upper int64_t maxBound = std::numeric_limits::max(); if (VariantUtils::typeOf(query->max)) maxBound = VariantUtils::get(query->max); else if (VariantUtils::typeOf(query->max)) maxBound = NumericUtils::doubleToSortableLong(VariantUtils::get(query->max)); if 
(!query->maxInclusive && !VariantUtils::isNull(query->max)) { if (maxBound == std::numeric_limits::min()) break; --maxBound; } NumericUtils::splitLongRange(newLucene(rangeBounds), query->precisionStep, minBound, maxBound); break; } case 32: { // lower int32_t minBound = INT_MIN; if (VariantUtils::typeOf(query->min)) minBound = VariantUtils::get(query->min); if (!query->minInclusive && !VariantUtils::isNull(query->min)) { if (minBound == INT_MAX) break; ++minBound; } // upper int32_t maxBound = INT_MAX; if (VariantUtils::typeOf(query->max)) maxBound = VariantUtils::get(query->max); if (!query->maxInclusive && !VariantUtils::isNull(query->max)) { if (maxBound == INT_MIN) break; --maxBound; } NumericUtils::splitIntRange(newLucene(rangeBounds), query->precisionStep, minBound, maxBound); break; } default: // should never happen boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } // seek to first term next(); } NumericRangeTermEnum::~NumericRangeTermEnum() { } double NumericRangeTermEnum::difference() { return 1.0; } bool NumericRangeTermEnum::endEnum() { boost::throw_exception(UnsupportedOperationException(L"not implemented")); return false; } void NumericRangeTermEnum::setEnum(TermEnumPtr actualEnum) { boost::throw_exception(UnsupportedOperationException(L"not implemented")); } bool NumericRangeTermEnum::termCompare(TermPtr term) { return (term->field() == NumericRangeQueryPtr(_query)->field && term->text().compare(currentUpperBound) <= 0); } bool NumericRangeTermEnum::next() { // if a current term exists, the actual enum is initialized: try change to next term, if no // such term exists, fall-through if (currentTerm) { BOOST_ASSERT(actualEnum); if (actualEnum->next()) { currentTerm = actualEnum->term(); if (termCompare(currentTerm)) return true; } } // if all above fails, we go forward to the next enum, if one is available currentTerm.reset(); while (rangeBounds.size() >= 2) { BOOST_ASSERT(rangeBounds.size() % 2 == 0); // close the current 
enum and read next bounds if (actualEnum) { actualEnum->close(); actualEnum.reset(); } String lowerBound(rangeBounds.removeFirst()); currentUpperBound = rangeBounds.removeFirst(); // create a new enum actualEnum = reader->terms(termTemplate->createTerm(lowerBound)); currentTerm = actualEnum->term(); if (currentTerm && termCompare(currentTerm)) return true; // clear the current term for next iteration currentTerm.reset(); } // no more sub-range enums available BOOST_ASSERT(rangeBounds.empty() && !currentTerm); return false; } void NumericRangeTermEnum::close() { rangeBounds.clear(); currentUpperBound.clear(); FilteredTermEnum::close(); } NumericLongRangeBuilder::NumericLongRangeBuilder(Collection rangeBounds) { this->rangeBounds = rangeBounds; } NumericLongRangeBuilder::~NumericLongRangeBuilder() { } void NumericLongRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { rangeBounds.add(minPrefixCoded); rangeBounds.add(maxPrefixCoded); } NumericIntRangeBuilder::NumericIntRangeBuilder(Collection rangeBounds) { this->rangeBounds = rangeBounds; } NumericIntRangeBuilder::~NumericIntRangeBuilder() { } void NumericIntRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { rangeBounds.add(minPrefixCoded); rangeBounds.add(maxPrefixCoded); } } LucenePlusPlus-rel_3.0.4/src/core/search/ParallelMultiSearcher.cpp000066400000000000000000000113221217574114600252260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "ParallelMultiSearcher.h" #include "_MultiSearcher.h" #include "HitQueue.h" #include "FieldDocSortedHitQueue.h" #include "FieldDoc.h" #include "TopFieldDocs.h" #include "ThreadPool.h" namespace Lucene { ParallelMultiSearcher::ParallelMultiSearcher(Collection searchables) : MultiSearcher(searchables) { } ParallelMultiSearcher::~ParallelMultiSearcher() { } int32_t ParallelMultiSearcher::docFreq(TermPtr term) { ThreadPoolPtr threadPool(ThreadPool::getInstance()); Collection searchThreads(Collection::newInstance(searchables.size())); for (int32_t i = 0; i < searchables.size(); ++i) searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&Searchable::docFreq), searchables[i], term))); int32_t docFreq = 0; for (int32_t i = 0; i < searchThreads.size(); ++i) docFreq += searchThreads[i]->get(); return docFreq; } TopDocsPtr ParallelMultiSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n) { HitQueuePtr hq(newLucene(n, false)); SynchronizePtr lock(newInstance()); ThreadPoolPtr threadPool(ThreadPool::getInstance()); Collection searchThreads(Collection::newInstance(searchables.size())); Collection multiSearcher(Collection::newInstance(searchables.size())); for (int32_t i = 0; i < searchables.size(); ++i) // search each searchable { multiSearcher[i] = newLucene(lock, searchables[i], weight, filter, n, hq, i, starts); searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&MultiSearcherCallableNoSort::call), multiSearcher[i]))); } int32_t totalHits = 0; double maxScore = -std::numeric_limits::infinity(); for (int32_t i = 0; i < searchThreads.size(); ++i) { TopDocsPtr topDocs(searchThreads[i]->get()); totalHits += topDocs->totalHits; maxScore = std::max(maxScore, topDocs->maxScore); } Collection scoreDocs(Collection::newInstance(hq->size())); for (int32_t i = hq->size() - 1; i >= 0; 
--i) // put docs in array scoreDocs[i] = hq->pop(); return newLucene(totalHits, scoreDocs, maxScore); } TopFieldDocsPtr ParallelMultiSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) { if (!sort) boost::throw_exception(NullPointerException(L"sort must not be null")); FieldDocSortedHitQueuePtr hq(newLucene(n)); SynchronizePtr lock(newInstance()); ThreadPoolPtr threadPool(ThreadPool::getInstance()); Collection searchThreads(Collection::newInstance(searchables.size())); Collection multiSearcher(Collection::newInstance(searchables.size())); for (int32_t i = 0; i < searchables.size(); ++i) // search each searchable { multiSearcher[i] = newLucene(lock, searchables[i], weight, filter, n, hq, sort, i, starts); searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&MultiSearcherCallableWithSort::call), multiSearcher[i]))); } int32_t totalHits = 0; double maxScore = -std::numeric_limits::infinity(); for (int32_t i = 0; i < searchThreads.size(); ++i) { TopFieldDocsPtr topDocs(searchThreads[i]->get()); totalHits += topDocs->totalHits; maxScore = std::max(maxScore, topDocs->maxScore); } Collection scoreDocs(Collection::newInstance(hq->size())); for (int32_t i = hq->size() - 1; i >= 0; --i) // put docs in array scoreDocs[i] = hq->pop(); return newLucene(totalHits, scoreDocs, hq->getFields(), maxScore); } } LucenePlusPlus-rel_3.0.4/src/core/search/PhrasePositions.cpp000066400000000000000000000031641217574114600241410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PhrasePositions.h" #include "TermPositions.h" namespace Lucene { PhrasePositions::PhrasePositions(TermPositionsPtr t, int32_t o) { doc = 0; position = 0; count = 0; repeats = false; tp = t; offset = o; } PhrasePositions::~PhrasePositions() { } bool PhrasePositions::next() { if (!tp->next()) { tp->close(); // close stream doc = INT_MAX; // sentinel value return false; } doc = tp->doc(); position = 0; return true; } bool PhrasePositions::skipTo(int32_t target) { if (!tp->skipTo(target)) { tp->close(); // close stream doc = INT_MAX; // sentinel value return false; } doc = tp->doc(); position = 0; return true; } void PhrasePositions::firstPosition() { count = tp->freq(); // read first pos nextPosition(); } bool PhrasePositions::nextPosition() { if (count-- > 0) // read subsequent pos's { position = tp->nextPosition() - offset; return true; } else return false; } } LucenePlusPlus-rel_3.0.4/src/core/search/PhraseQuery.cpp000066400000000000000000000234311217574114600232560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PhraseQuery.h" #include "_PhraseQuery.h" #include "Similarity.h" #include "Term.h" #include "TermPositions.h" #include "TermQuery.h" #include "IndexReader.h" #include "ExactPhraseScorer.h" #include "SloppyPhraseScorer.h" #include "Explanation.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { PhraseQuery::PhraseQuery() { terms = Collection::newInstance(); positions = Collection::newInstance(); maxPosition = 0; slop = 0; } PhraseQuery::~PhraseQuery() { } void PhraseQuery::setSlop(int32_t slop) { this->slop = slop; } int32_t PhraseQuery::getSlop() { return slop; } void PhraseQuery::add(TermPtr term) { int32_t position = 0; if (!positions.empty()) position = positions[positions.size() - 1] + 1; add(term, position); } void PhraseQuery::add(TermPtr term, int32_t position) { if (terms.empty()) field = term->field(); else if (term->field() != field) boost::throw_exception(IllegalArgumentException(L"All phrase terms must be in the same field: " + term->toString())); terms.add(term); positions.add(position); if (position > maxPosition) maxPosition = position; } Collection PhraseQuery::getTerms() { return terms; } Collection PhraseQuery::getPositions() { return positions; } WeightPtr PhraseQuery::createWeight(SearcherPtr searcher) { if (terms.size() == 1) // optimize one-term case { QueryPtr termQuery(newLucene(terms[0])); termQuery->setBoost(getBoost()); return termQuery->createWeight(searcher); } return newLucene(shared_from_this(), searcher); } void PhraseQuery::extractTerms(SetTerm terms) { terms.addAll(this->terms.begin(), this->terms.end()); } String PhraseQuery::toString(const String& field) { StringStream buffer; if (this->field != field) buffer << this->field << L":"; buffer << L"\""; Collection pieces(Collection::newInstance(maxPosition + 1)); for (int32_t i = 0; i < terms.size(); ++i) { int32_t pos = positions[i]; String s(pieces[pos]); 
if (!s.empty()) s += L"|"; s += terms[i]->text(); pieces[pos] = s; } for (int32_t i = 0; i < pieces.size(); ++i) { if (i > 0) buffer << L" "; String s(pieces[i]); buffer << (s.empty() ? L"?" : s); } buffer << L"\""; if (slop != 0) buffer << L"~" << slop; buffer << boostString(); return buffer.str(); } bool PhraseQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; PhraseQueryPtr otherPhraseQuery(boost::dynamic_pointer_cast(other)); if (!otherPhraseQuery) return false; return (getBoost() == otherPhraseQuery->getBoost() && slop == otherPhraseQuery->slop && terms.equals(otherPhraseQuery->terms, luceneEquals()) && positions.equals(otherPhraseQuery->positions)); } int32_t PhraseQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ slop ^ MiscUtils::hashCode(terms.begin(), terms.end(), MiscUtils::hashLucene) ^ MiscUtils::hashCode(positions.begin(), positions.end(), MiscUtils::hashNumeric); } LuceneObjectPtr PhraseQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(); PhraseQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->field = field; cloneQuery->terms = terms; cloneQuery->positions = positions; cloneQuery->maxPosition = maxPosition; cloneQuery->slop = slop; return cloneQuery; } PhraseWeight::PhraseWeight(PhraseQueryPtr query, SearcherPtr searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); this->value = 0.0; this->idf = 0.0; this->queryNorm = 0.0; this->queryWeight = 0.0; this->idfExp = similarity->idfExplain(query->terms, searcher); idf = idfExp->getIdf(); } PhraseWeight::~PhraseWeight() { } String PhraseWeight::toString() { return L"weight(" + query->toString() + L")"; } QueryPtr PhraseWeight::getQuery() { return query; } double PhraseWeight::getValue() { return value; } double PhraseWeight::sumOfSquaredWeights() { queryWeight = idf * getQuery()->getBoost(); // compute query weight return queryWeight * queryWeight; // square it } void PhraseWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } ScorerPtr PhraseWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { if (query->terms.empty()) // optimize zero-term case return ScorerPtr(); Collection tps(Collection::newInstance(query->terms.size())); for (int32_t i = 0; i < tps.size(); ++i) { TermPositionsPtr p(reader->termPositions(query->terms[i])); if (!p) return ScorerPtr(); tps[i] = p; } if (query->slop == 0) // optimize exact case return newLucene(shared_from_this(), tps, query->getPositions(), similarity, reader->norms(query->field)); else return newLucene(shared_from_this(), tps, query->getPositions(), similarity, query->slop, reader->norms(query->field)); } ExplanationPtr PhraseWeight::explain(IndexReaderPtr reader, int32_t doc) { ExplanationPtr result(newLucene()); result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), 
product of:"); StringStream docFreqsBuffer; StringStream queryBuffer; queryBuffer << L"\""; docFreqsBuffer << idfExp->explain(); for (Collection::iterator term = query->terms.begin(); term != query->terms.end(); ++term) { if (term != query->terms.begin()) queryBuffer << L" "; queryBuffer << (*term)->text(); } queryBuffer << L"\""; ExplanationPtr idfExpl(newLucene(idf, L"idf(" + query->field + L":" + docFreqsBuffer.str() + L")")); // explain query weight ExplanationPtr queryExpl(newLucene()); queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); if (query->getBoost() != 1.0) queryExpl->addDetail(boostExpl); queryExpl->addDetail(idfExpl); ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); queryExpl->addDetail(queryNormExpl); queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); result->addDetail(queryExpl); // explain field weight ExplanationPtr fieldExpl(newLucene()); fieldExpl->setDescription(L"fieldWeight(" + query->field + L":" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast(scorer(reader, true, false))); if (!phraseScorer) return newLucene(0.0, L"no matching docs"); ExplanationPtr tfExplanation(newLucene()); int32_t d = phraseScorer->advance(doc); double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0; tfExplanation->setValue(similarity->tf(phraseFreq)); tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); fieldExpl->addDetail(tfExplanation); fieldExpl->addDetail(idfExpl); ExplanationPtr fieldNormExpl(newLucene()); ByteArray fieldNorms(reader->norms(query->field)); double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; fieldNormExpl->setValue(fieldNorm); fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")"); fieldExpl->addDetail(fieldNormExpl); fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); result->addDetail(fieldExpl); // combine them result->setValue(queryExpl->getValue() * fieldExpl->getValue()); if (queryExpl->getValue() == 1.0) return fieldExpl; return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/PhraseQueue.cpp000066400000000000000000000022301217574114600232270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PhraseQueue.h" #include "PhrasePositions.h" namespace Lucene { PhraseQueue::PhraseQueue(int32_t size) : PriorityQueue(size) { } PhraseQueue::~PhraseQueue() { } bool PhraseQueue::lessThan(const PhrasePositionsPtr& first, const PhrasePositionsPtr& second) { if (first->doc == second->doc) { if (first->position == second->position) { // same doc and pp.position, so decide by actual term positions. // rely on: pp.position == tp.position - offset. return first->offset < second->offset; } else return first->position < second->position; } else return first->doc < second->doc; } } LucenePlusPlus-rel_3.0.4/src/core/search/PhraseScorer.cpp000066400000000000000000000105471217574114600234120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PhraseScorer.h" #include "PhrasePositions.h" #include "PhraseQueue.h" #include "Weight.h" #include "Similarity.h" namespace Lucene { PhraseScorer::PhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, ByteArray norms) : Scorer(similarity) { this->firstTime = true; this->more = true; this->freq = 0.0; this->norms = norms; this->weight = weight; this->value = weight->getValue(); // convert tps to a list of phrase positions. // Note: phrase-position differs from term-position in that its position reflects the phrase offset: pp.pos = tp.pos - offset. // This allows to easily identify a matching (exact) phrase when all PhrasePositions have exactly the same position. for (int32_t i = 0; i < tps.size(); ++i) { PhrasePositionsPtr pp(newLucene(tps[i], offsets[i])); if (last) // add next to end of list last->_next = pp; else first = pp; last = pp; } pq = newLucene(tps.size()); // construct empty pq first->doc = -1; } PhraseScorer::~PhraseScorer() { } int32_t PhraseScorer::docID() { return first->doc; } int32_t PhraseScorer::nextDoc() { if (firstTime) { init(); firstTime = false; } else if (more) more = last->next(); // trigger further scanning if (!doNext()) first->doc = NO_MORE_DOCS; return first->doc; } bool PhraseScorer::doNext() { while (more) { while (more && first->doc < last->doc) // find doc with all the terms { more = first->skipTo(last->doc); // skip first upto last and move it to the end firstToLast(); } if (more) { // found a doc with all of the terms freq = phraseFreq(); // check for phrase if (freq == 0.0) // no match more = last->next(); // trigger further scanning else return true; } } return false; // no more matches } double PhraseScorer::score() { double raw = getSimilarity()->tf(freq) * value; // raw score return !norms ? 
raw : raw * Similarity::decodeNorm(norms[first->doc]); // normalize } int32_t PhraseScorer::advance(int32_t target) { firstTime = false; for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) more = pp->skipTo(target); if (more) sort(); // re-sort if (!doNext()) first->doc = NO_MORE_DOCS; return first->doc; } double PhraseScorer::currentFreq() { return freq; } void PhraseScorer::init() { for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) more = pp->next(); if (more) sort(); } void PhraseScorer::sort() { pq->clear(); for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) pq->add(pp); pqToList(); } void PhraseScorer::pqToList() { last.reset(); first.reset(); while (pq->top()) { PhrasePositionsPtr pp(pq->pop()); if (last) // add next to end of list last->_next = pp; else first = pp; last = pp; pp->_next.reset(); } } void PhraseScorer::firstToLast() { last->_next = first; // move first to end of list last = first; first = first->_next; last->_next.reset(); } String PhraseScorer::toString() { return L"scorer(" + weight->toString() + L")"; } } LucenePlusPlus-rel_3.0.4/src/core/search/PositiveScoresOnlyCollector.cpp000066400000000000000000000026031217574114600264760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PositiveScoresOnlyCollector.h" #include "ScoreCachingWrappingScorer.h" namespace Lucene { PositiveScoresOnlyCollector::PositiveScoresOnlyCollector(CollectorPtr collector) { this->collector = collector; } PositiveScoresOnlyCollector::~PositiveScoresOnlyCollector() { } void PositiveScoresOnlyCollector::collect(int32_t doc) { if (scorer->score() > 0) collector->collect(doc); } void PositiveScoresOnlyCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) { collector->setNextReader(reader, docBase); } void PositiveScoresOnlyCollector::setScorer(ScorerPtr scorer) { // Set a ScoreCachingWrappingScorer in case the wrapped Collector will call score() also. this->scorer = newLucene(scorer); collector->setScorer(this->scorer); } bool PositiveScoresOnlyCollector::acceptsDocsOutOfOrder() { return collector->acceptsDocsOutOfOrder(); } } LucenePlusPlus-rel_3.0.4/src/core/search/PrefixFilter.cpp000066400000000000000000000016521217574114600234120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PrefixFilter.h" #include "PrefixQuery.h" #include "Term.h" namespace Lucene { PrefixFilter::PrefixFilter(TermPtr prefix) : MultiTermQueryWrapperFilter(newLucene(prefix)) { } PrefixFilter::~PrefixFilter() { } TermPtr PrefixFilter::getPrefix() { return boost::static_pointer_cast(query)->getPrefix(); } String PrefixFilter::toString() { StringStream buffer; buffer << L"PrefixFilter(" << getPrefix()->toString() << L")"; return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/search/PrefixQuery.cpp000066400000000000000000000044251217574114600232730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PrefixQuery.h" #include "PrefixTermEnum.h" #include "Term.h" #include "MiscUtils.h" namespace Lucene { PrefixQuery::PrefixQuery(TermPtr prefix) { this->prefix = prefix; } PrefixQuery::~PrefixQuery() { } TermPtr PrefixQuery::getPrefix() { return prefix; } FilteredTermEnumPtr PrefixQuery::getEnum(IndexReaderPtr reader) { return newLucene(reader, prefix); } String PrefixQuery::toString(const String& field) { StringStream buffer; if (prefix->field() != field) buffer << prefix->field() << L":"; buffer << prefix->text() << L"*" << boostString(); return buffer.str(); } LuceneObjectPtr PrefixQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? 
other : newLucene(prefix)); PrefixQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->prefix = prefix; return cloneQuery; } int32_t PrefixQuery::hashCode() { int32_t prime = 31; int32_t result = MultiTermQuery::hashCode(); result = prime * result + (prefix ? prefix->hashCode() : 0); return result; } bool PrefixQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!MultiTermQuery::equals(other)) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; PrefixQueryPtr otherPrefixQuery(boost::dynamic_pointer_cast(other)); if (!otherPrefixQuery) return false; if (!prefix) { if (otherPrefixQuery->prefix) return false; } else if (!prefix->equals(otherPrefixQuery->prefix)) return false; return true; } } LucenePlusPlus-rel_3.0.4/src/core/search/PrefixTermEnum.cpp000066400000000000000000000023601217574114600237160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "PrefixTermEnum.h" #include "IndexReader.h" #include "Term.h" namespace Lucene { PrefixTermEnum::PrefixTermEnum(IndexReaderPtr reader, TermPtr prefix) { this->_endEnum = false; this->prefix = prefix; setEnum(reader->terms(newLucene(prefix->field(), prefix->text()))); } PrefixTermEnum::~PrefixTermEnum() { } double PrefixTermEnum::difference() { return 1.0; } bool PrefixTermEnum::endEnum() { return _endEnum; } TermPtr PrefixTermEnum::getPrefixTerm() { return prefix; } bool PrefixTermEnum::termCompare(TermPtr term) { if (term->field() == prefix->field() && boost::starts_with(term->text(), prefix->text())) return true; _endEnum = true; return false; } } LucenePlusPlus-rel_3.0.4/src/core/search/Query.cpp000066400000000000000000000125721217574114600221170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Query.h" #include "BooleanQuery.h" #include "Searcher.h" #include "Similarity.h" #include "MiscUtils.h" namespace Lucene { Query::Query() { boost = 1.0; } Query::~Query() { } void Query::setBoost(double boost) { this->boost = boost; } double Query::getBoost() { return boost; } String Query::toString(const String& field) { return L""; // override } String Query::toString() { return toString(L""); } WeightPtr Query::createWeight(SearcherPtr searcher) { boost::throw_exception(UnsupportedOperationException()); return WeightPtr(); } WeightPtr Query::weight(SearcherPtr searcher) { QueryPtr query(searcher->rewrite(shared_from_this())); WeightPtr weight(query->createWeight(searcher)); double sum = weight->sumOfSquaredWeights(); double norm = getSimilarity(searcher)->queryNorm(sum); if (MiscUtils::isInfinite(norm) || MiscUtils::isNaN(norm)) norm = 1.0; weight->normalize(norm); return weight; } QueryPtr Query::rewrite(IndexReaderPtr reader) { return shared_from_this(); } QueryPtr Query::combine(Collection queries) { SetQuery uniques(SetQuery::newInstance()); for (Collection::iterator query = queries.begin(); query != queries.end(); ++query) { Collection clauses; BooleanQueryPtr bq(boost::dynamic_pointer_cast(*query)); // check if we can split the query into clauses bool splittable = bq; if (splittable) { splittable = bq->isCoordDisabled(); clauses = bq->getClauses(); for (Collection::iterator clause = clauses.begin(); splittable && clause != clauses.end(); ++clause) splittable = ((*clause)->getOccur() == BooleanClause::SHOULD); } if (splittable) { for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) uniques.add((*clause)->getQuery()); } else uniques.add(*query); } // optimization: if we have just one query, just return it if (uniques.size() == 1) return *uniques.begin(); BooleanQueryPtr result(newLucene(true)); for 
(SetQuery::iterator query = uniques.begin(); query != uniques.end(); ++query) result->add(*query, BooleanClause::SHOULD); return result; } void Query::extractTerms(SetTerm terms) { // needs to be implemented by query subclasses boost::throw_exception(UnsupportedOperationException()); } QueryPtr Query::mergeBooleanQueries(Collection queries) { SetBooleanClause allClauses(SetBooleanClause::newInstance()); for (Collection::iterator booleanQuery = queries.begin(); booleanQuery != queries.end(); ++booleanQuery) { for (Collection::iterator clause = (*booleanQuery)->begin(); clause != (*booleanQuery)->end(); ++clause) allClauses.add(*clause); } bool coordDisabled = queries.empty() ? false : queries[0]->isCoordDisabled(); BooleanQueryPtr result(newLucene(coordDisabled)); for (SetBooleanClause::iterator clause2 = allClauses.begin(); clause2 != allClauses.end(); ++clause2) result->add(*clause2); return result; } SimilarityPtr Query::getSimilarity(SearcherPtr searcher) { return searcher->getSimilarity(); } LuceneObjectPtr Query::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = LuceneObject::clone(other ? 
other : newLucene()); QueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->boost = boost; return cloneQuery; } int32_t Query::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + MiscUtils::doubleToIntBits(boost); return result; } bool Query::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!other) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; QueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; return (boost == otherQuery->boost); } String Query::boostString() { double boost = getBoost(); if (boost == 1.0) return L""; StringStream boostString; boostString.precision(1); boostString.setf(std::ios::fixed); boostString << L"^" << boost; return boostString.str(); } } LucenePlusPlus-rel_3.0.4/src/core/search/QueryTermVector.cpp000066400000000000000000000105571217574114600241330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryTermVector.h" #include "Analyzer.h" #include "TokenStream.h" #include "StringReader.h" #include "TermAttribute.h" namespace Lucene { QueryTermVector::QueryTermVector(Collection queryTerms) { terms = Collection::newInstance(); termFreqs = Collection::newInstance(); processTerms(queryTerms); } QueryTermVector::QueryTermVector(const String& queryString, AnalyzerPtr analyzer) { terms = Collection::newInstance(); termFreqs = Collection::newInstance(); if (analyzer) { TokenStreamPtr stream(analyzer->tokenStream(L"", newLucene(queryString))); if (stream) { Collection terms = Collection::newInstance(); try { bool hasMoreTokens = false; stream->reset(); TermAttributePtr termAtt(stream->addAttribute()); hasMoreTokens = stream->incrementToken(); while (hasMoreTokens) { terms.add(termAtt->term()); hasMoreTokens = stream->incrementToken(); } processTerms(terms); } catch (IOException&) { } } } } QueryTermVector::~QueryTermVector() { } void QueryTermVector::processTerms(Collection queryTerms) { if (queryTerms) { std::sort(queryTerms.begin(), queryTerms.end()); MapStringInt tmpSet(MapStringInt::newInstance()); // filter out duplicates Collection tmpList(Collection::newInstance()); Collection tmpFreqs(Collection::newInstance()); int32_t j = 0; for (int32_t i = 0; i < queryTerms.size(); ++i) { String term(queryTerms[i]); MapStringInt::iterator position = tmpSet.find(term); if (position == tmpSet.end()) { tmpSet.put(term, j++); tmpList.add(term); tmpFreqs.add(1); } else { int32_t freq = tmpFreqs[position->second]; tmpFreqs[position->second] = freq + 1; } } terms = tmpList; termFreqs = Collection::newInstance(tmpFreqs.size()); int32_t i = 0; for (Collection::iterator freq = tmpFreqs.begin(); freq != tmpFreqs.end(); ++freq) termFreqs[i++] = *freq; } } String QueryTermVector::toString() { StringStream buffer; buffer << L"{"; for (int32_t i = 0; i < terms.size(); ++i) { if 
(i > 0) buffer << L", "; buffer << terms[i] << L'/' << termFreqs[i]; } buffer << L"}"; return buffer.str(); } int32_t QueryTermVector::size() { return terms.size(); } Collection QueryTermVector::getTerms() { return terms; } Collection QueryTermVector::getTermFrequencies() { return termFreqs; } int32_t QueryTermVector::indexOf(const String& term) { Collection::iterator search = std::lower_bound(terms.begin(), terms.end(), term); return (search == terms.end() || term < *search) ? -1 : std::distance(terms.begin(), search); } Collection QueryTermVector::indexesOf(Collection terms, int32_t start, int32_t length) { Collection res(Collection::newInstance(length)); for (int32_t i = 0; i < length; ++i) res[i] = indexOf(terms[i]); return res; } } LucenePlusPlus-rel_3.0.4/src/core/search/QueryWrapperFilter.cpp000066400000000000000000000036521217574114600246250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryWrapperFilter.h" #include "_QueryWrapperFilter.h" #include "Query.h" #include "Weight.h" #include "Scorer.h" #include "IndexSearcher.h" namespace Lucene { QueryWrapperFilter::QueryWrapperFilter(QueryPtr query) { this->query = query; } QueryWrapperFilter::~QueryWrapperFilter() { } DocIdSetPtr QueryWrapperFilter::getDocIdSet(IndexReaderPtr reader) { WeightPtr weight(query->weight(newLucene(reader))); return newLucene(reader, weight); } String QueryWrapperFilter::toString() { return L"QueryWrapperFilter(" + query->toString() + L")"; } bool QueryWrapperFilter::equals(LuceneObjectPtr other) { QueryWrapperFilterPtr otherQueryWrapperFilter(boost::dynamic_pointer_cast(other)); if (!otherQueryWrapperFilter) return false; return this->query->equals(otherQueryWrapperFilter->query); } int32_t QueryWrapperFilter::hashCode() { return query->hashCode() ^ 0x923F64B9; } QueryWrapperFilterDocIdSet::QueryWrapperFilterDocIdSet(IndexReaderPtr reader, WeightPtr weight) { this->reader = reader; this->weight = weight; } QueryWrapperFilterDocIdSet::~QueryWrapperFilterDocIdSet() { } DocIdSetIteratorPtr QueryWrapperFilterDocIdSet::iterator() { return weight->scorer(reader, true, false); } bool QueryWrapperFilterDocIdSet::isCacheable() { return false; } } LucenePlusPlus-rel_3.0.4/src/core/search/ReqExclScorer.cpp000066400000000000000000000051551217574114600235320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReqExclScorer.h" namespace Lucene { ReqExclScorer::ReqExclScorer(ScorerPtr reqScorer, DocIdSetIteratorPtr exclDisi) : Scorer(SimilarityPtr()) // No similarity used. { this->reqScorer = reqScorer; this->exclDisi = exclDisi; this->doc = -1; } ReqExclScorer::~ReqExclScorer() { } int32_t ReqExclScorer::nextDoc() { if (!reqScorer) return doc; doc = reqScorer->nextDoc(); if (doc == NO_MORE_DOCS) { reqScorer.reset(); // exhausted, nothing left return doc; } if (!exclDisi) return doc; doc = toNonExcluded(); return doc; } int32_t ReqExclScorer::toNonExcluded() { int32_t exclDoc = exclDisi->docID(); int32_t reqDoc = reqScorer->docID(); // may be excluded do { if (reqDoc < exclDoc) return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded else if (reqDoc > exclDoc) { exclDoc = exclDisi->advance(reqDoc); if (exclDoc == NO_MORE_DOCS) { exclDisi.reset(); // exhausted, no more exclusions return reqDoc; } if (exclDoc > reqDoc) return reqDoc; // not excluded } } while ((reqDoc = reqScorer->nextDoc()) != NO_MORE_DOCS); reqScorer.reset(); // exhausted, nothing left return NO_MORE_DOCS; } int32_t ReqExclScorer::docID() { return doc; } double ReqExclScorer::score() { return reqScorer->score(); // reqScorer may be null when next() or skipTo() already return false } int32_t ReqExclScorer::advance(int32_t target) { if (!reqScorer) { doc = NO_MORE_DOCS; return doc; } if (!exclDisi) { doc = reqScorer->advance(target); return doc; } if (reqScorer->advance(target) == NO_MORE_DOCS) { reqScorer.reset(); doc = NO_MORE_DOCS; return doc; } doc = toNonExcluded(); return doc; } } LucenePlusPlus-rel_3.0.4/src/core/search/ReqOptSumScorer.cpp000066400000000000000000000027411217574114600240640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReqOptSumScorer.h" namespace Lucene { ReqOptSumScorer::ReqOptSumScorer(ScorerPtr reqScorer, ScorerPtr optScorer) : Scorer(SimilarityPtr()) // No similarity used. { this->reqScorer = reqScorer; this->optScorer = optScorer; } ReqOptSumScorer::~ReqOptSumScorer() { } int32_t ReqOptSumScorer::nextDoc() { return reqScorer->nextDoc(); } int32_t ReqOptSumScorer::advance(int32_t target) { return reqScorer->advance(target); } int32_t ReqOptSumScorer::docID() { return reqScorer->docID(); } double ReqOptSumScorer::score() { int32_t curDoc = reqScorer->docID(); double reqScore = reqScorer->score(); if (!optScorer) return reqScore; int32_t optScorerDoc = optScorer->docID(); if (optScorerDoc < curDoc && (optScorerDoc = optScorer->advance(curDoc)) == NO_MORE_DOCS) { optScorer.reset(); return reqScore; } return optScorerDoc == curDoc ? reqScore + optScorer->score() : reqScore; } } LucenePlusPlus-rel_3.0.4/src/core/search/ScoreCachingWrappingScorer.cpp000066400000000000000000000034121217574114600262210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ScoreCachingWrappingScorer.h" namespace Lucene { ScoreCachingWrappingScorer::ScoreCachingWrappingScorer(ScorerPtr scorer) : Scorer(scorer->getSimilarity()) { this->curDoc = -1; this->curScore = 0.0; this->_scorer = scorer; } ScoreCachingWrappingScorer::~ScoreCachingWrappingScorer() { } bool ScoreCachingWrappingScorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) { return ScorerPtr(_scorer)->score(collector, max, firstDocID); } SimilarityPtr ScoreCachingWrappingScorer::getSimilarity() { return ScorerPtr(_scorer)->getSimilarity(); } double ScoreCachingWrappingScorer::score() { ScorerPtr scorer(_scorer); int32_t doc = scorer->docID(); if (doc != curDoc) { curScore = scorer->score(); curDoc = doc; } return curScore; } int32_t ScoreCachingWrappingScorer::docID() { return ScorerPtr(_scorer)->docID(); } int32_t ScoreCachingWrappingScorer::nextDoc() { return ScorerPtr(_scorer)->nextDoc(); } void ScoreCachingWrappingScorer::score(CollectorPtr collector) { ScorerPtr(_scorer)->score(collector); } int32_t ScoreCachingWrappingScorer::advance(int32_t target) { return ScorerPtr(_scorer)->advance(target); } } LucenePlusPlus-rel_3.0.4/src/core/search/ScoreDoc.cpp000066400000000000000000000013311217574114600225020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ScoreDoc.h" namespace Lucene { ScoreDoc::ScoreDoc(int32_t doc, double score) { this->doc = doc; this->score = score; } ScoreDoc::~ScoreDoc() { } String ScoreDoc::toString() { StringStream buffer; buffer << L"doc=" << doc << L" score=" << score; return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/search/Scorer.cpp000066400000000000000000000022611217574114600222410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Scorer.h" #include "Collector.h" namespace Lucene { Scorer::Scorer(SimilarityPtr similarity) { this->similarity = similarity; } Scorer::~Scorer() { } SimilarityPtr Scorer::getSimilarity() { return similarity; } void Scorer::score(CollectorPtr collector) { collector->setScorer(shared_from_this()); int32_t doc; while ((doc = nextDoc()) != NO_MORE_DOCS) collector->collect(doc); } bool Scorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) { collector->setScorer(shared_from_this()); int32_t doc = firstDocID; while (doc < max) { collector->collect(doc); doc = nextDoc(); } return (doc != NO_MORE_DOCS); } } LucenePlusPlus-rel_3.0.4/src/core/search/Searchable.cpp000066400000000000000000000037201217574114600230360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Searchable.h" namespace Lucene { void Searchable::search(WeightPtr weight, FilterPtr filter, CollectorPtr collector) { BOOST_ASSERT(false); // override } void Searchable::close() { BOOST_ASSERT(false); // override } int32_t Searchable::docFreq(TermPtr term) { BOOST_ASSERT(false); return 0; // override } Collection Searchable::docFreqs(Collection terms) { BOOST_ASSERT(false); return Collection(); // override } int32_t Searchable::maxDoc() { BOOST_ASSERT(false); return 0; // override } TopDocsPtr Searchable::search(WeightPtr weight, FilterPtr filter, int32_t n) { BOOST_ASSERT(false); return TopDocsPtr(); // override } DocumentPtr Searchable::doc(int32_t n) { BOOST_ASSERT(false); return DocumentPtr(); // override } DocumentPtr Searchable::doc(int32_t n, FieldSelectorPtr fieldSelector) { BOOST_ASSERT(false); return DocumentPtr(); // override } QueryPtr Searchable::rewrite(QueryPtr query) { BOOST_ASSERT(false); return QueryPtr(); // override } ExplanationPtr Searchable::explain(WeightPtr weight, int32_t doc) { BOOST_ASSERT(false); return ExplanationPtr(); // override } TopFieldDocsPtr Searchable::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) { BOOST_ASSERT(false); return TopFieldDocsPtr(); // override } } LucenePlusPlus-rel_3.0.4/src/core/search/Searcher.cpp000066400000000000000000000040471217574114600225440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Searcher.h" #include "Similarity.h" #include "Query.h" #include "Collector.h" namespace Lucene { Searcher::Searcher() { similarity = Similarity::getDefault(); } Searcher::~Searcher() { } TopFieldDocsPtr Searcher::search(QueryPtr query, FilterPtr filter, int32_t n, SortPtr sort) { return search(createWeight(query), filter, n, sort); } void Searcher::search(QueryPtr query, CollectorPtr results) { search(createWeight(query), FilterPtr(), results); } void Searcher::search(QueryPtr query, FilterPtr filter, CollectorPtr results) { search(createWeight(query), filter, results); } TopDocsPtr Searcher::search(QueryPtr query, FilterPtr filter, int32_t n) { return search(createWeight(query), filter, n); } TopDocsPtr Searcher::search(QueryPtr query, int32_t n) { return search(query, FilterPtr(), n); } ExplanationPtr Searcher::explain(QueryPtr query, int32_t doc) { return explain(createWeight(query), doc); } void Searcher::setSimilarity(SimilarityPtr similarity) { this->similarity = similarity; } SimilarityPtr Searcher::getSimilarity() { return this->similarity; } WeightPtr Searcher::createWeight(QueryPtr query) { return query->weight(shared_from_this()); } Collection Searcher::docFreqs(Collection terms) { Collection result(Collection::newInstance(terms.size())); for (int32_t i = 0; i < terms.size(); ++i) result[i] = docFreq(terms[i]); return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/Similarity.cpp000066400000000000000000000072421217574114600231360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Similarity.h" #include "_Similarity.h" #include "DefaultSimilarity.h" #include "FieldInvertState.h" #include "Searcher.h" #include "Term.h" #include "SmallDouble.h" #include "StringUtils.h" namespace Lucene { const int32_t Similarity::NO_DOC_ID_PROVIDED = -1; Similarity::Similarity() { } Similarity::~Similarity() { } SimilarityPtr Similarity::getDefault() { static SimilarityPtr defaultImpl; if (!defaultImpl) { defaultImpl = newLucene(); CycleCheck::addStatic(defaultImpl); } return defaultImpl; } const Collection Similarity::NORM_TABLE() { static Collection _NORM_TABLE; if (!_NORM_TABLE) { _NORM_TABLE = Collection::newInstance(256); for (int32_t i = 0; i < 256; ++i) _NORM_TABLE[i] = SmallDouble::byteToDouble((uint8_t)i); } return _NORM_TABLE; } double Similarity::decodeNorm(uint8_t b) { return NORM_TABLE()[b & 0xff]; // & 0xff maps negative bytes to positive above 127 } const Collection Similarity::getNormDecoder() { return NORM_TABLE(); } double Similarity::computeNorm(const String& fieldName, FieldInvertStatePtr state) { return (double)(state->getBoost() * lengthNorm(fieldName, state->getLength())); } uint8_t Similarity::encodeNorm(double f) { return SmallDouble::doubleToByte(f); } double Similarity::tf(int32_t freq) { return tf((double)freq); } IDFExplanationPtr Similarity::idfExplain(TermPtr term, SearcherPtr searcher) { int32_t df = searcher->docFreq(term); int32_t max = searcher->maxDoc(); double _idf = idf(df, max); return newLucene(df, max, _idf); } IDFExplanationPtr Similarity::idfExplain(Collection terms, SearcherPtr searcher) { int32_t max = searcher->maxDoc(); double _idf = 0.0; String exp; for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { int32_t df = searcher->docFreq(*term); _idf += idf(df, max); exp += L" " + (*term)->text() + L"=" + StringUtils::toString(df); } return newLucene(exp, _idf); } double 
Similarity::scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { return 1.0; } SimilarityIDFExplanation::SimilarityIDFExplanation(int32_t df, int32_t max, double idf) { this->df = df; this->max = max; this->idf = idf; } SimilarityIDFExplanation::SimilarityIDFExplanation(const String& exp, double idf) { this->exp = exp; this->idf = idf; } SimilarityIDFExplanation::~SimilarityIDFExplanation() { } String SimilarityIDFExplanation::explain() { return !exp.empty() ? exp : L"idf(docFreq=" + StringUtils::toString(df) + L", maxDocs=" + StringUtils::toString(max) + L")"; } double SimilarityIDFExplanation::getIdf() { return idf; } } LucenePlusPlus-rel_3.0.4/src/core/search/SimilarityDelegator.cpp000066400000000000000000000034631217574114600247660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SimilarityDelegator.h" namespace Lucene { SimilarityDelegator::SimilarityDelegator(SimilarityPtr delegee) { this->delegee = delegee; } SimilarityDelegator::~SimilarityDelegator() { } double SimilarityDelegator::computeNorm(const String& fieldName, FieldInvertStatePtr state) { return delegee->computeNorm(fieldName, state); } double SimilarityDelegator::lengthNorm(const String& fieldName, int32_t numTokens) { return delegee->lengthNorm(fieldName, numTokens); } double SimilarityDelegator::queryNorm(double sumOfSquaredWeights) { return delegee->queryNorm(sumOfSquaredWeights); } double SimilarityDelegator::tf(double freq) { return delegee->tf(freq); } double SimilarityDelegator::sloppyFreq(int32_t distance) { return delegee->sloppyFreq(distance); } double SimilarityDelegator::idf(int32_t docFreq, int32_t numDocs) { return delegee->idf(docFreq, numDocs); } double SimilarityDelegator::coord(int32_t overlap, int32_t maxOverlap) { return delegee->coord(overlap, maxOverlap); } double SimilarityDelegator::scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { return delegee->scorePayload(docId, fieldName, start, end, payload, offset, length); } } LucenePlusPlus-rel_3.0.4/src/core/search/SingleTermEnum.cpp000066400000000000000000000020271217574114600237020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SingleTermEnum.h" #include "IndexReader.h" #include "Term.h" namespace Lucene { SingleTermEnum::SingleTermEnum(IndexReaderPtr reader, TermPtr singleTerm) { this->_endEnum = false; this->singleTerm = singleTerm; setEnum(reader->terms(singleTerm)); } SingleTermEnum::~SingleTermEnum() { } double SingleTermEnum::difference() { return 1.0; } bool SingleTermEnum::endEnum() { return _endEnum; } bool SingleTermEnum::termCompare(TermPtr term) { if (term->equals(singleTerm)) return true; _endEnum = true; return false; } } LucenePlusPlus-rel_3.0.4/src/core/search/SloppyPhraseScorer.cpp000066400000000000000000000146501217574114600246200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SloppyPhraseScorer.h" #include "PhrasePositions.h" #include "PhraseQueue.h" #include "Similarity.h" namespace Lucene { SloppyPhraseScorer::SloppyPhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, int32_t slop, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) { this->slop = slop; this->checkedRepeats = false; } SloppyPhraseScorer::~SloppyPhraseScorer() { } double SloppyPhraseScorer::phraseFreq() { int32_t end = initPhrasePositions(); double freq = 0.0; bool done = (end < 0); while (!done) { PhrasePositionsPtr pp(pq->pop()); int32_t start = pp->position; int32_t next = pq->top()->position; bool tpsDiffer = true; for (int32_t pos = start; pos <= next || !tpsDiffer; pos = pp->position) { if (pos<=next && tpsDiffer) start = pos; // advance pp to min window if (!pp->nextPosition()) { done = true; // ran out of a term - done break; } PhrasePositionsPtr pp2; tpsDiffer = (!pp->repeats || !(pp2 = termPositionsDiffer(pp))); if (pp2 && pp2 != pp) pp = flip(pp, pp2); // flip pp to pp2 } int32_t matchLength = end - start; if (matchLength <= slop) freq += getSimilarity()->sloppyFreq(matchLength); // score match if (pp->position > end) end = pp->position; pq->add(pp); // restore pq } return freq; } PhrasePositionsPtr SloppyPhraseScorer::flip(PhrasePositionsPtr pp, PhrasePositionsPtr pp2) { int32_t n = 0; PhrasePositionsPtr pp3; // pop until finding pp2 while ((pp3 = pq->pop()) != pp2) tmpPos[n++] = pp3; // insert back all but pp2 for (n--; n >= 0; --n) pq->addOverflow(tmpPos[n]); // insert pp back pq->add(pp); return pp2; } int32_t SloppyPhraseScorer::initPhrasePositions() { int32_t end = 0; // no repeats at all (most common case is also the simplest one) if (checkedRepeats && !repeats) { // build queue from list pq->clear(); for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) { pp->firstPosition(); if 
(pp->position > end) end = pp->position; pq->add(pp); // build pq from list } return end; } // position the pp's for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) pp->firstPosition(); // one time initialization for this scorer if (!checkedRepeats) { checkedRepeats = true; // check for repeats MapPhrasePositionsLuceneObject m; for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) { int32_t tpPos = pp->position + pp->offset; for (PhrasePositionsPtr pp2(pp->_next); pp2; pp2 = pp2->_next) { int32_t tpPos2 = pp2->position + pp2->offset; if (tpPos2 == tpPos) { if (!m) m = MapPhrasePositionsLuceneObject::newInstance(); pp->repeats = true; pp2->repeats = true; m.put(pp, LuceneObjectPtr()); m.put(pp2, LuceneObjectPtr()); } } } if (m) { repeats = Collection::newInstance(); for (MapPhrasePositionsLuceneObject::iterator key = m.begin(); key != m.end(); ++key) repeats.add(key->first); } } // with repeats must advance some repeating pp's so they all start with differing tp's if (repeats) { for (Collection::iterator pp = repeats.begin(); pp != repeats.end(); ++pp) { PhrasePositionsPtr pp2; while (pp2 = termPositionsDiffer(*pp)) { if (!pp2->nextPosition()) // out of pps that do not differ, advance the pp with higher offset return -1; // ran out of a term - done } } } // build queue from list pq->clear(); for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) { if (pp->position > end) end = pp->position; pq->add(pp); // build pq from list } if (repeats) tmpPos = Collection::newInstance(pq->size()); return end; } PhrasePositionsPtr SloppyPhraseScorer::termPositionsDiffer(PhrasePositionsPtr pp) { // Efficiency note: a more efficient implementation could keep a map between repeating pp's, so that if // pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are repeats of term2, pp2a would only be checked // against pp2b but not against pp1a, pp1b, pp1c. However this would complicate code, for a rather rare // case, so choice is to compromise here. 
int32_t tpPos = pp->position + pp->offset; for (Collection::iterator pp2 = repeats.begin(); pp2 != repeats.end(); ++pp2) { if (*pp2 == pp) continue; int32_t tpPos2 = (*pp2)->position + (*pp2)->offset; if (tpPos2 == tpPos) return pp->offset > (*pp2)->offset ? pp : *pp2; // do not differ: return the one with higher offset. } return PhrasePositionsPtr(); } } LucenePlusPlus-rel_3.0.4/src/core/search/Sort.cpp000066400000000000000000000044661217574114600217440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Sort.h" #include "SortField.h" #include "MiscUtils.h" namespace Lucene { Sort::Sort() { setSort(SortField::FIELD_SCORE()); } Sort::Sort(SortFieldPtr field) { setSort(field); } Sort::Sort(Collection fields) { setSort(fields); } Sort::~Sort() { } SortPtr Sort::RELEVANCE() { static SortPtr _RELEVANCE; if (!_RELEVANCE) { _RELEVANCE = newLucene(); CycleCheck::addStatic(_RELEVANCE); } return _RELEVANCE; } SortPtr Sort::INDEXORDER() { static SortPtr _INDEXORDER; if (!_INDEXORDER) { _INDEXORDER = newLucene(SortField::FIELD_DOC()); CycleCheck::addStatic(_INDEXORDER); } return _INDEXORDER; } void Sort::setSort(SortFieldPtr field) { this->fields = newCollection(field); } void Sort::setSort(Collection fields) { this->fields = fields; } Collection Sort::getSort() { return fields; } String Sort::toString() { StringStream buffer; for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if (field != fields.begin()) buffer << L","; buffer << (*field)->toString(); } return buffer.str(); } bool Sort::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; SortPtr 
otherSort(boost::dynamic_pointer_cast(other)); if (!otherSort) return false; return fields.equals(otherSort->fields); } int32_t Sort::hashCode() { return 0x45aaf665 + MiscUtils::hashCode(fields.begin(), fields.end(), MiscUtils::hashLucene); } } LucenePlusPlus-rel_3.0.4/src/core/search/SortField.cpp000066400000000000000000000220511217574114600226760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SortField.h" #include "FieldCache.h" #include "FieldComparator.h" #include "FieldComparatorSource.h" #include "StringUtils.h" namespace Lucene { /// Sort by document score (relevancy). Sort values are Double and higher values are at the front. const int32_t SortField::SCORE = 0; /// Sort by document number (index order). Sort values are Integer and lower values are at the front. const int32_t SortField::DOC = 1; /// Sort using term values as Strings. Sort values are String and lower values are at the front. const int32_t SortField::STRING = 3; /// Sort using term values as Integers. Sort values are Integer and lower values are at the front. const int32_t SortField::INT = 4; /// Sort using term values as Floats. Sort values are Float and lower values are at the front. const int32_t SortField::FLOAT = 5; /// Sort using term values as Longs. Sort values are Long and lower values are at the front. const int32_t SortField::LONG = 6; /// Sort using term values as Doubles. Sort values are Double and lower values are at the front. const int32_t SortField::DOUBLE = 7; /// Sort using term values as Shorts. Sort values are Short and lower values are at the front. const int32_t SortField::SHORT = 8; /// Sort using a custom Comparator. 
Sort values are any ComparableValue and sorting is done according /// to natural order. const int32_t SortField::CUSTOM = 9; /// Sort using term values as Bytes. Sort values are Byte and lower values are at the front. const int32_t SortField::BYTE = 10; /// Sort using term values as Strings, but comparing by value (using String::compare) for all comparisons. /// This is typically slower than {@link #STRING}, which uses ordinals to do the sorting. const int32_t SortField::STRING_VAL = 11; SortField::SortField(const String& field, int32_t type, bool reverse) { initFieldType(field, type); this->reverse = reverse; } SortField::SortField(const String& field, ParserPtr parser, bool reverse) { if (boost::dynamic_pointer_cast(parser)) initFieldType(field, INT); else if (boost::dynamic_pointer_cast(parser)) initFieldType(field, BYTE); else if (boost::dynamic_pointer_cast(parser)) initFieldType(field, LONG); else if (boost::dynamic_pointer_cast(parser)) initFieldType(field, DOUBLE); else boost::throw_exception(IllegalArgumentException(L"Parser instance does not subclass existing numeric parser from FieldCache")); this->reverse = reverse; this->parser = parser; } SortField::SortField(const String& field, const std::locale& locale, bool reverse) { initFieldType(field, STRING); this->locale = newInstance(locale); this->reverse = reverse; } SortField::SortField(const String& field, FieldComparatorSourcePtr comparator, bool reverse) { initFieldType(field, CUSTOM); this->comparatorSource = comparator; this->reverse = reverse; } SortField::~SortField() { } SortFieldPtr SortField::FIELD_SCORE() { static SortFieldPtr _FIELD_SCORE; if (!_FIELD_SCORE) { _FIELD_SCORE = newLucene(L"", SCORE); CycleCheck::addStatic(_FIELD_SCORE); } return _FIELD_SCORE; } SortFieldPtr SortField::FIELD_DOC() { static SortFieldPtr _FIELD_DOC; if (!_FIELD_DOC) { _FIELD_DOC = newLucene(L"", DOC); CycleCheck::addStatic(_FIELD_DOC); } return _FIELD_DOC; } void SortField::initFieldType(const String& field, 
int32_t type) { this->type = type; if (field.empty() && type != SCORE && type != DOC) boost::throw_exception(IllegalArgumentException(L"Field can only be null when type is SCORE or DOC")); this->field = field; } String SortField::getField() { return field; } int32_t SortField::getType() { return type; } localePtr SortField::getLocale() { return locale; } ParserPtr SortField::getParser() { return parser; } bool SortField::getReverse() { return reverse; } FieldComparatorSourcePtr SortField::getComparatorSource() { return comparatorSource; } String SortField::toString() { StringStream buffer; switch (type) { case SCORE: buffer << L""; break; case DOC: buffer << L""; break; case STRING: buffer << L""; break; case STRING_VAL: buffer << L""; break; case BYTE: buffer << L""; break; case SHORT: buffer << L""; break; case INT: buffer << L""; break; case LONG: buffer << L""; break; case FLOAT: buffer << L""; break; case DOUBLE: buffer << L""; break; case CUSTOM: buffer << L"toString() << L">"; break; default: buffer << L""; break; } if (parser) buffer << L"(" << parser->toString() << L")"; if (reverse) buffer << L"!"; return buffer.str(); } bool SortField::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; SortFieldPtr otherSortField(boost::dynamic_pointer_cast(other)); if (!otherSortField) return false; return (field == otherSortField->field && type == otherSortField->type && reverse == otherSortField->reverse && ((locale && otherSortField->locale && *locale == *otherSortField->locale) || (!locale && !otherSortField->locale)) && (comparatorSource ? comparatorSource->equals(otherSortField->comparatorSource) : !otherSortField->comparatorSource) && (parser ? parser->equals(otherSortField->parser) : !otherSortField->parser)); } int32_t SortField::hashCode() { int32_t hash = type ^ 0x346565dd + (reverse ? 
1 : 0) ^ 0xaf5998bb; hash += StringUtils::hashCode(field) ^ 0xff5685dd; if (locale) hash += StringUtils::hashCode(StringUtils::toUnicode(locale->name().c_str())) ^ 0xff5685dd; if (comparatorSource) hash += comparatorSource->hashCode(); if (parser) hash += parser->hashCode() ^ 0x3aaf56ff; return hash; } FieldComparatorPtr SortField::getComparator(int32_t numHits, int32_t sortPos) { if (locale) return newLucene(numHits, field, *locale); switch (type) { case SCORE: return newLucene(numHits); case DOC: return newLucene(numHits); case SHORT: case INT: return newLucene(numHits, field, parser); case FLOAT: case DOUBLE: return newLucene(numHits, field, parser); case LONG: return newLucene(numHits, field, parser); case BYTE: return newLucene(numHits, field, parser); case CUSTOM: BOOST_ASSERT(comparatorSource); return comparatorSource->newComparator(field, numHits, sortPos, reverse); case STRING: return newLucene(numHits, field, sortPos, reverse); case STRING_VAL: return newLucene(numHits, field); default: boost::throw_exception(IllegalStateException(L"Illegal sort type: " + StringUtils::toString(type))); return FieldComparatorPtr(); } } } LucenePlusPlus-rel_3.0.4/src/core/search/SpanFilter.cpp000066400000000000000000000007021217574114600230510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanFilter.h" namespace Lucene { SpanFilter::~SpanFilter() { } } LucenePlusPlus-rel_3.0.4/src/core/search/SpanFilterResult.cpp000066400000000000000000000032071217574114600242530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanFilterResult.h" namespace Lucene { SpanFilterResult::SpanFilterResult(DocIdSetPtr docIdSet, Collection positions) { this->docIdSet = docIdSet; this->positions = positions; } SpanFilterResult::~SpanFilterResult() { } Collection SpanFilterResult::getPositions() { return positions; } DocIdSetPtr SpanFilterResult::getDocIdSet() { return docIdSet; } PositionInfo::PositionInfo(int32_t doc) { this->doc = doc; this->positions = Collection::newInstance(); } PositionInfo::~PositionInfo() { } void PositionInfo::addPosition(int32_t start, int32_t end) { positions.add(newLucene(start, end)); } int32_t PositionInfo::getDoc() { return doc; } Collection PositionInfo::getPositions() { return positions; } StartEnd::StartEnd(int32_t start, int32_t end) { this->start = start; this->end = end; } StartEnd::~StartEnd() { } int32_t StartEnd::getEnd() { return end; } int32_t StartEnd::getStart() { return start; } } LucenePlusPlus-rel_3.0.4/src/core/search/SpanQueryFilter.cpp000066400000000000000000000044461217574114600241100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanQueryFilter.h" #include "SpanQuery.h" #include "SpanFilterResult.h" #include "Spans.h" #include "OpenBitSet.h" #include "IndexReader.h" namespace Lucene { SpanQueryFilter::SpanQueryFilter(SpanQueryPtr query) { this->query = query; } SpanQueryFilter::~SpanQueryFilter() { } DocIdSetPtr SpanQueryFilter::getDocIdSet(IndexReaderPtr reader) { SpanFilterResultPtr result(bitSpans(reader)); return result->getDocIdSet(); } SpanFilterResultPtr SpanQueryFilter::bitSpans(IndexReaderPtr reader) { OpenBitSetPtr bits(newLucene(reader->maxDoc())); SpansPtr spans(query->getSpans(reader)); Collection tmp(Collection::newInstance()); int32_t currentDoc = -1; PositionInfoPtr currentInfo; while (spans->next()) { int32_t doc = spans->doc(); bits->set(doc); if (currentDoc != doc) { currentInfo = newLucene(doc); tmp.add(currentInfo); currentDoc = doc; } currentInfo->addPosition(spans->start(), spans->end()); } return newLucene(bits, tmp); } SpanQueryPtr SpanQueryFilter::getQuery() { return query; } String SpanQueryFilter::toString() { return L"SpanQueryFilter(" + query->toString() + L")"; } bool SpanQueryFilter::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; SpanQueryFilterPtr otherSpanQueryFilter(boost::dynamic_pointer_cast(other)); if (!otherSpanQueryFilter) return false; return query->equals(otherSpanQueryFilter->query); } int32_t SpanQueryFilter::hashCode() { return query->hashCode() ^ 0x923f64b9; } } LucenePlusPlus-rel_3.0.4/src/core/search/TermQuery.cpp000066400000000000000000000152041217574114600227420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermQuery.h" #include "_TermQuery.h" #include "TermScorer.h" #include "IndexReader.h" #include "ComplexExplanation.h" #include "Term.h" #include "TermDocs.h" #include "Similarity.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { TermQuery::TermQuery(TermPtr term) { this->term = term; } TermQuery::~TermQuery() { } TermPtr TermQuery::getTerm() { return term; } WeightPtr TermQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } void TermQuery::extractTerms(SetTerm terms) { terms.add(getTerm()); } String TermQuery::toString(const String& field) { StringStream buffer; if (term->field() != field) buffer << term->field() << L":"; buffer << term->text() << boostString(); return buffer.str(); } bool TermQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; TermQueryPtr otherTermQuery(boost::dynamic_pointer_cast(other)); if (!otherTermQuery) return false; return (getBoost() == otherTermQuery->getBoost() && term->equals(otherTermQuery->term)); } int32_t TermQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ term->hashCode(); } LuceneObjectPtr TermQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(term); TermQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->term = term; return cloneQuery; } TermWeight::TermWeight(TermQueryPtr query, SearcherPtr searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); this->value = 0.0; this->idf = 0.0; this->queryNorm = 0.0; this->queryWeight = 0.0; this->idfExp = similarity->idfExplain(query->term, searcher); idf = idfExp->getIdf(); } TermWeight::~TermWeight() { } String TermWeight::toString() { return L"weight(" + query->toString() + L")"; } QueryPtr TermWeight::getQuery() { return query; } double TermWeight::getValue() { return value; } double TermWeight::sumOfSquaredWeights() { queryWeight = idf * getQuery()->getBoost(); // compute query weight return queryWeight * queryWeight; // square it } void TermWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } ScorerPtr TermWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { TermDocsPtr termDocs(reader->termDocs(query->term)); return termDocs ? 
newLucene(shared_from_this(), termDocs, similarity, reader->norms(query->term->field())) : ScorerPtr(); } ExplanationPtr TermWeight::explain(IndexReaderPtr reader, int32_t doc) { ComplexExplanationPtr result(newLucene()); result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); ExplanationPtr expl(newLucene(idf, idfExp->explain())); // explain query weight ExplanationPtr queryExpl(newLucene()); queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); if (query->getBoost() != 1.0) queryExpl->addDetail(boostExpl); queryExpl->addDetail(expl); ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); queryExpl->addDetail(queryNormExpl); queryExpl->setValue(boostExpl->getValue() * expl->getValue() * queryNormExpl->getValue()); result->addDetail(queryExpl); // explain field weight String field(query->term->field()); ComplexExplanationPtr fieldExpl(newLucene()); fieldExpl->setDescription(L"fieldWeight(" + query->term->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); ExplanationPtr tfExplanation(newLucene()); int32_t tf = 0; TermDocsPtr termDocs(reader->termDocs(query->term)); if (termDocs) { LuceneException finally; try { if (termDocs->skipTo(doc) && termDocs->doc() == doc) tf = termDocs->freq(); } catch (LuceneException& e) { finally = e; } termDocs->close(); finally.throwException(); tfExplanation->setValue(similarity->tf(tf)); tfExplanation->setDescription(L"tf(termFreq(" + query->term->toString() + L")=" + StringUtils::toString(tf) + L")"); } else { tfExplanation->setValue(0.0); tfExplanation->setDescription(L"no matching term"); } fieldExpl->addDetail(tfExplanation); fieldExpl->addDetail(expl); ExplanationPtr fieldNormExpl(newLucene()); ByteArray fieldNorms(reader->norms(field)); double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; fieldNormExpl->setValue(fieldNorm); fieldNormExpl->setDescription(L"fieldNorm(field=" + field + L", doc=" + StringUtils::toString(doc) + L")"); fieldExpl->addDetail(fieldNormExpl); fieldExpl->setMatch(tfExplanation->isMatch()); fieldExpl->setValue(tfExplanation->getValue() * expl->getValue() * fieldNormExpl->getValue()); result->addDetail(fieldExpl); result->setMatch(fieldExpl->getMatch()); // combine them result->setValue(queryExpl->getValue() * fieldExpl->getValue()); if (queryExpl->getValue() == 1.0) return fieldExpl; return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/TermRangeFilter.cpp000066400000000000000000000042761217574114600240460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermRangeFilter.h" #include "TermRangeQuery.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { TermRangeFilter::TermRangeFilter(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, bool includeUpper, CollatorPtr collator) : MultiTermQueryWrapperFilter(newLucene(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)) { } TermRangeFilter::~TermRangeFilter() { } TermRangeFilterPtr TermRangeFilter::Less(const String& fieldName, StringValue upperTerm) { return newLucene(fieldName, VariantUtils::null(), upperTerm, false, true); } TermRangeFilterPtr TermRangeFilter::More(const String& fieldName, StringValue lowerTerm) { return newLucene(fieldName, lowerTerm, VariantUtils::null(), true, false); } String TermRangeFilter::getField() { return boost::static_pointer_cast(query)->getField(); } String 
TermRangeFilter::getLowerTerm() { return boost::static_pointer_cast(query)->getLowerTerm(); } String TermRangeFilter::getUpperTerm() { return boost::static_pointer_cast(query)->getUpperTerm(); } bool TermRangeFilter::includesLower() { return boost::static_pointer_cast(query)->includesLower(); } bool TermRangeFilter::includesUpper() { return boost::static_pointer_cast(query)->includesUpper(); } CollatorPtr TermRangeFilter::getCollator() { return boost::static_pointer_cast(query)->getCollator(); } } LucenePlusPlus-rel_3.0.4/src/core/search/TermRangeQuery.cpp000066400000000000000000000120001217574114600237060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermRangeQuery.h" #include "TermRangeTermEnum.h" #include "Collator.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { TermRangeQuery::TermRangeQuery(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, bool includeUpper, CollatorPtr collator) { this->field = fieldName; this->lowerTerm = lowerTerm; this->upperTerm = upperTerm; this->includeLower = includeLower; this->includeUpper = includeUpper; this->collator = collator; } TermRangeQuery::~TermRangeQuery() { } String TermRangeQuery::getField() { return field; } String TermRangeQuery::getLowerTerm() { return VariantUtils::get(lowerTerm); } String TermRangeQuery::getUpperTerm() { return VariantUtils::get(upperTerm); } bool TermRangeQuery::includesLower() { return includeLower; } bool TermRangeQuery::includesUpper() { return includeUpper; } CollatorPtr TermRangeQuery::getCollator() { return collator; } FilteredTermEnumPtr TermRangeQuery::getEnum(IndexReaderPtr 
reader) { return newLucene(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); } LuceneObjectPtr TermRangeQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(field, lowerTerm, upperTerm, includeLower, includeUpper, collator)); TermRangeQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->lowerTerm = lowerTerm; cloneQuery->upperTerm = upperTerm; cloneQuery->collator = collator; cloneQuery->field = field; cloneQuery->includeLower = includeLower; cloneQuery->includeUpper = includeUpper; return cloneQuery; } String TermRangeQuery::toString(const String& field) { StringStream buffer; if (getField() != field) buffer << getField() << L":"; buffer << (includeLower ? L"[" : L"{"); if (VariantUtils::isNull(lowerTerm)) buffer << L"*"; else buffer << lowerTerm; buffer << L" TO "; if (VariantUtils::isNull(upperTerm)) buffer << L"*"; else buffer << upperTerm; buffer << (includeUpper ? L"]" : L"}"); buffer << boostString(); return buffer.str(); } bool TermRangeQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!MultiTermQuery::equals(other)) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; TermRangeQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; if (!collator) { if (otherQuery->collator) return false; } else if (!collator->equals(otherQuery->collator)) return false; if (field != otherQuery->field) return false; if (includeLower != otherQuery->includeLower) return false; if (includeUpper != otherQuery->includeUpper) return false; if (VariantUtils::isNull(lowerTerm)) { if (!VariantUtils::isNull(otherQuery->lowerTerm)) return false; } else if (!VariantUtils::equals(lowerTerm, otherQuery->lowerTerm)) return false; if (VariantUtils::isNull(upperTerm)) { if (!VariantUtils::isNull(otherQuery->upperTerm)) return false; } else if (!VariantUtils::equals(upperTerm, 
otherQuery->upperTerm)) return false; return true; } int32_t TermRangeQuery::hashCode() { int32_t prime = 31; int32_t result = MultiTermQuery::hashCode(); result = prime * result + (collator ? collator->hashCode() : 0); result = prime * result + (field.empty() ? 0 : StringUtils::hashCode(field)); result = prime * result + (includeLower ? 1231 : 1237); result = prime * result + (includeUpper ? 1231 : 1237); result = prime * result + (VariantUtils::isNull(lowerTerm) ? 0 : StringUtils::hashCode(VariantUtils::get(lowerTerm))); result = prime * result + (VariantUtils::isNull(upperTerm) ? 0 : StringUtils::hashCode(VariantUtils::get(upperTerm))); return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/TermRangeTermEnum.cpp000066400000000000000000000074541217574114600243560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermRangeTermEnum.h" #include "IndexReader.h" #include "Term.h" #include "Collator.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { TermRangeTermEnum::TermRangeTermEnum(IndexReaderPtr reader, const String& field, StringValue lowerTermText, StringValue upperTermText, bool includeLower, bool includeUpper, CollatorPtr collator) { this->collator = collator; this->_endEnum = false; this->upperTermText = upperTermText; this->lowerTermText = lowerTermText; this->includeLower = includeLower; this->includeUpper = includeUpper; this->field = field; // do a little bit of normalization: open ended range queries should always be inclusive. 
if (VariantUtils::isNull(this->lowerTermText)) this->includeLower = true; if (VariantUtils::isNull(this->upperTermText)) this->includeUpper = true; String startTermText(collator ? L"" : VariantUtils::get(this->lowerTermText)); setEnum(reader->terms(newLucene(this->field, startTermText))); } TermRangeTermEnum::~TermRangeTermEnum() { } double TermRangeTermEnum::difference() { return 1.0; } bool TermRangeTermEnum::endEnum() { return _endEnum; } bool TermRangeTermEnum::termCompare(TermPtr term) { if (!collator) { // Use Unicode code point ordering bool checkLower = false; if (!includeLower) // make adjustments to set to exclusive checkLower = true; if (term && term->field() == field) { if (!checkLower || VariantUtils::isNull(lowerTermText) || term->text().compare(VariantUtils::get(lowerTermText)) > 0) { checkLower = false; if (!VariantUtils::isNull(upperTermText)) { int32_t compare = VariantUtils::get(upperTermText).compare(term->text()); // if beyond the upper term, or is exclusive and this is equal to the upper term, break out if (compare < 0 || (!includeUpper && compare == 0)) { _endEnum = true; return false; } } return true; } } else { // break _endEnum = true; return false; } return false; } else { if (term && term->field() == field) { if ((VariantUtils::isNull(lowerTermText) || (includeLower ? collator->compare(term->text(), VariantUtils::get(lowerTermText)) >= 0 : collator->compare(term->text(), VariantUtils::get(lowerTermText)) > 0)) && (VariantUtils::isNull(upperTermText) || (includeUpper ? collator->compare(term->text(), VariantUtils::get(upperTermText)) <= 0 : collator->compare(term->text(), VariantUtils::get(upperTermText)) < 0))) return true; return false; } _endEnum = true; return false; } } } LucenePlusPlus-rel_3.0.4/src/core/search/TermScorer.cpp000066400000000000000000000076331217574114600231010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermScorer.h" #include "TermDocs.h" #include "Similarity.h" #include "Weight.h" #include "Collector.h" namespace Lucene { const int32_t TermScorer::SCORE_CACHE_SIZE = 32; TermScorer::TermScorer(WeightPtr weight, TermDocsPtr td, SimilarityPtr similarity, ByteArray norms) : Scorer(similarity) { this->weight = weight; this->termDocs = td; this->norms = norms; this->weightValue = weight->getValue(); this->doc = -1; this->docs = Collection::newInstance(32); this->freqs = Collection::newInstance(32); this->pointer = 0; this->pointerMax = 0; this->scoreCache = Collection::newInstance(SCORE_CACHE_SIZE); for (int32_t i = 0; i < SCORE_CACHE_SIZE; ++i) scoreCache[i] = getSimilarity()->tf(i) * weightValue; } TermScorer::~TermScorer() { } const Collection TermScorer::SIM_NORM_DECODER() { return Similarity::getNormDecoder(); } void TermScorer::score(CollectorPtr collector) { score(collector, INT_MAX, nextDoc()); } bool TermScorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) { // firstDocID is ignored since nextDoc() sets 'doc' collector->setScorer(shared_from_this()); while (doc < max) // for docs in window { collector->collect(doc); if (++pointer >= pointerMax) { pointerMax = termDocs->read(docs, freqs); // refill buffers if (pointerMax != 0) pointer = 0; else { termDocs->close(); // close stream doc = INT_MAX; // set to sentinel value return false; } } doc = docs[pointer]; } return true; } int32_t TermScorer::docID() { return doc; } int32_t TermScorer::nextDoc() { ++pointer; if (pointer >= pointerMax) { pointerMax = termDocs->read(docs, freqs); // refill buffer if (pointerMax != 0) pointer = 0; else { termDocs->close(); // close stream doc = NO_MORE_DOCS; return doc; } } doc = docs[pointer]; return doc; } 
double TermScorer::score() { BOOST_ASSERT(doc != -1); int32_t f = freqs[pointer]; double raw = f < SCORE_CACHE_SIZE ? scoreCache[f] : getSimilarity()->tf(f) * weightValue; // compute tf(f) * weight return norms ? raw * SIM_NORM_DECODER()[norms[doc] & 0xff] : raw; // normalize for field } int32_t TermScorer::advance(int32_t target) { // first scan in cache for (++pointer; pointer < pointerMax; ++pointer) { if (docs[pointer] >= target) { doc = docs[pointer]; return doc; } } // not found in cache, seek underlying stream bool result = termDocs->skipTo(target); if (result) { pointerMax = 1; pointer = 0; doc = termDocs->doc(); docs[pointer] = doc; freqs[pointer] = termDocs->freq(); } else doc = NO_MORE_DOCS; return doc; } String TermScorer::toString() { return L"scorer(" + weight->toString() + L")"; } } LucenePlusPlus-rel_3.0.4/src/core/search/TimeLimitingCollector.cpp000066400000000000000000000077361217574114600252620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TimeLimitingCollector.h" #include "_TimeLimitingCollector.h" #include "StringUtils.h" namespace Lucene { /// Default timer resolution. 
const int32_t TimeLimitingCollector::DEFAULT_RESOLUTION = 20; int64_t TimeLimitingCollector::resolution = TimeLimitingCollector::DEFAULT_RESOLUTION; TimeLimitingCollector::TimeLimitingCollector(CollectorPtr collector, int64_t timeAllowed) { this->DEFAULT_GREEDY = false; this->greedy = DEFAULT_GREEDY; this->collector = collector; this->t0 = TIMER_THREAD()->getMilliseconds(); this->timeout = t0 + timeAllowed; this->docBase = 0; } TimeLimitingCollector::~TimeLimitingCollector() { } TimerThreadPtr TimeLimitingCollector::TIMER_THREAD() { static TimerThreadPtr _TIMER_THREAD; if (!_TIMER_THREAD) { _TIMER_THREAD = newLucene(); CycleCheck::addStatic(_TIMER_THREAD); } if (!_TIMER_THREAD->isAlive()) _TIMER_THREAD->start(); // start single thread instance return _TIMER_THREAD; } int64_t TimeLimitingCollector::getResolution() { return resolution; } void TimeLimitingCollector::setResolution(int64_t newResolution) { resolution = std::max(newResolution, (int64_t)5); // 5 milliseconds is about the minimum reasonable time for a wait call. } void TimeLimitingCollector::stopTimer() { if (TIMER_THREAD()->isAlive()) { TIMER_THREAD()->stopThread(); TIMER_THREAD()->join(); } } bool TimeLimitingCollector::isGreedy() { return greedy; } void TimeLimitingCollector::setGreedy(bool greedy) { this->greedy = greedy; } void TimeLimitingCollector::collect(int32_t doc) { int64_t time = TIMER_THREAD()->getMilliseconds(); if (timeout < time) { if (greedy) collector->collect(doc); boost::throw_exception(TimeExceededException(L"Elapsed time:" + StringUtils::toString(timeout - t0) + L" ms. " + L"Exceeded allowed search time:" + StringUtils::toString(time - t0) + L" ms. 
" + L"Last doc:" + StringUtils::toString(docBase + doc))); } collector->collect(doc); } void TimeLimitingCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) { collector->setNextReader(reader, docBase); this->docBase = docBase; } void TimeLimitingCollector::setScorer(ScorerPtr scorer) { collector->setScorer(scorer); } bool TimeLimitingCollector::acceptsDocsOutOfOrder() { return collector->acceptsDocsOutOfOrder(); } TimerThread::TimerThread() { time = 0; _stopThread = false; } TimerThread::~TimerThread() { } void TimerThread::start() { _stopThread = false; LuceneThread::start(); } void TimerThread::run() { while (!_stopThread) { int64_t resolution; { SyncLock syncLock(this); resolution = TimeLimitingCollector::resolution; time += resolution; } LuceneThread::threadSleep(resolution); } } int64_t TimerThread::getMilliseconds() { SyncLock syncLock(this); return time; } void TimerThread::stopThread() { _stopThread = true; } } LucenePlusPlus-rel_3.0.4/src/core/search/TopDocs.cpp000066400000000000000000000020531217574114600223560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopDocs.h" namespace Lucene { TopDocs::TopDocs(int32_t totalHits, Collection scoreDocs) { this->totalHits = totalHits; this->scoreDocs = scoreDocs; this->maxScore = std::numeric_limits::quiet_NaN(); } TopDocs::TopDocs(int32_t totalHits, Collection scoreDocs, double maxScore) { this->totalHits = totalHits; this->scoreDocs = scoreDocs; this->maxScore = maxScore; } TopDocs::~TopDocs() { } double TopDocs::getMaxScore() { return maxScore; } void TopDocs::setMaxScore(double maxScore) { this->maxScore = maxScore; } } LucenePlusPlus-rel_3.0.4/src/core/search/TopDocsCollector.cpp000066400000000000000000000067611217574114600242370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopDocsCollector.h" #include "TopDocs.h" #include "HitQueueBase.h" namespace Lucene { TopDocsCollector::TopDocsCollector(HitQueueBasePtr pq) { this->pq = pq; this->totalHits = 0; } TopDocsCollector::~TopDocsCollector() { } TopDocsPtr TopDocsCollector::EMPTY_TOPDOCS() { static TopDocsPtr _EMPTY_TOPDOCS; if (!_EMPTY_TOPDOCS) { _EMPTY_TOPDOCS = newLucene(0, Collection::newInstance(), std::numeric_limits::quiet_NaN()); CycleCheck::addStatic(_EMPTY_TOPDOCS); } return _EMPTY_TOPDOCS; } void TopDocsCollector::populateResults(Collection results, int32_t howMany) { for (int32_t i = howMany - 1; i >= 0; --i) results[i] = pq->pop(); } TopDocsPtr TopDocsCollector::newTopDocs(Collection results, int32_t start) { return results ? 
newLucene(totalHits, results) : EMPTY_TOPDOCS(); } int32_t TopDocsCollector::getTotalHits() { return totalHits; } TopDocsPtr TopDocsCollector::topDocs() { // In case pq was populated with sentinel values, there might be less results than pq.size(). // Therefore return all results until either pq.size() or totalHits. return topDocs(0, totalHits < pq->size() ? totalHits : pq->size()); } TopDocsPtr TopDocsCollector::topDocs(int32_t start) { // In case pq was populated with sentinel values, there might be less results than pq.size(). // Therefore return all results until either pq.size() or totalHits. return topDocs(start, totalHits < pq->size() ? totalHits : pq->size()); } TopDocsPtr TopDocsCollector::topDocs(int32_t start, int32_t howMany) { // In case pq was populated with sentinel values, there might be less results than pq.size(). // Therefore return all results until either pq.size() or totalHits. int32_t size = totalHits < pq->size() ? totalHits : pq->size(); // Don't bother to throw an exception, just return an empty TopDocs in case the parameters are // invalid or out of range. if (start < 0 || start >= size || howMany <= 0) return newTopDocs(Collection(), start); // We know that start < pq.size, so just fix howMany. howMany = std::min(size - start, howMany); Collection results = Collection::newInstance(howMany); // pq's pop() returns the 'least' element in the queue, therefore need to discard the first ones, // until we reach the requested range. Note that this loop will usually not be executed, since the // common usage should be that the caller asks for the last howMany results. However it's needed // here for completeness. for (int32_t i = pq->size() - start - howMany; i > 0; --i) pq->pop(); // Get the requested results from pq. 
populateResults(results, howMany); return newTopDocs(results, start); } } LucenePlusPlus-rel_3.0.4/src/core/search/TopFieldCollector.cpp000066400000000000000000001035401217574114600243630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopFieldCollector.h" #include "_TopFieldCollector.h" #include "FieldValueHitQueue.h" #include "FieldComparator.h" #include "FieldDoc.h" #include "Scorer.h" #include "Sort.h" #include "TopFieldDocs.h" namespace Lucene { TopFieldCollector::TopFieldCollector(HitQueueBasePtr pq, int32_t numHits, bool fillFields) : TopDocsCollector(pq) { this->numHits = numHits; this->fillFields = fillFields; this->maxScore = std::numeric_limits::quiet_NaN(); this->queueFull = false; this->docBase = 0; } TopFieldCollector::~TopFieldCollector() { } const Collection TopFieldCollector::EMPTY_SCOREDOCS() { static Collection _EMPTY_SCOREDOCS; if (!_EMPTY_SCOREDOCS) _EMPTY_SCOREDOCS = Collection::newInstance(); return _EMPTY_SCOREDOCS; } TopFieldCollectorPtr TopFieldCollector::create(SortPtr sort, int32_t numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder) { if (sort->fields.empty()) boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); FieldValueHitQueuePtr queue(FieldValueHitQueue::create(sort->fields, numHits)); if (queue->getComparators().size() == 1) { if (docsScoredInOrder) { if (trackMaxScore) return newLucene(queue, numHits, fillFields); else if (trackDocScores) return newLucene(queue, numHits, fillFields); else return newLucene(queue, numHits, fillFields); } else { if (trackMaxScore) return newLucene(queue, numHits, fillFields); else 
if (trackDocScores) return newLucene(queue, numHits, fillFields); else return newLucene(queue, numHits, fillFields); } } // multiple comparators if (docsScoredInOrder) { if (trackMaxScore) return newLucene(queue, numHits, fillFields); else if (trackDocScores) return newLucene(queue, numHits, fillFields); else return newLucene(queue, numHits, fillFields); } else { if (trackMaxScore) return newLucene(queue, numHits, fillFields); else if (trackDocScores) return newLucene(queue, numHits, fillFields); else return newLucene(queue, numHits, fillFields); } } void TopFieldCollector::add(int32_t slot, int32_t doc, double score) { bottom = boost::static_pointer_cast(pq->add(newLucene(slot, docBase + doc, score))); queueFull = (totalHits == numHits); } void TopFieldCollector::populateResults(Collection results, int32_t howMany) { if (fillFields) { FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); for (int32_t i = howMany - 1; i >= 0; --i) results[i] = queue->fillFields(boost::static_pointer_cast(queue->pop())); } else { for (int32_t i = howMany - 1; i >= 0; --i) { FieldValueHitQueueEntryPtr entry(boost::static_pointer_cast(pq->pop())); results[i] = newLucene(entry->doc, entry->score); } } } TopDocsPtr TopFieldCollector::newTopDocs(Collection results, int32_t start) { if (!results) { results = EMPTY_SCOREDOCS(); // Set maxScore to NaN, in case this is a maxScore tracking collector maxScore = std::numeric_limits::quiet_NaN(); } // If this is a maxScoring tracking collector and there were no results return newLucene(totalHits, results, boost::static_pointer_cast(pq)->getFields(), maxScore); } bool TopFieldCollector::acceptsDocsOutOfOrder() { return false; } OneComparatorNonScoringCollector::OneComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : TopFieldCollector(queue, numHits, fillFields) { } OneComparatorNonScoringCollector::~OneComparatorNonScoringCollector() { } void OneComparatorNonScoringCollector::initialize() { 
TopFieldCollector::initialize(); FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); comparator = queue->getComparators()[0]; reverseMul = queue->getReverseMul()[0]; } void OneComparatorNonScoringCollector::updateBottom(int32_t doc) { // bottom.score is already set to NaN in add(). bottom->doc = docBase + doc; bottom = boost::static_pointer_cast(pq->updateTop()); } void OneComparatorNonScoringCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { if ((reverseMul * comparator->compareBottom(doc)) <= 0) { // since docs are visited in doc Id order, if compare is 0, it means this document is largest // than anything else in the queue, and therefore not competitive. return; } // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc); comparator->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, std::numeric_limits::quiet_NaN()); if (queueFull) comparator->setBottom(bottom->slot); } } void OneComparatorNonScoringCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) { this->docBase = docBase; comparator->setNextReader(reader, docBase); } void OneComparatorNonScoringCollector::setScorer(ScorerPtr scorer) { comparator->setScorer(scorer); } OutOfOrderOneComparatorNonScoringCollector::OutOfOrderOneComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { } OutOfOrderOneComparatorNonScoringCollector::~OutOfOrderOneComparatorNonScoringCollector() { } void OutOfOrderOneComparatorNonScoringCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive int32_t cmp = reverseMul * comparator->compareBottom(doc); if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) return; // This hit is 
competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc); comparator->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, std::numeric_limits::quiet_NaN()); if (queueFull) comparator->setBottom(bottom->slot); } } bool OutOfOrderOneComparatorNonScoringCollector::acceptsDocsOutOfOrder() { return true; } OneComparatorScoringNoMaxScoreCollector::OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { } OneComparatorScoringNoMaxScoreCollector::~OneComparatorScoringNoMaxScoreCollector() { } void OneComparatorScoringNoMaxScoreCollector::updateBottom(int32_t doc, double score) { bottom->doc = docBase + doc; bottom->score = score; bottom = boost::static_pointer_cast(pq->updateTop()); } void OneComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { if ((reverseMul * comparator->compareBottom(doc)) <= 0) { // since docs are visited in doc Id order, if compare is 0, it means this document is largest // than anything else in the queue, and therefore not competitive. return; } // Compute the score only if the hit is competitive. double score = scorer->score(); // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc, score); comparator->setBottom(bottom->slot); } else { // Compute the score only if the hit is competitive. 
double score = scorer->score(); // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, score); if (queueFull) comparator->setBottom(bottom->slot); } } void OneComparatorScoringNoMaxScoreCollector::setScorer(ScorerPtr scorer) { this->scorer = scorer; comparator->setScorer(scorer); } OutOfOrderOneComparatorScoringNoMaxScoreCollector::OutOfOrderOneComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields) { } OutOfOrderOneComparatorScoringNoMaxScoreCollector::~OutOfOrderOneComparatorScoringNoMaxScoreCollector() { } void OutOfOrderOneComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive int32_t cmp = reverseMul * comparator->compareBottom(doc); if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) return; // Compute the score only if the hit is competitive. double score = scorer->score(); // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc, score); comparator->setBottom(bottom->slot); } else { // Compute the score only if the hit is competitive. double score = scorer->score(); // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, score); if (queueFull) comparator->setBottom(bottom->slot); } } bool OutOfOrderOneComparatorScoringNoMaxScoreCollector::acceptsDocsOutOfOrder() { return true; } OneComparatorScoringMaxScoreCollector::OneComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { // Must set maxScore to NEG_INF, or otherwise std::max always returns NaN. 
this->maxScore = -std::numeric_limits::infinity(); } OneComparatorScoringMaxScoreCollector::~OneComparatorScoringMaxScoreCollector() { } void OneComparatorScoringMaxScoreCollector::updateBottom(int32_t doc, double score) { bottom->doc = docBase + doc; bottom->score = score; bottom = boost::static_pointer_cast(pq->updateTop()); } void OneComparatorScoringMaxScoreCollector::collect(int32_t doc) { double score = scorer->score(); if (score > maxScore) maxScore = score; ++totalHits; if (queueFull) { if ((reverseMul * comparator->compareBottom(doc)) <= 0) { // since docs are visited in doc Id order, if compare is 0, it means this document is largest // than anything else in the queue, and therefore not competitive. return; } // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc, score); comparator->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, score); if (queueFull) comparator->setBottom(bottom->slot); } } void OneComparatorScoringMaxScoreCollector::setScorer(ScorerPtr scorer) { this->scorer = scorer; OneComparatorNonScoringCollector::setScorer(scorer); } OutOfOrderOneComparatorScoringMaxScoreCollector::OutOfOrderOneComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields) { } OutOfOrderOneComparatorScoringMaxScoreCollector::~OutOfOrderOneComparatorScoringMaxScoreCollector() { } void OutOfOrderOneComparatorScoringMaxScoreCollector::collect(int32_t doc) { double score = scorer->score(); if (score > maxScore) maxScore = score; ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive int32_t cmp = reverseMul * comparator->compareBottom(doc); if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) return; // This hit is 
competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc, score); comparator->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, score); if (queueFull) comparator->setBottom(bottom->slot); } } bool OutOfOrderOneComparatorScoringMaxScoreCollector::acceptsDocsOutOfOrder() { return true; } MultiComparatorNonScoringCollector::MultiComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : TopFieldCollector(queue, numHits, fillFields) { } MultiComparatorNonScoringCollector::~MultiComparatorNonScoringCollector() { } void MultiComparatorNonScoringCollector::initialize() { TopFieldCollector::initialize(); FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); comparators = queue->getComparators(); reverseMul = queue->getReverseMul(); } void MultiComparatorNonScoringCollector::updateBottom(int32_t doc) { // bottom.score is already set to NaN in add(). bottom->doc = docBase + doc; bottom = boost::static_pointer_cast(pq->updateTop()); } void MultiComparatorNonScoringCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. break; } else if (i == comparators.size() - 1) { // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are // visited in doc Id order, which means this doc cannot compete with any other document // in the queue. 
return; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(bottom->slot, doc); updateBottom(doc); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(slot, doc); add(slot, doc, std::numeric_limits::quiet_NaN()); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } } } void MultiComparatorNonScoringCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) { this->docBase = docBase; for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setNextReader(reader, docBase); } void MultiComparatorNonScoringCollector::setScorer(ScorerPtr scorer) { // set the scorer on all comparators for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setScorer(scorer); } OutOfOrderMultiComparatorNonScoringCollector::OutOfOrderMultiComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { } OutOfOrderMultiComparatorNonScoringCollector::~OutOfOrderMultiComparatorNonScoringCollector() { } void OutOfOrderMultiComparatorNonScoringCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. break; } else if (i == comparators.size() - 1) { // This is the equals case. 
if (doc + docBase > bottom->doc) { // Definitely not competitive return; } break; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(bottom->slot, doc); updateBottom(doc); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(slot, doc); add(slot, doc, std::numeric_limits::quiet_NaN()); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } } } bool OutOfOrderMultiComparatorNonScoringCollector::acceptsDocsOutOfOrder() { return true; } MultiComparatorScoringMaxScoreCollector::MultiComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { // Must set maxScore to NEG_INF, or otherwise std::max always returns NaN. this->maxScore = -std::numeric_limits::infinity(); } MultiComparatorScoringMaxScoreCollector::~MultiComparatorScoringMaxScoreCollector() { } void MultiComparatorScoringMaxScoreCollector::updateBottom(int32_t doc, double score) { bottom->doc = docBase + doc; bottom->score = score; bottom = boost::static_pointer_cast(pq->updateTop()); } void MultiComparatorScoringMaxScoreCollector::collect(int32_t doc) { double score = ScorerPtr(_scorer)->score(); if (score > maxScore) maxScore = score; ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. 
break; } else if (i == comparators.size() - 1) { // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are // visited in doc Id order, which means this doc cannot compete with any other document // in the queue. return; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(bottom->slot, doc); updateBottom(doc, score); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(slot, doc); add(slot, doc, score); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } } } void MultiComparatorScoringMaxScoreCollector::setScorer(ScorerPtr scorer) { this->_scorer = scorer; MultiComparatorNonScoringCollector::setScorer(scorer); } OutOfOrderMultiComparatorScoringMaxScoreCollector::OutOfOrderMultiComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields) { } OutOfOrderMultiComparatorScoringMaxScoreCollector::~OutOfOrderMultiComparatorScoringMaxScoreCollector() { } void OutOfOrderMultiComparatorScoringMaxScoreCollector::collect(int32_t doc) { double score = ScorerPtr(_scorer)->score(); if (score > maxScore) maxScore = score; ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. 
break; } else if (i == comparators.size() - 1) { // This is the equals case. if (doc + docBase > bottom->doc) { // Definitely not competitive return; } break; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(bottom->slot, doc); updateBottom(doc, score); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(slot, doc); add(slot, doc, score); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } } } bool OutOfOrderMultiComparatorScoringMaxScoreCollector::acceptsDocsOutOfOrder() { return true; } MultiComparatorScoringNoMaxScoreCollector::MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { } MultiComparatorScoringNoMaxScoreCollector::~MultiComparatorScoringNoMaxScoreCollector() { } void MultiComparatorScoringNoMaxScoreCollector::updateBottom(int32_t doc, double score) { bottom->doc = docBase + doc; bottom->score = score; bottom = boost::static_pointer_cast(pq->updateTop()); } void MultiComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. break; } else if (i == comparators.size() - 1) { // Here c=0. 
If we're at the last comparator, this doc is not competitive, since docs are // visited in doc Id order, which means this doc cannot compete with any other document // in the queue. return; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(bottom->slot, doc); // Compute score only if it is competitive. double score = ScorerPtr(_scorer)->score(); updateBottom(doc, score); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(slot, doc); // Compute score only if it is competitive. double score = ScorerPtr(_scorer)->score(); add(slot, doc, score); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } } } void MultiComparatorScoringNoMaxScoreCollector::setScorer(ScorerPtr scorer) { this->_scorer = scorer; MultiComparatorNonScoringCollector::setScorer(scorer); } OutOfOrderMultiComparatorScoringNoMaxScoreCollector::OutOfOrderMultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields) { } OutOfOrderMultiComparatorScoringNoMaxScoreCollector::~OutOfOrderMultiComparatorScoringNoMaxScoreCollector() { } void OutOfOrderMultiComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. 
break; } else if (i == comparators.size() - 1) { // This is the equals case. if (doc + docBase > bottom->doc) { // Definitely not competitive return; } break; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(bottom->slot, doc); // Compute score only if it is competitive. double score = ScorerPtr(_scorer)->score(); updateBottom(doc, score); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->copy(slot, doc); // Compute score only if it is competitive. double score = ScorerPtr(_scorer)->score(); add(slot, doc, score); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) (*cmp)->setBottom(bottom->slot); } } } void OutOfOrderMultiComparatorScoringNoMaxScoreCollector::setScorer(ScorerPtr scorer) { this->_scorer = scorer; MultiComparatorScoringNoMaxScoreCollector::setScorer(scorer); } bool OutOfOrderMultiComparatorScoringNoMaxScoreCollector::acceptsDocsOutOfOrder() { return true; } } LucenePlusPlus-rel_3.0.4/src/core/search/TopFieldDocs.cpp000066400000000000000000000012511217574114600233210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopFieldDocs.h" namespace Lucene { TopFieldDocs::TopFieldDocs(int32_t totalHits, Collection scoreDocs, Collection fields, double maxScore) : TopDocs(totalHits, scoreDocs, maxScore) { this->fields = fields; } TopFieldDocs::~TopFieldDocs() { } } LucenePlusPlus-rel_3.0.4/src/core/search/TopScoreDocCollector.cpp000066400000000000000000000101351217574114600250360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopScoreDocCollector.h" #include "_TopScoreDocCollector.h" #include "HitQueue.h" #include "ScoreDoc.h" #include "Scorer.h" #include "TopDocs.h" #include "MiscUtils.h" namespace Lucene { TopScoreDocCollector::TopScoreDocCollector(int32_t numHits) : TopDocsCollector(newLucene(numHits, true)) { // HitQueue implements getSentinelObject to return a ScoreDoc, so we know that at this point top() // is already initialized. pqTop = pq->top(); docBase = 0; } TopScoreDocCollector::~TopScoreDocCollector() { } TopScoreDocCollectorPtr TopScoreDocCollector::create(int32_t numHits, bool docsScoredInOrder) { if (docsScoredInOrder) return newLucene(numHits); else return newLucene(numHits); } TopDocsPtr TopScoreDocCollector::newTopDocs(Collection results, int32_t start) { if (!results) return EMPTY_TOPDOCS(); // We need to compute maxScore in order to set it in TopDocs. If start == 0, it means the largest element // is already in results, use its score as maxScore. Otherwise pop everything else, until the largest // element is extracted and use its score as maxScore. 
double maxScore = std::numeric_limits::quiet_NaN(); if (start == 0) maxScore = results[0]->score; else { for (int32_t i = pq->size(); i > 1; --i) pq->pop(); maxScore = pq->pop()->score; } return newLucene(totalHits, results, maxScore); } void TopScoreDocCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) { this->docBase = docBase; } void TopScoreDocCollector::setScorer(ScorerPtr scorer) { this->_scorer = scorer; } InOrderTopScoreDocCollector::InOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) { } InOrderTopScoreDocCollector::~InOrderTopScoreDocCollector() { } void InOrderTopScoreDocCollector::collect(int32_t doc) { double score = ScorerPtr(_scorer)->score(); // This collector cannot handle these scores BOOST_ASSERT(score != -std::numeric_limits::infinity()); BOOST_ASSERT(!MiscUtils::isNaN(score)); ++totalHits; if (score <= pqTop->score) { // Since docs are returned in-order (ie., increasing doc Id), a document with equal score to // pqTop.score cannot compete since HitQueue favours documents with lower doc Ids. Therefore // reject those docs too. 
return; } pqTop->doc = doc + docBase; pqTop->score = score; pqTop = pq->updateTop(); } bool InOrderTopScoreDocCollector::acceptsDocsOutOfOrder() { return false; } OutOfOrderTopScoreDocCollector::OutOfOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) { } OutOfOrderTopScoreDocCollector::~OutOfOrderTopScoreDocCollector() { } void OutOfOrderTopScoreDocCollector::collect(int32_t doc) { double score = ScorerPtr(_scorer)->score(); // This collector cannot handle NaN BOOST_ASSERT(!MiscUtils::isNaN(score)); ++totalHits; doc += docBase; if (score < pqTop->score || (score == pqTop->score && doc > pqTop->doc)) return; pqTop->doc = doc; pqTop->score = score; pqTop = pq->updateTop(); } bool OutOfOrderTopScoreDocCollector::acceptsDocsOutOfOrder() { return true; } } LucenePlusPlus-rel_3.0.4/src/core/search/Weight.cpp000066400000000000000000000010051217574114600222260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Weight.h" namespace Lucene { Weight::~Weight() { } bool Weight::scoresDocsOutOfOrder() { return false; } } LucenePlusPlus-rel_3.0.4/src/core/search/WildcardQuery.cpp000066400000000000000000000065451217574114600235740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "WildcardQuery.h" #include "WildcardTermEnum.h" #include "Term.h" #include "PrefixQuery.h" #include "SingleTermEnum.h" #include "MiscUtils.h" namespace Lucene { WildcardQuery::WildcardQuery(TermPtr term) { this->term = term; String text(term->text()); this->termContainsWildcard = boost::contains(text, L"*") || boost::contains(text, L"?"); this->termIsPrefix = termContainsWildcard && !boost::contains(text, L"?") && text.find_first_of(L"*") == text.length() - 1; } WildcardQuery::~WildcardQuery() { } FilteredTermEnumPtr WildcardQuery::getEnum(IndexReaderPtr reader) { if (termContainsWildcard) return newLucene(reader, getTerm()); else return newLucene(reader, getTerm()); } TermPtr WildcardQuery::getTerm() { return term; } QueryPtr WildcardQuery::rewrite(IndexReaderPtr reader) { if (termIsPrefix) { MultiTermQueryPtr rewritten(newLucene(term->createTerm(term->text().substr(0, term->text().find('*'))))); rewritten->setBoost(getBoost()); rewritten->setRewriteMethod(getRewriteMethod()); return rewritten; } else return MultiTermQuery::rewrite(reader); } String WildcardQuery::toString(const String& field) { StringStream buffer; if (term->field() != field) buffer << term->field() << L":"; buffer << term->text() << boostString(); return buffer.str(); } LuceneObjectPtr WildcardQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(term)); WildcardQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->termContainsWildcard = termContainsWildcard; cloneQuery->termIsPrefix = termIsPrefix; cloneQuery->term = term; return cloneQuery; } int32_t WildcardQuery::hashCode() { int32_t prime = 31; int32_t result = MultiTermQuery::hashCode(); result = prime * result + (term ? 
term->hashCode() : 0); return result; } bool WildcardQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!MultiTermQuery::equals(other)) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; WildcardQueryPtr otherWildcardQuery(boost::dynamic_pointer_cast(other)); if (!otherWildcardQuery) return false; if (!term) { if (otherWildcardQuery->term) return false; } else if (!term->equals(otherWildcardQuery->term)) return false; return true; } } LucenePlusPlus-rel_3.0.4/src/core/search/WildcardTermEnum.cpp000066400000000000000000000113721217574114600242150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "WildcardTermEnum.h" #include "Term.h" #include "IndexReader.h" namespace Lucene { const wchar_t WildcardTermEnum::WILDCARD_STRING = L'*'; const wchar_t WildcardTermEnum::WILDCARD_CHAR = L'?'; WildcardTermEnum::WildcardTermEnum(IndexReaderPtr reader, TermPtr term) { _endEnum = false; searchTerm = term; field = searchTerm->field(); String searchTermText(searchTerm->text()); String::size_type sidx = searchTermText.find(WILDCARD_STRING); String::size_type cidx = searchTermText.find(WILDCARD_CHAR); String::size_type idx = sidx; if (idx == String::npos) idx = cidx; else if (cidx != String::npos) idx = std::min(idx, cidx); pre = idx != String::npos ? 
searchTerm->text().substr(0, idx) : L""; preLen = pre.length(); text = searchTermText.substr(preLen); setEnum(reader->terms(newLucene(searchTerm->field(), pre))); } WildcardTermEnum::~WildcardTermEnum() { } bool WildcardTermEnum::termCompare(TermPtr term) { if (field == term->field()) { String searchText(term->text()); if (boost::starts_with(searchText, pre)) return wildcardEquals(text, 0, searchText, preLen); } _endEnum = true; return false; } double WildcardTermEnum::difference() { return 1.0; } bool WildcardTermEnum::endEnum() { return _endEnum; } bool WildcardTermEnum::wildcardEquals(const String& pattern, int32_t patternIdx, const String& string, int32_t stringIdx) { int32_t p = patternIdx; for (int32_t s = stringIdx; ; ++p, ++s) { // End of string yet? bool sEnd = (s >= (int32_t)string.length()); // End of pattern yet? bool pEnd = (p >= (int32_t)pattern.length()); // If we're looking at the end of the string if (sEnd) { // Assume the only thing left on the pattern is/are wildcards bool justWildcardsLeft = true; // Current wildcard position int32_t wildcardSearchPos = p; // While we haven't found the end of the pattern, and haven't encountered any non-wildcard characters while (wildcardSearchPos < (int32_t)pattern.length() && justWildcardsLeft) { // Check the character at the current position wchar_t wildchar = pattern[wildcardSearchPos]; // If it's not a wildcard character, then there is more pattern information after this/these wildcards. if (wildchar != WILDCARD_CHAR && wildchar != WILDCARD_STRING) justWildcardsLeft = false; else { // to prevent "cat" matches "ca??" if (wildchar == WILDCARD_CHAR) return false; // Look at the next character ++wildcardSearchPos; } } // This was a prefix wildcard search, and we've matched, so return true. if (justWildcardsLeft) return true; } // If we've gone past the end of the string, or the pattern, return false. if (sEnd || pEnd) break; // Match a single character, so continue. 
if (pattern[p] == WILDCARD_CHAR) continue; if (pattern[p] == WILDCARD_STRING) { // Look at the character beyond the '*' characters. while (p < (int32_t)pattern.length() && pattern[p] == WILDCARD_STRING) ++p; // Examine the string, starting at the last character. for (int32_t i = string.length(); i >= s; --i) { if (wildcardEquals(pattern, p, string, i)) return true; } break; } if (pattern[p] != string[s]) break; } return false; } } LucenePlusPlus-rel_3.0.4/src/core/search/function/000077500000000000000000000000001217574114600221245ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/search/function/ByteFieldSource.cpp000066400000000000000000000050721217574114600256640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ByteFieldSource.h" #include "_ByteFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { ByteFieldSource::ByteFieldSource(const String& field, ByteParserPtr parser) : FieldCacheSource(field) { this->parser = parser; } ByteFieldSource::~ByteFieldSource() { } String ByteFieldSource::description() { return L"byte(" + FieldCacheSource::description() + L")"; } DocValuesPtr ByteFieldSource::getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader) { Collection arr(cache->getBytes(reader, field, parser)); return newLucene(shared_from_this(), arr); } bool ByteFieldSource::cachedFieldSourceEquals(FieldCacheSourcePtr other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; ByteFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) return false; return parser ? 
MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; } int32_t ByteFieldSource::cachedFieldSourceHashCode() { return StringUtils::hashCode(parser ? ByteParser::_getClassName() : ByteFieldSource::_getClassName()); } ByteDocValues::ByteDocValues(ByteFieldSourcePtr source, Collection arr) { this->_source = source; this->arr = arr; } ByteDocValues::~ByteDocValues() { } double ByteDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return (double)arr[doc]; } int32_t ByteDocValues::intVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return (int32_t)arr[doc]; } String ByteDocValues::toString(int32_t doc) { return ByteFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); } CollectionValue ByteDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.4/src/core/search/function/CustomScoreProvider.cpp000066400000000000000000000051231217574114600266120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CustomScoreProvider.h" #include "Explanation.h" namespace Lucene { CustomScoreProvider::CustomScoreProvider(IndexReaderPtr reader) { this->reader = reader; } CustomScoreProvider::~CustomScoreProvider() { } double CustomScoreProvider::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { if (valSrcScores.size() == 1) return customScore(doc, subQueryScore, valSrcScores[0]); if (valSrcScores.empty()) return customScore(doc, subQueryScore, 1); double score = subQueryScore; for (Collection::iterator srcScore = valSrcScores.begin(); srcScore != valSrcScores.end(); ++srcScore) score *= *srcScore; return score; } double CustomScoreProvider::customScore(int32_t doc, double subQueryScore, double valSrcScore) { return subQueryScore * valSrcScore; } ExplanationPtr CustomScoreProvider::customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls) { if (valSrcExpls.size() == 1) return customExplain(doc, subQueryExpl, valSrcExpls[0]); if (valSrcExpls.empty()) return subQueryExpl; double valSrcScore = 1; for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) valSrcScore *= (*srcExpl)->getValue(); ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); exp->addDetail(subQueryExpl); for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) exp->addDetail(*srcExpl); return exp; } ExplanationPtr CustomScoreProvider::customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl) { double valSrcScore = 1; if (valSrcExpl) valSrcScore *= valSrcExpl->getValue(); ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); exp->addDetail(subQueryExpl); exp->addDetail(valSrcExpl); return exp; } } 
LucenePlusPlus-rel_3.0.4/src/core/search/function/CustomScoreQuery.cpp000066400000000000000000000336111217574114600261300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CustomScoreQuery.h" #include "_CustomScoreQuery.h" #include "ValueSourceQuery.h" #include "ComplexExplanation.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { CustomScoreQuery::CustomScoreQuery(QueryPtr subQuery) { ConstructQuery(subQuery, Collection::newInstance()); } CustomScoreQuery::CustomScoreQuery(QueryPtr subQuery, ValueSourceQueryPtr valSrcQuery) { Collection valSrcQueries(Collection::newInstance()); if (valSrcQuery) valSrcQueries.add(valSrcQuery); ConstructQuery(subQuery, valSrcQueries); } CustomScoreQuery::CustomScoreQuery(QueryPtr subQuery, Collection valSrcQueries) { ConstructQuery(subQuery, valSrcQueries); } CustomScoreQuery::~CustomScoreQuery() { } void CustomScoreQuery::ConstructQuery(QueryPtr subQuery, Collection valSrcQueries) { this->strict = false; this->subQuery = subQuery; this->valSrcQueries = valSrcQueries ? 
valSrcQueries : Collection::newInstance(); if (!subQuery) boost::throw_exception(IllegalArgumentException(L" must not be null!")); } QueryPtr CustomScoreQuery::rewrite(IndexReaderPtr reader) { CustomScoreQueryPtr cloneQuery; QueryPtr sq = subQuery->rewrite(reader); if (sq != subQuery) { cloneQuery = boost::static_pointer_cast(clone()); cloneQuery->subQuery = sq; } for (int32_t i = 0; i < valSrcQueries.size(); ++i) { ValueSourceQueryPtr v = boost::dynamic_pointer_cast(valSrcQueries[i]->rewrite(reader)); if (v != valSrcQueries[i]) { if (!cloneQuery) cloneQuery = boost::static_pointer_cast(clone()); cloneQuery->valSrcQueries[i] = v; } } return cloneQuery ? cloneQuery : shared_from_this(); } void CustomScoreQuery::extractTerms(SetTerm terms) { subQuery->extractTerms(terms); for (Collection::iterator srcQuery = valSrcQueries.begin(); srcQuery != valSrcQueries.end(); ++srcQuery) (*srcQuery)->extractTerms(terms); } LuceneObjectPtr CustomScoreQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = Query::clone(other ? other : newLucene(subQuery)); CustomScoreQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->strict = strict; cloneQuery->subQuery = boost::dynamic_pointer_cast(subQuery->clone()); cloneQuery->valSrcQueries = Collection::newInstance(valSrcQueries.size()); for (int32_t i = 0; i < valSrcQueries.size(); ++i) cloneQuery->valSrcQueries[i] = boost::dynamic_pointer_cast(valSrcQueries[i]->clone()); return cloneQuery; } String CustomScoreQuery::toString(const String& field) { StringStream buffer; buffer << name() << L"(" << subQuery->toString(field); for (Collection::iterator srcQuery = valSrcQueries.begin(); srcQuery != valSrcQueries.end(); ++srcQuery) buffer << L", " << (*srcQuery)->toString(field); buffer << L")" << (strict ? 
L" STRICT" : L"") << boostString(); return buffer.str(); } bool CustomScoreQuery::equals(LuceneObjectPtr other) { CustomScoreQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; if (getBoost() != otherQuery->getBoost() || !subQuery->equals(otherQuery->subQuery) || strict != otherQuery->strict) return false; return valSrcQueries.equals(otherQuery->valSrcQueries, luceneEquals()); } int32_t CustomScoreQuery::hashCode() { return (StringUtils::hashCode(CustomScoreQuery::_getClassName()) + StringUtils::hashCode(Query::_getClassName()) + MiscUtils::hashCode(valSrcQueries.begin(), valSrcQueries.end(), MiscUtils::hashLucene)) ^ MiscUtils::doubleToIntBits(getBoost()) ^ (strict ? 1234 : 4321); } CustomScoreProviderPtr CustomScoreQuery::getCustomScoreProvider(IndexReaderPtr reader) { // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider return newLucene(shared_from_this(), reader); } double CustomScoreQuery::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { if (valSrcScores.size() == 1) return customScore(doc, subQueryScore, valSrcScores[0]); if (valSrcScores.empty()) return customScore(doc, subQueryScore, 1); double score = subQueryScore; for (Collection::iterator srcScore = valSrcScores.begin(); srcScore != valSrcScores.end(); ++srcScore) score *= *srcScore; return score; } double CustomScoreQuery::customScore(int32_t doc, double subQueryScore, double valSrcScore) { return subQueryScore * valSrcScore; } ExplanationPtr CustomScoreQuery::customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls) { if (valSrcExpls.size() == 1) return customExplain(doc, subQueryExpl, valSrcExpls[0]); if (valSrcExpls.empty()) return subQueryExpl; double valSrcScore = 1; for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) valSrcScore *= (*srcExpl)->getValue(); ExplanationPtr exp(newLucene(valSrcScore * 
subQueryExpl->getValue(), L"custom score: product of:")); exp->addDetail(subQueryExpl); for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) exp->addDetail(*srcExpl); return exp; } ExplanationPtr CustomScoreQuery::customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl) { double valSrcScore = 1; if (valSrcExpl) valSrcScore *= valSrcExpl->getValue(); ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); exp->addDetail(subQueryExpl); exp->addDetail(valSrcExpl); return exp; } WeightPtr CustomScoreQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } bool CustomScoreQuery::isStrict() { return strict; } void CustomScoreQuery::setStrict(bool strict) { this->strict = strict; } String CustomScoreQuery::name() { return L"custom"; } DefaultCustomScoreProvider::DefaultCustomScoreProvider(CustomScoreQueryPtr customQuery, IndexReaderPtr reader) : CustomScoreProvider(reader) { _customQuery = customQuery; } DefaultCustomScoreProvider::~DefaultCustomScoreProvider() { } double DefaultCustomScoreProvider::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { return CustomScoreQueryPtr(_customQuery)->customScore(doc, subQueryScore, valSrcScores); } double DefaultCustomScoreProvider::customScore(int32_t doc, double subQueryScore, double valSrcScore) { return CustomScoreQueryPtr(_customQuery)->customScore(doc, subQueryScore, valSrcScore); } ExplanationPtr DefaultCustomScoreProvider::customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls) { return CustomScoreQueryPtr(_customQuery)->customExplain(doc, subQueryExpl, valSrcExpls); } ExplanationPtr DefaultCustomScoreProvider::customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl) { return CustomScoreQueryPtr(_customQuery)->customExplain(doc, subQueryExpl, valSrcExpl); } 
CustomWeight::CustomWeight(CustomScoreQueryPtr query, SearcherPtr searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); this->subQueryWeight = query->subQuery->weight(searcher); this->valSrcWeights = Collection::newInstance(query->valSrcQueries.size()); for (int32_t i = 0; i < query->valSrcQueries.size(); ++i) this->valSrcWeights[i] = query->valSrcQueries[i]->createWeight(searcher); this->qStrict = query->strict; } CustomWeight::~CustomWeight() { } QueryPtr CustomWeight::getQuery() { return query; } double CustomWeight::getValue() { return query->getBoost(); } double CustomWeight::sumOfSquaredWeights() { double sum = subQueryWeight->sumOfSquaredWeights(); for (int32_t i = 0; i < valSrcWeights.size(); ++i) { if (qStrict) valSrcWeights[i]->sumOfSquaredWeights(); // do not include ValueSource part in the query normalization else sum += valSrcWeights[i]->sumOfSquaredWeights(); } sum *= query->getBoost() * query->getBoost(); // boost each sub-weight return sum; } void CustomWeight::normalize(double norm) { norm *= query->getBoost(); // incorporate boost subQueryWeight->normalize(norm); for (int32_t i = 0; i < valSrcWeights.size(); ++i) { if (qStrict) valSrcWeights[i]->normalize(1.0); // do not normalize the ValueSource part else valSrcWeights[i]->normalize(norm); } } ScorerPtr CustomWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { // Pass true for "scoresDocsInOrder", because we require in-order scoring, even if caller does not, // since we call advance on the valSrcScorers. 
Pass false for "topScorer" because we will not invoke // score(Collector) on these scorers ScorerPtr subQueryScorer(subQueryWeight->scorer(reader, true, false)); if (!subQueryScorer) return ScorerPtr(); Collection valSrcScorers(Collection::newInstance(valSrcWeights.size())); for (int32_t i = 0; i < valSrcScorers.size(); ++i) valSrcScorers[i] = valSrcWeights[i]->scorer(reader, true, topScorer); return newLucene(similarity, reader, shared_from_this(), subQueryScorer, valSrcScorers); } ExplanationPtr CustomWeight::explain(IndexReaderPtr reader, int32_t doc) { ExplanationPtr explain(doExplain(reader, doc)); return explain ? explain : newLucene(0.0, L"no matching docs"); } ExplanationPtr CustomWeight::doExplain(IndexReaderPtr reader, int32_t doc) { ExplanationPtr subQueryExpl(subQueryWeight->explain(reader, doc)); if (!subQueryExpl->isMatch()) return subQueryExpl; // match Collection valSrcExpls(Collection::newInstance(valSrcWeights.size())); for (int32_t i = 0; i < valSrcWeights.size(); ++i) valSrcExpls[i] = valSrcWeights[i]->explain(reader, doc); ExplanationPtr customExp(query->getCustomScoreProvider(reader)->customExplain(doc, subQueryExpl, valSrcExpls)); double sc = getValue() * customExp->getValue(); ExplanationPtr res(newLucene(true, sc, query->toString() + L", product of:")); res->addDetail(customExp); res->addDetail(newLucene(getValue(), L"queryBoost")); // actually using the q boost as q weight (== weight value) return res; } bool CustomWeight::scoresDocsOutOfOrder() { return false; } CustomScorer::CustomScorer(SimilarityPtr similarity, IndexReaderPtr reader, CustomWeightPtr weight, ScorerPtr subQueryScorer, Collection valSrcScorers) : Scorer(similarity) { this->qWeight = weight->getValue(); this->subQueryScorer = subQueryScorer; this->valSrcScorers = valSrcScorers; this->reader = reader; this->vScores = Collection::newInstance(valSrcScorers.size()); this->provider = weight->query->getCustomScoreProvider(reader); } CustomScorer::~CustomScorer() { } int32_t 
CustomScorer::nextDoc() { int32_t doc = subQueryScorer->nextDoc(); if (doc != NO_MORE_DOCS) { for (int32_t i = 0; i < valSrcScorers.size(); ++i) valSrcScorers[i]->advance(doc); } return doc; } int32_t CustomScorer::docID() { return subQueryScorer->docID(); } double CustomScorer::score() { for (int32_t i = 0; i < valSrcScorers.size(); ++i) vScores[i] = valSrcScorers[i]->score(); return qWeight * provider->customScore(subQueryScorer->docID(), subQueryScorer->score(), vScores); } int32_t CustomScorer::advance(int32_t target) { int32_t doc = subQueryScorer->advance(target); if (doc != NO_MORE_DOCS) { for (int32_t i = 0; i < valSrcScorers.size(); ++i) valSrcScorers[i]->advance(doc); } return doc; } } LucenePlusPlus-rel_3.0.4/src/core/search/function/DocValues.cpp000066400000000000000000000046131217574114600245210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocValues.h" #include "Explanation.h" #include "MiscUtils.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { DocValues::DocValues() { minVal = std::numeric_limits::quiet_NaN(); maxVal = std::numeric_limits::quiet_NaN(); avgVal = std::numeric_limits::quiet_NaN(); computed = false; } DocValues::~DocValues() { } int32_t DocValues::intVal(int32_t doc) { return (int32_t)doubleVal(doc); } int64_t DocValues::longVal(int32_t doc) { return (int64_t)doubleVal(doc); } String DocValues::strVal(int32_t doc) { return StringUtils::toString(doubleVal(doc)); } ExplanationPtr DocValues::explain(int32_t doc) { return newLucene(doubleVal(doc), toString(doc)); } CollectionValue DocValues::getInnerArray() { boost::throw_exception(UnsupportedOperationException(L"This optional method is for test purposes only")); return VariantUtils::null(); } void DocValues::compute() { if (computed) return; double sum = 0; int32_t n = 0; while (true) { double val; try { val = doubleVal(n); } catch (IndexOutOfBoundsException&) { break; } sum += val; minVal = MiscUtils::isNaN(minVal) ? val : std::min(minVal, val); maxVal = MiscUtils::isNaN(maxVal) ? val : std::max(maxVal, val); ++n; } avgVal = n == 0 ? std::numeric_limits::quiet_NaN() : sum / (double)n; computed = true; } double DocValues::getMinValue() { compute(); return minVal; } double DocValues::getMaxValue() { compute(); return maxVal; } double DocValues::getAverageValue() { compute(); return avgVal; } } LucenePlusPlus-rel_3.0.4/src/core/search/function/DoubleFieldSource.cpp000066400000000000000000000045731217574114600262000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DoubleFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DoubleFieldSource::DoubleFieldSource(const String& field, DoubleParserPtr parser) : FieldCacheSource(field) { this->parser = parser; } DoubleFieldSource::~DoubleFieldSource() { } String DoubleFieldSource::description() { return L"double(" + FieldCacheSource::description() + L")"; } DocValuesPtr DoubleFieldSource::getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader) { Collection arr(cache->getDoubles(reader, field, parser)); return newLucene(shared_from_this(), arr); } bool DoubleFieldSource::cachedFieldSourceEquals(FieldCacheSourcePtr other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; DoubleFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) return false; return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; } int32_t DoubleFieldSource::cachedFieldSourceHashCode() { return StringUtils::hashCode(parser ? 
DoubleParser::_getClassName() : DoubleFieldSource::_getClassName()); } DoubleDocValues::DoubleDocValues(DoubleFieldSourcePtr source, Collection arr) { this->_source = source; this->arr = arr; } DoubleDocValues::~DoubleDocValues() { } double DoubleDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return arr[doc]; } String DoubleDocValues::toString(int32_t doc) { return DoubleFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(doubleVal(doc)); } CollectionValue DoubleDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.4/src/core/search/function/FieldCacheSource.cpp000066400000000000000000000024271217574114600257650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheSource.h" #include "FieldCache.h" #include "StringUtils.h" namespace Lucene { FieldCacheSource::FieldCacheSource(const String& field) { this->field = field; } FieldCacheSource::~FieldCacheSource() { } DocValuesPtr FieldCacheSource::getValues(IndexReaderPtr reader) { return getCachedFieldValues(FieldCache::DEFAULT(), field, reader); } String FieldCacheSource::description() { return field; } bool FieldCacheSource::equals(LuceneObjectPtr other) { FieldCacheSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) return false; return field == otherSource->field && cachedFieldSourceEquals(otherSource); } int32_t FieldCacheSource::hashCode() { return StringUtils::hashCode(field) + cachedFieldSourceHashCode(); } } LucenePlusPlus-rel_3.0.4/src/core/search/function/FieldScoreQuery.cpp000066400000000000000000000023301217574114600256730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldScoreQuery.h" #include "ByteFieldSource.h" #include "IntFieldSource.h" #include "DoubleFieldSource.h" namespace Lucene { FieldScoreQuery::FieldScoreQuery(const String& field, Type type) : ValueSourceQuery(getValueSource(field,type)) { } FieldScoreQuery::~FieldScoreQuery() { } ValueSourcePtr FieldScoreQuery::getValueSource(const String& field, Type type) { switch (type) { case BYTE: return newLucene(field); case INT: return newLucene(field); case DOUBLE: return newLucene(field); default: boost::throw_exception(IllegalArgumentException(L"not a known Field Score Query Type")); return ValueSourcePtr(); } } } LucenePlusPlus-rel_3.0.4/src/core/search/function/IntFieldSource.cpp000066400000000000000000000050241217574114600255100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IntFieldSource.h" #include "_IntFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { IntFieldSource::IntFieldSource(const String& field, IntParserPtr parser) : FieldCacheSource(field) { this->parser = parser; } IntFieldSource::~IntFieldSource() { } String IntFieldSource::description() { return L"int(" + FieldCacheSource::description() + L")"; } DocValuesPtr IntFieldSource::getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader) { Collection arr(cache->getInts(reader, field, parser)); return newLucene(shared_from_this(), arr); } bool IntFieldSource::cachedFieldSourceEquals(FieldCacheSourcePtr other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; IntFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) return false; return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; } int32_t IntFieldSource::cachedFieldSourceHashCode() { return StringUtils::hashCode(parser ? 
IntParser::_getClassName() : IntFieldSource::_getClassName()); } IntDocValues::IntDocValues(IntFieldSourcePtr source, Collection arr) { this->_source = source; this->arr = arr; } IntDocValues::~IntDocValues() { } double IntDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return (double)arr[doc]; } int32_t IntDocValues::intVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return arr[doc]; } String IntDocValues::toString(int32_t doc) { return IntFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); } CollectionValue IntDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.4/src/core/search/function/OrdFieldSource.cpp000066400000000000000000000046361217574114600255120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OrdFieldSource.h" #include "_OrdFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { OrdFieldSource::OrdFieldSource(const String& field) { this->field = field; } OrdFieldSource::~OrdFieldSource() { } String OrdFieldSource::description() { return L"ord(" + field + L")"; } DocValuesPtr OrdFieldSource::getValues(IndexReaderPtr reader) { Collection arr(FieldCache::DEFAULT()->getStringIndex(reader, field)->order); return newLucene(shared_from_this(), arr); } bool OrdFieldSource::equals(LuceneObjectPtr other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; OrdFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) return false; return field == otherSource->field; } int32_t OrdFieldSource::hashCode() { return StringUtils::hashCode(OrdFieldSource::_getClassName()) + StringUtils::hashCode(field); } OrdDocValues::OrdDocValues(OrdFieldSourcePtr source, Collection arr) { this->_source = source; this->arr = arr; } OrdDocValues::~OrdDocValues() { } double OrdDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return (double)arr[doc]; } String OrdDocValues::strVal(int32_t doc) { // the string value of the ordinal, not the string itself if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return StringUtils::toString(arr[doc]); } String OrdDocValues::toString(int32_t doc) { return OrdFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); } CollectionValue OrdDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.4/src/core/search/function/ReverseOrdFieldSource.cpp000066400000000000000000000054411217574114600270410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // 
Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReverseOrdFieldSource.h" #include "_ReverseOrdFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { ReverseOrdFieldSource::ReverseOrdFieldSource(const String& field) { this->field = field; } ReverseOrdFieldSource::~ReverseOrdFieldSource() { } String ReverseOrdFieldSource::description() { return L"rord(" + field + L")"; } DocValuesPtr ReverseOrdFieldSource::getValues(IndexReaderPtr reader) { StringIndexPtr sindex(FieldCache::DEFAULT()->getStringIndex(reader, field)); Collection arr(sindex->order); int32_t end = sindex->lookup.size(); return newLucene(shared_from_this(), arr, end); } bool ReverseOrdFieldSource::equals(LuceneObjectPtr other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; ReverseOrdFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) return false; return field == otherSource->field; } int32_t ReverseOrdFieldSource::hashCode() { return StringUtils::hashCode(ReverseOrdFieldSource::_getClassName()) + StringUtils::hashCode(field); } ReverseOrdDocValues::ReverseOrdDocValues(ReverseOrdFieldSourcePtr source, Collection arr, int32_t end) { this->_source = source; this->arr = arr; this->end = end; } ReverseOrdDocValues::~ReverseOrdDocValues() { } double ReverseOrdDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return (double)(end - arr[doc]); } int32_t ReverseOrdDocValues::intVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) boost::throw_exception(IndexOutOfBoundsException()); return (end - arr[doc]); } String ReverseOrdDocValues::strVal(int32_t doc) { // the string value of the ordinal, not the 
string itself return StringUtils::toString(intVal(doc)); } String ReverseOrdDocValues::toString(int32_t doc) { return ReverseOrdFieldSourcePtr(_source)->description() + L"=" + strVal(doc); } CollectionValue ReverseOrdDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.4/src/core/search/function/ValueSource.cpp000066400000000000000000000010271217574114600250650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ValueSource.h" namespace Lucene { ValueSource::~ValueSource() { } String ValueSource::toString() { return description(); } } LucenePlusPlus-rel_3.0.4/src/core/search/function/ValueSourceQuery.cpp000066400000000000000000000107641217574114600261230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ValueSourceQuery.h" #include "_ValueSourceQuery.h" #include "ValueSource.h" #include "DocValues.h" #include "ComplexExplanation.h" #include "IndexReader.h" #include "TermDocs.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { ValueSourceQuery::ValueSourceQuery(ValueSourcePtr valSrc) { this->valSrc = valSrc; } ValueSourceQuery::~ValueSourceQuery() { } QueryPtr ValueSourceQuery::rewrite(IndexReaderPtr reader) { return shared_from_this(); } void ValueSourceQuery::extractTerms(SetTerm terms) { // no terms involved here } WeightPtr ValueSourceQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } String ValueSourceQuery::toString(const String& field) { return valSrc->toString() + boostString(); } bool ValueSourceQuery::equals(LuceneObjectPtr other) { ValueSourceQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; return (getBoost() == otherQuery->getBoost() && valSrc->equals(otherQuery->valSrc)); } int32_t ValueSourceQuery::hashCode() { return (StringUtils::hashCode(ValueSourceQuery::_getClassName()) + valSrc->hashCode()) ^ MiscUtils::doubleToIntBits(getBoost()); } LuceneObjectPtr ValueSourceQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? 
other : newLucene(valSrc); ValueSourceQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->valSrc = valSrc; return cloneQuery; } ValueSourceWeight::ValueSourceWeight(ValueSourceQueryPtr query, SearcherPtr searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); } ValueSourceWeight::~ValueSourceWeight() { } QueryPtr ValueSourceWeight::getQuery() { return query; } double ValueSourceWeight::getValue() { return queryWeight; } double ValueSourceWeight::sumOfSquaredWeights() { queryWeight = query->getBoost(); return queryWeight * queryWeight; } void ValueSourceWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; } ScorerPtr ValueSourceWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(similarity, reader, shared_from_this()); } ExplanationPtr ValueSourceWeight::explain(IndexReaderPtr reader, int32_t doc) { DocValuesPtr vals(query->valSrc->getValues(reader)); double sc = queryWeight * vals->doubleVal(doc); ExplanationPtr result(newLucene(true, sc, query->toString() + L", product of:")); result->addDetail(vals->explain(doc)); result->addDetail(newLucene(query->getBoost(), L"boost")); result->addDetail(newLucene(queryNorm, L"queryNorm")); return result; } ValueSourceScorer::ValueSourceScorer(SimilarityPtr similarity, IndexReaderPtr reader, ValueSourceWeightPtr weight) : Scorer(similarity) { this->weight = weight; this->qWeight = weight->getValue(); this->doc = -1; // this is when/where the values are first created. vals = weight->query->valSrc->getValues(reader); termDocs = reader->termDocs(TermPtr()); } ValueSourceScorer::~ValueSourceScorer() { } int32_t ValueSourceScorer::nextDoc() { doc = termDocs->next() ? termDocs->doc() : NO_MORE_DOCS; return doc; } int32_t ValueSourceScorer::docID() { return doc; } int32_t ValueSourceScorer::advance(int32_t target) { doc = termDocs->skipTo(target) ? 
termDocs->doc() : NO_MORE_DOCS; return doc; } double ValueSourceScorer::score() { return qWeight * vals->doubleVal(termDocs->doc()); } } LucenePlusPlus-rel_3.0.4/src/core/search/payloads/000077500000000000000000000000001217574114600221135ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/search/payloads/AveragePayloadFunction.cpp000066400000000000000000000030561217574114600272150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AveragePayloadFunction.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { AveragePayloadFunction::~AveragePayloadFunction() { } double AveragePayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { return currentPayloadScore + currentScore; } double AveragePayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { return numPayloadsSeen > 0 ? 
(payloadScore / (double)numPayloadsSeen) : 1.0; } int32_t AveragePayloadFunction::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + StringUtils::hashCode(getClassName()); return result; } bool AveragePayloadFunction::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!other) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; return true; } } LucenePlusPlus-rel_3.0.4/src/core/search/payloads/MaxPayloadFunction.cpp000066400000000000000000000031321217574114600263630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MaxPayloadFunction.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { MaxPayloadFunction::~MaxPayloadFunction() { } double MaxPayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { if (numPayloadsSeen == 0) return currentPayloadScore; else return std::max(currentPayloadScore, currentScore); } double MaxPayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { return numPayloadsSeen > 0 ? 
payloadScore : 1.0; } int32_t MaxPayloadFunction::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + StringUtils::hashCode(getClassName()); return result; } bool MaxPayloadFunction::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!other) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; return true; } } LucenePlusPlus-rel_3.0.4/src/core/search/payloads/MinPayloadFunction.cpp000066400000000000000000000031321217574114600263610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MinPayloadFunction.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { MinPayloadFunction::~MinPayloadFunction() { } double MinPayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { if (numPayloadsSeen == 0) return currentPayloadScore; else return std::min(currentPayloadScore, currentScore); } double MinPayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { return numPayloadsSeen > 0 ? 
payloadScore : 1.0; } int32_t MinPayloadFunction::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + StringUtils::hashCode(getClassName()); return result; } bool MinPayloadFunction::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!other) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; return true; } } LucenePlusPlus-rel_3.0.4/src/core/search/payloads/PayloadFunction.cpp000066400000000000000000000010051217574114600257120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadFunction.h" namespace Lucene { PayloadFunction::PayloadFunction() { } PayloadFunction::~PayloadFunction() { } } LucenePlusPlus-rel_3.0.4/src/core/search/payloads/PayloadNearQuery.cpp000066400000000000000000000170061217574114600260500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadNearQuery.h" #include "AveragePayloadFunction.h" #include "IndexReader.h" #include "NearSpansOrdered.h" #include "NearSpansUnordered.h" #include "Similarity.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { PayloadNearQuery::PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder) : SpanNearQuery(clauses, slop, inOrder) { fieldName = clauses[0]->getField(); // all clauses must have same field this->function = newLucene(); } PayloadNearQuery::PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder, PayloadFunctionPtr function) : SpanNearQuery(clauses, slop, inOrder) { fieldName = clauses[0]->getField(); // all clauses must have same field this->function = function; } PayloadNearQuery::~PayloadNearQuery() { } WeightPtr PayloadNearQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } LuceneObjectPtr PayloadNearQuery::clone(LuceneObjectPtr other) { int32_t sz = clauses.size(); Collection newClauses(Collection::newInstance(sz)); for (int32_t i = 0; i < sz; ++i) newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); PayloadNearQueryPtr payloadNearQuery(newLucene(newClauses, slop, inOrder)); payloadNearQuery->setBoost(getBoost()); return payloadNearQuery; } String PayloadNearQuery::toString(const String& field) { StringStream buffer; buffer << L"payloadNear(["; for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (clause != clauses.begin()) buffer << L", "; buffer << (*clause)->toString(field); } buffer << L"], " << slop << L", " << inOrder << L")" << boostString(); return buffer.str(); } bool PayloadNearQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!SpanNearQuery::equals(other)) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; PayloadNearQueryPtr 
otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; if (fieldName != otherQuery->fieldName) return false; if (!function) { if (otherQuery->function) return false; } else if (!function->equals(otherQuery->function)) return false; return true; } int32_t PayloadNearQuery::hashCode() { int32_t prime = 31; int32_t result = SpanNearQuery::hashCode(); result = prime * result + (fieldName.empty() ? 0 : StringUtils::hashCode(fieldName)); result = prime * result + (!function ? 0 : function->hashCode()); return result; } PayloadNearSpanWeight::PayloadNearSpanWeight(SpanQueryPtr query, SearcherPtr searcher) : SpanWeight(query, searcher) { } PayloadNearSpanWeight::~PayloadNearSpanWeight() { } ScorerPtr PayloadNearSpanWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(query->getSpans(reader), shared_from_this(), similarity, reader->norms(query->getField())); } PayloadNearSpanScorer::PayloadNearSpanScorer(SpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms) : SpanScorer(spans, weight, similarity, norms) { this->spans = spans; this->payloadScore = 0.0; this->payloadsSeen = 0; this->similarity = getSimilarity(); } PayloadNearSpanScorer::~PayloadNearSpanScorer() { } void PayloadNearSpanScorer::getPayloads(Collection subSpans) { for (Collection::iterator span = subSpans.begin(); span != subSpans.end(); ++span) { if (MiscUtils::typeOf(*span)) { NearSpansOrderedPtr ordered(boost::static_pointer_cast(*span)); if (ordered->isPayloadAvailable()) processPayloads(ordered->getPayload(), ordered->start(), ordered->end()); getPayloads(ordered->getSubSpans()); } else if (MiscUtils::typeOf(*span)) { NearSpansUnorderedPtr unordered(boost::static_pointer_cast(*span)); if (unordered->isPayloadAvailable()) processPayloads(unordered->getPayload(), unordered->start(), unordered->end()); getPayloads(unordered->getSubSpans()); } } } void PayloadNearSpanScorer::processPayloads(Collection payLoads, int32_t 
start, int32_t end) { PayloadNearSpanWeightPtr spanWeight(boost::static_pointer_cast(weight)); PayloadNearQueryPtr nearQuery(boost::static_pointer_cast(spanWeight->query)); for (Collection::iterator payload = payLoads.begin(); payload != payLoads.end(); ++payload) { payloadScore = nearQuery->function->currentScore(doc, nearQuery->fieldName, start, end, payloadsSeen, payloadScore, similarity->scorePayload(doc, nearQuery->fieldName, spans->start(), spans->end(), *payload, 0, payload->size())); ++payloadsSeen; } } bool PayloadNearSpanScorer::setFreqCurrentDoc() { if (!more) return false; Collection spansArr(newCollection(spans)); payloadScore = 0.0; payloadsSeen = 0; getPayloads(spansArr); return SpanScorer::setFreqCurrentDoc(); } double PayloadNearSpanScorer::score() { PayloadNearSpanWeightPtr spanWeight(boost::static_pointer_cast(weight)); PayloadNearQueryPtr nearQuery(boost::static_pointer_cast(spanWeight->query)); return SpanScorer::score() * nearQuery->function->docScore(doc, nearQuery->fieldName, payloadsSeen, payloadScore); } ExplanationPtr PayloadNearSpanScorer::explain(int32_t doc) { ExplanationPtr result(newLucene()); ExplanationPtr nonPayloadExpl(SpanScorer::explain(doc)); result->addDetail(nonPayloadExpl); ExplanationPtr payloadBoost(newLucene()); result->addDetail(payloadBoost); double avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / (double)payloadsSeen) : 1.0); payloadBoost->setValue(avgPayloadScore); payloadBoost->setDescription(L"scorePayload(...)"); result->setValue(nonPayloadExpl->getValue() * avgPayloadScore); result->setDescription(L"bnq, product of:"); return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/payloads/PayloadSpanUtil.cpp000066400000000000000000000156631217574114600257030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadSpanUtil.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "PhraseQuery.h" #include "SpanTermQuery.h" #include "SpanNearQuery.h" #include "SpanOrQuery.h" #include "TermQuery.h" #include "FilteredQuery.h" #include "DisjunctionMaxQuery.h" #include "MultiPhraseQuery.h" #include "Term.h" #include "Spans.h" #include "MiscUtils.h" namespace Lucene { PayloadSpanUtil::PayloadSpanUtil(IndexReaderPtr reader) { this->reader = reader; } PayloadSpanUtil::~PayloadSpanUtil() { } Collection PayloadSpanUtil::getPayloadsForQuery(QueryPtr query) { Collection payloads(Collection::newInstance()); queryToSpanQuery(query, payloads); return payloads; } void PayloadSpanUtil::queryToSpanQuery(QueryPtr query, Collection payloads) { if (MiscUtils::typeOf(query)) { BooleanQueryPtr booleanQuery(boost::dynamic_pointer_cast(query)); Collection queryClauses(booleanQuery->getClauses()); for (Collection::iterator clause = queryClauses.begin(); clause != queryClauses.end(); ++clause) { if (!(*clause)->isProhibited()) queryToSpanQuery((*clause)->getQuery(), payloads); } } else if (MiscUtils::typeOf(query)) { PhraseQueryPtr phraseQuery(boost::dynamic_pointer_cast(query)); Collection phraseQueryTerms(phraseQuery->getTerms()); Collection clauses(Collection::newInstance(phraseQueryTerms.size())); for (int32_t i = 0; i < phraseQueryTerms.size(); ++i) clauses[i] = newLucene(phraseQueryTerms[i]); int32_t slop = phraseQuery->getSlop(); bool inorder = false; if (slop == 0) inorder = true; SpanNearQueryPtr sp(newLucene(clauses, slop, inorder)); sp->setBoost(query->getBoost()); getPayloads(payloads, sp); } else if (MiscUtils::typeOf(query)) { TermQueryPtr termQuery(boost::dynamic_pointer_cast(query)); SpanTermQueryPtr stq(newLucene(termQuery->getTerm())); 
stq->setBoost(query->getBoost()); getPayloads(payloads, stq); } else if (MiscUtils::typeOf(query)) { SpanQueryPtr spanQuery(boost::dynamic_pointer_cast(query)); getPayloads(payloads, spanQuery); } else if (MiscUtils::typeOf(query)) { FilteredQueryPtr filteredQuery(boost::dynamic_pointer_cast(query)); queryToSpanQuery(filteredQuery->getQuery(), payloads); } else if (MiscUtils::typeOf(query)) { DisjunctionMaxQueryPtr maxQuery(boost::dynamic_pointer_cast(query)); for (Collection::iterator disjunct = maxQuery->begin(); disjunct != maxQuery->end(); ++disjunct) queryToSpanQuery(*disjunct, payloads); } else if (MiscUtils::typeOf(query)) { MultiPhraseQueryPtr multiphraseQuery(boost::dynamic_pointer_cast(query)); Collection< Collection > termArrays(multiphraseQuery->getTermArrays()); Collection positions(multiphraseQuery->getPositions()); if (!positions.empty()) { int32_t maxPosition = positions[positions.size() - 1]; for (int32_t i = 0; i < positions.size() - 1; ++i) { if (positions[i] > maxPosition) maxPosition = positions[i]; } Collection< Collection > disjunctLists(Collection< Collection >::newInstance(maxPosition + 1)); int32_t distinctPositions = 0; for (int32_t i = 0; i < termArrays.size(); ++i) { Collection termArray(termArrays[i]); Collection disjuncts(disjunctLists[positions[i]]); if (!disjuncts) { disjuncts = Collection::newInstance(); disjunctLists[positions[i]] = disjuncts; ++distinctPositions; } for (Collection::iterator term = termArray.begin(); term != termArray.end(); ++term) disjuncts.add(newLucene(*term)); } int32_t positionGaps = 0; int32_t position = 0; Collection clauses(Collection::newInstance(distinctPositions)); for (int32_t i = 0; i < disjunctLists.size(); ++i) { Collection disjuncts(disjunctLists[i]); if (disjuncts) { Collection spanDisjuncts(Collection::newInstance(disjuncts.size())); for (int32_t j = 0; j < disjuncts.size(); ++j) spanDisjuncts[j] = boost::dynamic_pointer_cast(disjuncts[j]); clauses[position++] = newLucene(spanDisjuncts); } else 
++positionGaps; } int32_t slop = multiphraseQuery->getSlop(); bool inorder = (slop == 0); SpanNearQueryPtr sp(newLucene(clauses, slop + positionGaps, inorder)); sp->setBoost(query->getBoost()); getPayloads(payloads, sp); } } } void PayloadSpanUtil::getPayloads(Collection payloads, SpanQueryPtr query) { SpansPtr spans(query->getSpans(reader)); while (spans->next()) { if (spans->isPayloadAvailable()) { Collection payload(spans->getPayload()); for (Collection::iterator bytes = payload.begin(); bytes != payload.end(); ++bytes) payloads.add(*bytes); } } } } LucenePlusPlus-rel_3.0.4/src/core/search/payloads/PayloadTermQuery.cpp000066400000000000000000000151541217574114600260740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadTermQuery.h" #include "_PayloadTermQuery.h" #include "Term.h" #include "TermSpans.h" #include "TermPositions.h" #include "ComplexExplanation.h" #include "IndexReader.h" #include "Similarity.h" #include "PayloadFunction.h" #include "MiscUtils.h" namespace Lucene { PayloadTermQuery::PayloadTermQuery(TermPtr term, PayloadFunctionPtr function, bool includeSpanScore) : SpanTermQuery(term) { this->function = function; this->includeSpanScore = includeSpanScore; } PayloadTermQuery::~PayloadTermQuery() { } WeightPtr PayloadTermQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } LuceneObjectPtr PayloadTermQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = SpanQuery::clone(other ? 
other : newLucene(term, function, includeSpanScore)); PayloadTermQueryPtr termQuery(boost::dynamic_pointer_cast(clone)); termQuery->function = function; termQuery->includeSpanScore = includeSpanScore; return termQuery; } bool PayloadTermQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!SpanTermQuery::equals(other)) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; PayloadTermQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; if (!function) { if (otherQuery->function) return false; } else if (!function->equals(otherQuery->function)) return false; if (includeSpanScore != otherQuery->includeSpanScore) return false; return true; } int32_t PayloadTermQuery::hashCode() { int32_t prime = 31; int32_t result = SpanTermQuery::hashCode(); result = prime * result + (function ? function->hashCode() : 0); result = prime * result + (includeSpanScore ? 1231 : 1237); return result; } PayloadTermWeight::PayloadTermWeight(PayloadTermQueryPtr query, SearcherPtr searcher) : SpanWeight(query, searcher) { } PayloadTermWeight::~PayloadTermWeight() { } ScorerPtr PayloadTermWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(boost::dynamic_pointer_cast(query->getSpans(reader)), shared_from_this(), similarity, reader->norms(query->getField())); } PayloadTermSpanScorer::PayloadTermSpanScorer(TermSpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms) : SpanScorer(spans, weight, similarity, norms) { positions = spans->getPositions(); payload = ByteArray::newInstance(256); payloadScore = 0.0; payloadsSeen = 0; } PayloadTermSpanScorer::~PayloadTermSpanScorer() { } bool PayloadTermSpanScorer::setFreqCurrentDoc() { if (!more) return false; doc = spans->doc(); freq = 0.0; payloadScore = 0.0; payloadsSeen = 0; SimilarityPtr similarity1(getSimilarity()); while (more && doc == spans->doc()) { int32_t matchLength = spans->end() - 
spans->start(); freq += similarity1->sloppyFreq(matchLength); processPayload(similarity1); more = spans->next(); // this moves positions to the next match in this document } return more || (freq != 0); } void PayloadTermSpanScorer::processPayload(SimilarityPtr similarity) { if (positions->isPayloadAvailable()) { PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); payload = positions->getPayload(payload, 0); payloadScore = payloadQuery->function->currentScore(doc, payloadQuery->term->field(), spans->start(), spans->end(), payloadsSeen, payloadScore, similarity->scorePayload(doc, payloadQuery->term->field(), spans->start(), spans->end(), payload, 0, positions->getPayloadLength())); ++payloadsSeen; } else { // zero out the payload? } } double PayloadTermSpanScorer::score() { PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); return payloadQuery->includeSpanScore ? 
getSpanScore() * getPayloadScore() : getPayloadScore(); } double PayloadTermSpanScorer::getSpanScore() { return SpanScorer::score(); } double PayloadTermSpanScorer::getPayloadScore() { PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); return payloadQuery->function->docScore(doc, payloadQuery->term->field(), payloadsSeen, payloadScore); } ExplanationPtr PayloadTermSpanScorer::explain(int32_t doc) { ComplexExplanationPtr result(newLucene()); ExplanationPtr nonPayloadExpl(SpanScorer::explain(doc)); result->addDetail(nonPayloadExpl); ExplanationPtr payloadBoost(newLucene()); result->addDetail(payloadBoost); double payloadScore = getPayloadScore(); payloadBoost->setValue(payloadScore); payloadBoost->setDescription(L"scorePayload(...)"); result->setValue(nonPayloadExpl->getValue() * payloadScore); result->setDescription(L"btq, product of:"); result->setMatch(nonPayloadExpl->getValue() != 0.0); return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/000077500000000000000000000000001217574114600214235ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/search/spans/FieldMaskingSpanQuery.cpp000066400000000000000000000066511217574114600263440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldMaskingSpanQuery.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { FieldMaskingSpanQuery::FieldMaskingSpanQuery(SpanQueryPtr maskedQuery, const String& maskedField) { this->maskedQuery = maskedQuery; this->field = maskedField; } FieldMaskingSpanQuery::~FieldMaskingSpanQuery() { } String FieldMaskingSpanQuery::getField() { return field; } SpanQueryPtr FieldMaskingSpanQuery::getMaskedQuery() { return maskedQuery; } // :NOTE: getBoost and setBoost are not proxied to the maskedQuery // ...this is done to be more consistent with things like SpanFirstQuery SpansPtr FieldMaskingSpanQuery::getSpans(IndexReaderPtr reader) { return maskedQuery->getSpans(reader); } void FieldMaskingSpanQuery::extractTerms(SetTerm terms) { maskedQuery->extractTerms(terms); } WeightPtr FieldMaskingSpanQuery::createWeight(SearcherPtr searcher) { return maskedQuery->createWeight(searcher); } SimilarityPtr FieldMaskingSpanQuery::getSimilarity(SearcherPtr searcher) { return maskedQuery->getSimilarity(searcher); } QueryPtr FieldMaskingSpanQuery::rewrite(IndexReaderPtr reader) { FieldMaskingSpanQueryPtr clone; SpanQueryPtr rewritten(boost::dynamic_pointer_cast(maskedQuery->rewrite(reader))); if (rewritten != maskedQuery) { clone = boost::dynamic_pointer_cast(this->clone()); clone->maskedQuery = rewritten; } if (clone) return clone; else return shared_from_this(); } String FieldMaskingSpanQuery::toString(const String& field) { StringStream buffer; buffer << L"mask(" << maskedQuery->toString(field) << L")"; buffer << boostString() << L" as " << this->field; return buffer.str(); } bool FieldMaskingSpanQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; FieldMaskingSpanQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; return (getField() == otherQuery->getField() && getBoost() == otherQuery->getBoost() 
&& getMaskedQuery()->equals(otherQuery->getMaskedQuery())); } int32_t FieldMaskingSpanQuery::hashCode() { return getMaskedQuery()->hashCode() ^ StringUtils::hashCode(getField()) ^ MiscUtils::doubleToRawIntBits(getBoost()); } LuceneObjectPtr FieldMaskingSpanQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(maskedQuery, field)); FieldMaskingSpanQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->maskedQuery = maskedQuery; cloneQuery->field = field; return cloneQuery; } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/NearSpansOrdered.cpp000066400000000000000000000232151217574114600253310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NearSpansOrdered.h" #include "SpanNearQuery.h" namespace Lucene { NearSpansOrdered::NearSpansOrdered(SpanNearQueryPtr spanNearQuery, IndexReaderPtr reader, bool collectPayloads) { if (spanNearQuery->getClauses().size() < 2) boost::throw_exception(IllegalArgumentException(L"Less than 2 clauses: " + spanNearQuery->toString())); this->firstTime = true; this->more = false; this->inSameDoc = false; this->matchDoc = -1; this->matchStart = -1; this->matchEnd = -1; this->collectPayloads = collectPayloads; this->allowedSlop = spanNearQuery->getSlop(); Collection clauses(spanNearQuery->getClauses()); this->subSpans = Collection::newInstance(clauses.size()); this->matchPayload = Collection::newInstance(); this->subSpansByDoc = Collection::newInstance(clauses.size()); for (int32_t i = 0; i < clauses.size(); ++i) { subSpans[i] = clauses[i]->getSpans(reader); subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() } this->query = 
spanNearQuery; // kept for toString() only. } NearSpansOrdered::~NearSpansOrdered() { } int32_t NearSpansOrdered::doc() { return matchDoc; } int32_t NearSpansOrdered::start() { return matchStart; } int32_t NearSpansOrdered::end() { return matchEnd; } Collection NearSpansOrdered::getSubSpans() { return subSpans; } Collection NearSpansOrdered::getPayload() { return matchPayload; } bool NearSpansOrdered::isPayloadAvailable() { return !matchPayload.empty(); } bool NearSpansOrdered::next() { if (firstTime) { firstTime = false; for (int32_t i = 0; i < subSpans.size(); ++i) { if (!subSpans[i]->next()) { more = false; return false; } } more = true; } if (collectPayloads) matchPayload.clear(); return advanceAfterOrdered(); } bool NearSpansOrdered::skipTo(int32_t target) { if (firstTime) { firstTime = false; for (int32_t i = 0; i < subSpans.size(); ++i) { if (!subSpans[i]->skipTo(target)) { more = false; return false; } } more = true; } else if (more && (subSpans[0]->doc() < target)) { if (subSpans[0]->skipTo(target)) inSameDoc = false; else { more = false; return false; } } if (collectPayloads) matchPayload.clear(); return advanceAfterOrdered(); } bool NearSpansOrdered::advanceAfterOrdered() { while (more && (inSameDoc || toSameDoc())) { if (stretchToOrder() && shrinkToAfterShortestMatch()) return true; } return false; // no more matches } struct lessSpanDoc { inline bool operator()(const SpansPtr& first, const SpansPtr& second) const { return ((first->doc() - second->doc()) < 0); } }; bool NearSpansOrdered::toSameDoc() { std::sort(subSpansByDoc.begin(), subSpansByDoc.end(), lessSpanDoc()); int32_t firstIndex = 0; int32_t maxDoc = subSpansByDoc[subSpansByDoc.size() - 1]->doc(); while (subSpansByDoc[firstIndex]->doc() != maxDoc) { if (!subSpansByDoc[firstIndex]->skipTo(maxDoc)) { more = false; inSameDoc = false; return false; } maxDoc = subSpansByDoc[firstIndex]->doc(); if (++firstIndex == subSpansByDoc.size()) firstIndex = 0; } for (int32_t i = 0; i < subSpansByDoc.size(); 
++i) { BOOST_ASSERT(subSpansByDoc[i]->doc() == maxDoc); } inSameDoc = true; return true; } bool NearSpansOrdered::docSpansOrdered(SpansPtr spans1, SpansPtr spans2) { BOOST_ASSERT(spans1->doc() == spans2->doc()); int32_t start1 = spans1->start(); int32_t start2 = spans2->start(); // Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() return start1 == start2 ? (spans1->end() < spans2->end()) : (start1 < start2); } bool NearSpansOrdered::docSpansOrdered(int32_t start1, int32_t end1, int32_t start2, int32_t end2) { return start1 == start2 ? (end1 < end2) : (start1 < start2); } bool NearSpansOrdered::stretchToOrder() { matchDoc = subSpans[0]->doc(); for (int32_t i = 1; inSameDoc && (i < subSpans.size()); ++i) { while (!docSpansOrdered(subSpans[i - 1], subSpans[i])) { if (!subSpans[i]->next()) { inSameDoc = false; more = false; break; } else if (matchDoc != subSpans[i]->doc()) { inSameDoc = false; break; } } } return inSameDoc; } bool NearSpansOrdered::shrinkToAfterShortestMatch() { SpansPtr subSpan(subSpans[subSpans.size() - 1]); matchStart = subSpan->start(); matchEnd = subSpan->end(); SetByteArray possibleMatchPayloads(SetByteArray::newInstance()); if (subSpan->isPayloadAvailable()) { Collection payload(subSpan->getPayload()); possibleMatchPayloads.addAll(payload.begin(), payload.end()); } Collection possiblePayload; int32_t matchSlop = 0; int32_t lastStart = matchStart; int32_t lastEnd = matchEnd; for (int32_t i = subSpans.size() - 2; i >= 0; --i) { SpansPtr prevSpans(subSpans[i]); if (collectPayloads && prevSpans->isPayloadAvailable()) { Collection payload(prevSpans->getPayload()); possiblePayload = Collection::newInstance(payload.begin(), payload.end()); } int32_t prevStart = prevSpans->start(); int32_t prevEnd = prevSpans->end(); while (true) // Advance prevSpans until after (lastStart, lastEnd) { if (!prevSpans->next()) { inSameDoc = false; more = false; break; // Check remaining subSpans for final match. 
} else if (matchDoc != prevSpans->doc()) { inSameDoc = false; // The last subSpans is not advanced here. break; // Check remaining subSpans for last match in this document. } else { int32_t ppStart = prevSpans->start(); int32_t ppEnd = prevSpans->end(); // Cannot avoid invoking .end() if (!docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) break; // Check remaining subSpans. else { prevStart = ppStart; prevEnd = ppEnd; if (collectPayloads && prevSpans->isPayloadAvailable()) { Collection payload(prevSpans->getPayload()); possiblePayload = Collection::newInstance(payload.begin(), payload.end()); } } } } if (collectPayloads && possiblePayload) possibleMatchPayloads.addAll(possiblePayload.begin(), possiblePayload.end()); BOOST_ASSERT(prevStart <= matchStart); if (matchStart > prevEnd) // Only non overlapping spans add to slop. matchSlop += (matchStart - prevEnd); // Do not break on (matchSlop > allowedSlop) here to make sure that subSpans[0] is // advanced after the match, if any. matchStart = prevStart; lastStart = prevStart; lastEnd = prevEnd; } bool match = (matchSlop <= allowedSlop); if (collectPayloads && match && !possibleMatchPayloads.empty()) matchPayload.addAll(possibleMatchPayloads.begin(), possibleMatchPayloads.end()); return match; // ordered and allowed slop } String NearSpansOrdered::toString() { StringStream buffer; buffer << getClassName() << L"(" << query->toString() << L")@"; if (firstTime) buffer << L"START"; else { if (more) buffer << doc() << L":" << start() << L"-" << end(); else buffer << L"END"; } return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/NearSpansUnordered.cpp000066400000000000000000000211751217574114600256770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NearSpansUnordered.h" #include "_NearSpansUnordered.h" #include "NearSpansOrdered.h" #include "SpanNearQuery.h" #include "StringUtils.h" namespace Lucene { NearSpansUnordered::NearSpansUnordered(SpanNearQueryPtr query, IndexReaderPtr reader) { this->query = query; this->reader = reader; } NearSpansUnordered::~NearSpansUnordered() { } void NearSpansUnordered::initialize() { this->slop = query->getSlop(); this->totalLength = 0; this->more = true; this->firstTime = true; Collection clauses(query->getClauses()); queue = newLucene(clauses.size()); subSpans = Collection::newInstance(clauses.size()); ordered = Collection::newInstance(); for (int32_t i = 0; i < clauses.size(); ++i) { SpansCellPtr cell(newLucene(shared_from_this(), clauses[i]->getSpans(reader), i)); ordered.add(cell); subSpans[i] = cell->spans; } } Collection NearSpansUnordered::getSubSpans() { return subSpans; } bool NearSpansUnordered::next() { if (firstTime) { initList(true); listToQueue(); // initialize queue firstTime = false; } else if (more) { if (min()->next()) // trigger further scanning queue->updateTop(); // maintain queue else more = false; } while (more) { bool queueStale = false; if (min()->doc() != max->doc()) // maintain list { queueToList(); queueStale = true; } // skip to doc with all clauses while (more && first->doc() < last->doc()) { more = first->skipTo(last->doc()); // skip first upto last firstToLast(); // and move it to the end queueStale = true; } if (!more) return false; // found doc with all clauses if (queueStale) // maintain the queue { listToQueue(); queueStale = false; } if (atMatch()) return true; more = min()->next(); if (more) queue->updateTop(); // maintain queue } return false; // no more matches } bool NearSpansUnordered::skipTo(int32_t target) { if (firstTime) // initialize { initList(false); for (SpansCellPtr cell(first); more && cell; cell = cell->_next) 
more = cell->skipTo(target); // skip all if (more) listToQueue(); firstTime = false; } else // normal case { while (more && min()->doc() < target) // skip as needed { if (min()->skipTo(target)) queue->updateTop(); else more = false; } } return (more && (atMatch() || next())); } SpansCellPtr NearSpansUnordered::min() { return queue->top(); } int32_t NearSpansUnordered::doc() { return min()->doc(); } int32_t NearSpansUnordered::start() { return min()->start(); } int32_t NearSpansUnordered::end() { return max->end(); } Collection NearSpansUnordered::getPayload() { SetByteArray matchPayload(SetByteArray::newInstance()); for (SpansCellPtr cell(first); cell; cell = cell->_next) { if (cell->isPayloadAvailable()) { Collection payload(cell->getPayload()); matchPayload.addAll(payload.begin(), payload.end()); } } return Collection::newInstance(matchPayload.begin(), matchPayload.end()); } bool NearSpansUnordered::isPayloadAvailable() { SpansCellPtr pointer(min()); while (pointer) { if (pointer->isPayloadAvailable()) return true; pointer = pointer->_next; } return false; } String NearSpansUnordered::toString() { StringStream buffer; buffer << getClassName() << L"(" << query->toString() << L")@"; if (firstTime) buffer << L"START"; else { if (more) buffer << doc() << L":" << start() << L"-" << end(); else buffer << L"END"; } return buffer.str(); } void NearSpansUnordered::initList(bool next) { for (Collection::iterator cell = ordered.begin(); more && cell != ordered.end(); ++cell) { if (next) more = (*cell)->next(); // move to first entry if (more) addToList(*cell); // add to list } } void NearSpansUnordered::addToList(SpansCellPtr cell) { if (last) // add next to end of list last->_next = cell; else first = cell; last = cell; cell->_next.reset(); } void NearSpansUnordered::firstToLast() { last->_next = first; // move first to end of list last = first; first = first->_next; last->_next.reset(); } void NearSpansUnordered::queueToList() { first.reset(); last.reset(); while 
(queue->top()) addToList(queue->pop()); } void NearSpansUnordered::listToQueue() { queue->clear(); // rebuild queue for (SpansCellPtr cell(first); cell; cell = cell->_next) queue->add(cell); // add to queue from list } bool NearSpansUnordered::atMatch() { return ((min()->doc() == max->doc()) && ((max->end() - min()->start() - totalLength) <= slop)); } SpansCell::SpansCell(NearSpansUnorderedPtr unordered, SpansPtr spans, int32_t index) { this->_unordered = unordered; this->spans = spans; this->index = index; this->length = -1; } SpansCell::~SpansCell() { } bool SpansCell::next() { return adjust(spans->next()); } bool SpansCell::skipTo(int32_t target) { return adjust(spans->skipTo(target)); } bool SpansCell::adjust(bool condition) { NearSpansUnorderedPtr unordered(_unordered); if (length != -1) unordered->totalLength -= length; // subtract old length if (condition) { length = end() - start(); unordered->totalLength += length; // add new length if (!unordered->max || doc() > unordered->max->doc() || (doc() == unordered->max->doc()) && (end() > unordered->max->end())) unordered->max = shared_from_this(); } unordered->more = condition; return condition; } int32_t SpansCell::doc() { return spans->doc(); } int32_t SpansCell::start() { return spans->start(); } int32_t SpansCell::end() { return spans->end(); } Collection SpansCell::getPayload() { Collection payload(spans->getPayload()); return Collection::newInstance(payload.begin(), payload.end()); } bool SpansCell::isPayloadAvailable() { return spans->isPayloadAvailable(); } String SpansCell::toString() { return spans->toString() + L"#" + StringUtils::toString(index); } CellQueue::CellQueue(int32_t size) : PriorityQueue(size) { } CellQueue::~CellQueue() { } bool CellQueue::lessThan(const SpansCellPtr& first, const SpansCellPtr& second) { if (first->doc() == second->doc()) return NearSpansOrdered::docSpansOrdered(first, second); else return (first->doc() < second->doc()); } } 
LucenePlusPlus-rel_3.0.4/src/core/search/spans/SpanFirstQuery.cpp000066400000000000000000000104231217574114600250660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanFirstQuery.h" #include "_SpanFirstQuery.h" #include "SpanQuery.h" #include "MiscUtils.h" namespace Lucene { SpanFirstQuery::SpanFirstQuery(SpanQueryPtr match, int32_t end) { this->match = match; this->end = end; } SpanFirstQuery::~SpanFirstQuery() { } SpanQueryPtr SpanFirstQuery::getMatch() { return match; } int32_t SpanFirstQuery::getEnd() { return end; } String SpanFirstQuery::getField() { return match->getField(); } String SpanFirstQuery::toString(const String& field) { StringStream buffer; buffer << L"spanFirst(" << match->toString(field) << L", " << end << L")" << boostString(); return buffer.str(); } LuceneObjectPtr SpanFirstQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = SpanQuery::clone(other ? 
other : newLucene(boost::dynamic_pointer_cast(match->clone()), end)); SpanFirstQueryPtr spanFirstQuery(boost::dynamic_pointer_cast(clone)); spanFirstQuery->match = match; spanFirstQuery->end = end; spanFirstQuery->setBoost(getBoost()); return spanFirstQuery; } void SpanFirstQuery::extractTerms(SetTerm terms) { match->extractTerms(terms); } SpansPtr SpanFirstQuery::getSpans(IndexReaderPtr reader) { return newLucene(shared_from_this(), match->getSpans(reader)); } QueryPtr SpanFirstQuery::rewrite(IndexReaderPtr reader) { SpanFirstQueryPtr clone; SpanQueryPtr rewritten(boost::dynamic_pointer_cast(match->rewrite(reader))); if (rewritten != match) { clone = boost::dynamic_pointer_cast(this->clone()); clone->match = rewritten; } if (clone) return clone; // some clauses rewrote else return shared_from_this(); // no clauses rewrote } bool SpanFirstQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; SpanFirstQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; return (end == otherQuery->end && match->equals(otherQuery->match) && getBoost() == otherQuery->getBoost()); } int32_t SpanFirstQuery::hashCode() { int32_t result = match->hashCode(); result ^= (result << 8) | MiscUtils::unsignedShift(result, 25); // reversible result ^= MiscUtils::doubleToRawIntBits(getBoost()) ^ end; return result; } FirstSpans::FirstSpans(SpanFirstQueryPtr query, SpansPtr spans) { this->query = query; this->spans = spans; } FirstSpans::~FirstSpans() { } bool FirstSpans::next() { while (spans->next()) // scan to next match { if (end() <= query->end) return true; } return false; } bool FirstSpans::skipTo(int32_t target) { if (!spans->skipTo(target)) return false; return (spans->end() <= query->end || next()); } int32_t FirstSpans::doc() { return spans->doc(); } int32_t FirstSpans::start() { return spans->start(); } int32_t FirstSpans::end() { return spans->end(); } Collection FirstSpans::getPayload() { Collection result; if 
(spans->isPayloadAvailable()) { Collection payload(spans->getPayload()); result = Collection::newInstance(payload.begin(), payload.end()); } return result; } bool FirstSpans::isPayloadAvailable() { return spans->isPayloadAvailable(); } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/SpanNearQuery.cpp000066400000000000000000000124071217574114600246700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanNearQuery.h" #include "SpanQuery.h" #include "SpanOrQuery.h" #include "NearSpansOrdered.h" #include "NearSpansUnordered.h" #include "MiscUtils.h" namespace Lucene { SpanNearQuery::SpanNearQuery(Collection clauses, int32_t slop, bool inOrder, bool collectPayloads) { this->clauses = Collection::newInstance(); for (int32_t i = 0; i < clauses.size(); ++i) { SpanQueryPtr clause(clauses[i]); if (i == 0) // check field field = clause->getField(); else if (clause->getField() != field) boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); this->clauses.add(clause); } this->collectPayloads = collectPayloads; this->slop = slop; this->inOrder = inOrder; } SpanNearQuery::~SpanNearQuery() { } Collection SpanNearQuery::getClauses() { return clauses; } int32_t SpanNearQuery::getSlop() { return slop; } bool SpanNearQuery::isInOrder() { return inOrder; } String SpanNearQuery::getField() { return field; } void SpanNearQuery::extractTerms(SetTerm terms) { for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) (*clause)->extractTerms(terms); } String SpanNearQuery::toString(const String& field) { StringStream buffer; buffer << L"spanNear(["; for (Collection::iterator clause = 
clauses.begin(); clause != clauses.end(); ++clause) { if (clause != clauses.begin()) buffer << L", "; buffer << (*clause)->toString(field); } buffer << L"], " << slop << L", " << inOrder << L")" << boostString(); return buffer.str(); } SpansPtr SpanNearQuery::getSpans(IndexReaderPtr reader) { if (clauses.empty()) // optimize 0-clause case return newLucene(getClauses())->getSpans(reader); if (clauses.size() == 1) // optimize 1-clause case return clauses[0]->getSpans(reader); return inOrder ? boost::static_pointer_cast(newLucene(shared_from_this(), reader, collectPayloads)) : boost::static_pointer_cast(newLucene(shared_from_this(), reader)); } QueryPtr SpanNearQuery::rewrite(IndexReaderPtr reader) { SpanNearQueryPtr clone; for (int32_t i = 0; i < clauses.size(); ++i) { SpanQueryPtr clause(clauses[i]); SpanQueryPtr query(boost::dynamic_pointer_cast(clause->rewrite(reader))); if (query != clause) // clause rewrote: must clone { if (!clone) clone = boost::dynamic_pointer_cast(this->clone()); clone->clauses[i] = query; } } if (clone) return clone; // some clauses rewrote else return shared_from_this(); // no clauses rewrote } LuceneObjectPtr SpanNearQuery::clone(LuceneObjectPtr other) { int32_t sz = clauses.size(); Collection newClauses(Collection::newInstance(sz)); for (int32_t i = 0; i < sz; ++i) newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); SpanNearQueryPtr spanNearQuery(newLucene(newClauses, slop, inOrder)); spanNearQuery->setBoost(getBoost()); return spanNearQuery; } bool SpanNearQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; SpanNearQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; if (inOrder != otherQuery->inOrder) return false; if (slop != otherQuery->slop) return false; if (!clauses.equals(otherQuery->clauses, luceneEquals())) return false; return (getBoost() == otherQuery->getBoost()); } int32_t SpanNearQuery::hashCode() { int32_t result = 
MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene); // Mix bits before folding in things like boost, since it could cancel the last element of clauses. // This particular mix also serves to differentiate SpanNearQuery hashcodes from others. result ^= (result << 14) | MiscUtils::unsignedShift(result, 19); // reversible result += MiscUtils::doubleToRawIntBits(getBoost()); result += slop; result ^= (inOrder ? 0x99afd3bd : 0); return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/SpanNotQuery.cpp000066400000000000000000000150531217574114600245430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanNotQuery.h" #include "_SpanNotQuery.h" #include "MiscUtils.h" namespace Lucene { SpanNotQuery::SpanNotQuery(SpanQueryPtr include, SpanQueryPtr exclude) { this->include = include; this->exclude = exclude; if (include->getField() != exclude->getField()) boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); } SpanNotQuery::~SpanNotQuery() { } SpanQueryPtr SpanNotQuery::getInclude() { return include; } SpanQueryPtr SpanNotQuery::getExclude() { return exclude; } String SpanNotQuery::getField() { return include->getField(); } void SpanNotQuery::extractTerms(SetTerm terms) { include->extractTerms(terms); } String SpanNotQuery::toString(const String& field) { StringStream buffer; buffer << L"spanNot(" << include->toString(field) << L", " << exclude->toString(field) << L")"; buffer << boostString(); return buffer.str(); } LuceneObjectPtr SpanNotQuery::clone(LuceneObjectPtr other) { SpanNotQueryPtr spanNotQuery(newLucene(boost::dynamic_pointer_cast(include->clone()), 
boost::dynamic_pointer_cast(exclude->clone()))); spanNotQuery->setBoost(getBoost()); return spanNotQuery; } SpansPtr SpanNotQuery::getSpans(IndexReaderPtr reader) { return newLucene(shared_from_this(), include->getSpans(reader), exclude->getSpans(reader)); } QueryPtr SpanNotQuery::rewrite(IndexReaderPtr reader) { SpanNotQueryPtr clone; SpanQueryPtr rewrittenInclude(boost::dynamic_pointer_cast(include->rewrite(reader))); if (rewrittenInclude != include) { clone = boost::dynamic_pointer_cast(this->clone()); clone->include = rewrittenInclude; } SpanQueryPtr rewrittenExclude(boost::dynamic_pointer_cast(exclude->rewrite(reader))); if (rewrittenExclude != exclude) { if (!clone) clone = boost::dynamic_pointer_cast(this->clone()); clone->exclude = rewrittenExclude; } if (clone) return clone; // some clauses rewrote else return shared_from_this(); // no clauses rewrote } bool SpanNotQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; SpanNotQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; return (include->equals(otherQuery->include) && exclude->equals(otherQuery->exclude) && getBoost() == otherQuery->getBoost()); } int32_t SpanNotQuery::hashCode() { int32_t result = include->hashCode(); result = (result << 1) | MiscUtils::unsignedShift(result, 31); // rotate left result ^= exclude->hashCode(); result = (result << 1) | MiscUtils::unsignedShift(result, 31); // rotate left result ^= MiscUtils::doubleToRawIntBits(getBoost()); return result; } NotSpans::NotSpans(SpanNotQueryPtr query, SpansPtr includeSpans, SpansPtr excludeSpans) { this->query = query; this->includeSpans = includeSpans; this->moreInclude = true; this->excludeSpans = excludeSpans; this->moreExclude = excludeSpans->next(); } NotSpans::~NotSpans() { } bool NotSpans::next() { if (moreInclude) // move to next include moreInclude = includeSpans->next(); while (moreInclude && moreExclude) { if (includeSpans->doc() > excludeSpans->doc()) // skip 
exclude moreExclude = excludeSpans->skipTo(includeSpans->doc()); // while exclude is before while (moreExclude && includeSpans->doc() == excludeSpans->doc() && excludeSpans->end() <= includeSpans->start()) moreExclude = excludeSpans->next(); // increment exclude // if no intersection if (!moreExclude || includeSpans->doc() != excludeSpans->doc() || includeSpans->end() <= excludeSpans->start()) break; // we found a match moreInclude = includeSpans->next(); // intersected: keep scanning } return moreInclude; } bool NotSpans::skipTo(int32_t target) { if (moreInclude) // skip include moreInclude = includeSpans->skipTo(target); if (!moreInclude) return false; // skip exclude if (moreExclude && includeSpans->doc() > excludeSpans->doc()) moreExclude = excludeSpans->skipTo(includeSpans->doc()); // while exclude is before while (moreExclude && includeSpans->doc() == excludeSpans->doc() && excludeSpans->end() <= includeSpans->start()) moreExclude = excludeSpans->next(); // increment exclude // if no intersection if (!moreExclude || includeSpans->doc() != excludeSpans->doc() || includeSpans->end() <= excludeSpans->start()) return true; // we found a match return next(); // scan to next match } int32_t NotSpans::doc() { return includeSpans->doc(); } int32_t NotSpans::start() { return includeSpans->start(); } int32_t NotSpans::end() { return includeSpans->end(); } Collection NotSpans::getPayload() { Collection result; if (includeSpans->isPayloadAvailable()) { Collection payload(includeSpans->getPayload()); result = Collection::newInstance(payload.begin(), payload.end()); } return result; } bool NotSpans::isPayloadAvailable() { return includeSpans->isPayloadAvailable(); } String NotSpans::toString() { return L"spans(" + query->toString() + L")"; } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/SpanOrQuery.cpp000066400000000000000000000166051217574114600243670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright 
(c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanOrQuery.h" #include "_SpanOrQuery.h" #include "MiscUtils.h" namespace Lucene { SpanOrQuery::SpanOrQuery(Collection clauses) { // copy clauses array into an ArrayList this->clauses = Collection::newInstance(); for (int32_t i = 0; i < clauses.size(); ++i) { SpanQueryPtr clause(clauses[i]); if (i == 0) // check field field = clause->getField(); else if (clause->getField() != field) boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); this->clauses.add(clause); } } SpanOrQuery::~SpanOrQuery() { } Collection SpanOrQuery::getClauses() { return clauses; } String SpanOrQuery::getField() { return field; } void SpanOrQuery::extractTerms(SetTerm terms) { for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) (*clause)->extractTerms(terms); } LuceneObjectPtr SpanOrQuery::clone(LuceneObjectPtr other) { int32_t sz = clauses.size(); Collection newClauses(Collection::newInstance(sz)); for (int32_t i = 0; i < sz; ++i) newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); SpanOrQueryPtr spanOrQuery(newLucene(newClauses)); spanOrQuery->setBoost(getBoost()); return spanOrQuery; } QueryPtr SpanOrQuery::rewrite(IndexReaderPtr reader) { SpanOrQueryPtr clone; for (int32_t i = 0; i < clauses.size(); ++i) { SpanQueryPtr clause(clauses[i]); SpanQueryPtr query(boost::dynamic_pointer_cast(clause->rewrite(reader))); if (query != clause) // clause rewrote: must clone { if (!clone) clone = boost::dynamic_pointer_cast(this->clone()); clone->clauses[i] = query; } } if (clone) return clone; // some clauses rewrote else return shared_from_this(); // no clauses rewrote } String SpanOrQuery::toString(const String& field) { StringStream buffer; buffer 
<< L"SpanOr(["; for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (clause != clauses.begin()) buffer << L", "; buffer << (*clause)->toString(field); } buffer << L"])" << boostString(); return buffer.str(); } bool SpanOrQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; SpanOrQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; if (!clauses.equals(otherQuery->clauses, luceneEquals())) return false; if (!clauses.empty() && field != otherQuery->field) return false; return (getBoost() == otherQuery->getBoost()); } int32_t SpanOrQuery::hashCode() { int32_t result = MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene); result ^= (result << 10) | MiscUtils::unsignedShift(result, 23); result ^= MiscUtils::doubleToRawIntBits(getBoost()); return result; } SpansPtr SpanOrQuery::getSpans(IndexReaderPtr reader) { if (clauses.size() == 1) // optimize 1-clause case return clauses[0]->getSpans(reader); return newLucene(shared_from_this(), reader); } SpanQueue::SpanQueue(int32_t size) : PriorityQueue(size) { } SpanQueue::~SpanQueue() { } bool SpanQueue::lessThan(const SpansPtr& first, const SpansPtr& second) { if (first->doc() == second->doc()) { if (first->start() == second->start()) return (first->end() < second->end()); else return (first->start() < second->start()); } else return (first->doc() < second->doc()); } OrSpans::OrSpans(SpanOrQueryPtr query, IndexReaderPtr reader) { this->query = query; this->reader = reader; } OrSpans::~OrSpans() { } bool OrSpans::initSpanQueue(int32_t target) { queue = newLucene(query->clauses.size()); for (Collection::iterator clause = query->clauses.begin(); clause != query->clauses.end(); ++clause) { SpansPtr spans((*clause)->getSpans(reader)); if ((target == -1 && spans->next()) || (target != -1 && spans->skipTo(target))) queue->add(spans); } return !queue->empty(); } bool OrSpans::next() { if (!queue) return 
initSpanQueue(-1); if (queue->empty()) // all done return false; if (top()->next()) // move to next { queue->updateTop(); return true; } queue->pop(); // exhausted a clause return !queue->empty(); } SpansPtr OrSpans::top() { return queue->top(); } bool OrSpans::skipTo(int32_t target) { if (!queue) return initSpanQueue(target); bool skipCalled = false; while (!queue->empty() && top()->doc() < target) { if (top()->skipTo(target)) queue->updateTop(); else queue->pop(); skipCalled = true; } if (skipCalled) return !queue->empty(); return next(); } int32_t OrSpans::doc() { return top()->doc(); } int32_t OrSpans::start() { return top()->start(); } int32_t OrSpans::end() { return top()->end(); } Collection OrSpans::getPayload() { Collection result; SpansPtr theTop(top()); if (theTop && theTop->isPayloadAvailable()) { Collection payload(theTop->getPayload()); result = Collection::newInstance(payload.begin(), payload.end()); } return result; } bool OrSpans::isPayloadAvailable() { SpansPtr theTop(top()); return (theTop && theTop->isPayloadAvailable()); } String OrSpans::toString() { StringStream buffer; buffer << L"spans(" << query->toString() << L")@"; if (!queue) buffer << L"START"; else { if (!queue->empty()) buffer << doc() << L":" << start() << L"-" << end(); else buffer << L"END"; } return buffer.str(); } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/SpanQuery.cpp000066400000000000000000000011501217574114600240530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanQuery.h" #include "SpanWeight.h" namespace Lucene { SpanQuery::~SpanQuery() { } WeightPtr SpanQuery::createWeight(SearcherPtr searcher) { return newLucene(shared_from_this(), searcher); } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/SpanScorer.cpp000066400000000000000000000052071217574114600242120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanScorer.h" #include "Explanation.h" #include "Weight.h" #include "Similarity.h" #include "Spans.h" #include "StringUtils.h" namespace Lucene { SpanScorer::SpanScorer(SpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms) : Scorer(similarity) { this->spans = spans; this->norms = norms; this->weight = weight; this->value = weight->getValue(); this->freq = 0.0; if (this->spans->next()) { doc = -1; more = true; } else { doc = NO_MORE_DOCS; more = false; } } SpanScorer::~SpanScorer() { } int32_t SpanScorer::nextDoc() { if (!setFreqCurrentDoc()) doc = NO_MORE_DOCS; return doc; } int32_t SpanScorer::advance(int32_t target) { if (!more) { doc = NO_MORE_DOCS; return doc; } if (spans->doc() < target) // setFreqCurrentDoc() leaves spans->doc() ahead more = spans->skipTo(target); if (!setFreqCurrentDoc()) doc = NO_MORE_DOCS; return doc; } bool SpanScorer::setFreqCurrentDoc() { if (!more) return false; doc = spans->doc(); freq = 0.0; do { int32_t matchLength = spans->end() - spans->start(); freq += getSimilarity()->sloppyFreq(matchLength); more = spans->next(); } while (more && (doc == spans->doc())); return true; } int32_t SpanScorer::docID() { return 
doc; } double SpanScorer::score() { double raw = getSimilarity()->tf(freq) * value; // raw score return norms ? raw * Similarity::decodeNorm(norms[doc]) : raw; // normalize } ExplanationPtr SpanScorer::explain(int32_t doc) { ExplanationPtr tfExplanation(newLucene()); int32_t expDoc = advance(doc); double phraseFreq = expDoc == doc ? freq : 0.0; tfExplanation->setValue(getSimilarity()->tf(phraseFreq)); tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); return tfExplanation; } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/SpanTermQuery.cpp000066400000000000000000000047261217574114600247170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanTermQuery.h" #include "Term.h" #include "TermSpans.h" #include "IndexReader.h" #include "MiscUtils.h" namespace Lucene { SpanTermQuery::SpanTermQuery(TermPtr term) { this->term = term; } SpanTermQuery::~SpanTermQuery() { } TermPtr SpanTermQuery::getTerm() { return term; } String SpanTermQuery::getField() { return term->field(); } void SpanTermQuery::extractTerms(SetTerm terms) { terms.add(term); } String SpanTermQuery::toString(const String& field) { StringStream buffer; if (term->field() == field) buffer << term->text(); else buffer << term->toString(); buffer << boostString(); return buffer.str(); } int32_t SpanTermQuery::hashCode() { int32_t prime = 31; int32_t result = SpanQuery::hashCode(); result = prime * result + (term ? 
term->hashCode() : 0); return result; } bool SpanTermQuery::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; if (!SpanQuery::equals(other)) return false; if (!MiscUtils::equalTypes(shared_from_this(), other)) return false; SpanTermQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) return false; if (!term) { if (otherQuery->term) return false; } else if (!term->equals(otherQuery->term)) return false; return true; } LuceneObjectPtr SpanTermQuery::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(term)); SpanTermQueryPtr spanFirstQuery(boost::dynamic_pointer_cast(clone)); spanFirstQuery->term = term; return spanFirstQuery; } SpansPtr SpanTermQuery::getSpans(IndexReaderPtr reader) { return newLucene(reader->termPositions(term), term); } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/SpanWeight.cpp000066400000000000000000000104341217574114600242020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanWeight.h" #include "SpanScorer.h" #include "SpanQuery.h" #include "IndexReader.h" #include "ComplexExplanation.h" #include "Similarity.h" #include "StringUtils.h" namespace Lucene { SpanWeight::SpanWeight(SpanQueryPtr query, SearcherPtr searcher) { this->similarity = query->getSimilarity(searcher); this->query = query; terms = SetTerm::newInstance(); query->extractTerms(terms); idfExp = similarity->idfExplain(Collection::newInstance(terms.begin(), terms.end()), searcher); idf = idfExp->getIdf(); value = 0.0; queryNorm = 0.0; queryWeight = 0.0; } SpanWeight::~SpanWeight() { } QueryPtr SpanWeight::getQuery() { return query; } double SpanWeight::getValue() { return value; } double SpanWeight::sumOfSquaredWeights() { queryWeight = idf * getQuery()->getBoost(); // compute query weight return queryWeight * queryWeight; // square it } void SpanWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } ScorerPtr SpanWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(query->getSpans(reader), shared_from_this(), similarity, reader->norms(query->getField())); } ExplanationPtr SpanWeight::explain(IndexReaderPtr reader, int32_t doc) { ComplexExplanationPtr result(newLucene()); result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); String field(query->getField()); ExplanationPtr idfExpl(newLucene(idf, L"idf(" + field + L":" + idfExp->explain() + L")")); // explain query weight ExplanationPtr queryExpl(newLucene()); queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); if (query->getBoost() != 1.0) queryExpl->addDetail(boostExpl); queryExpl->addDetail(idfExpl); ExplanationPtr 
queryNormExpl(newLucene(queryNorm, L"queryNorm")); queryExpl->addDetail(queryNormExpl); queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); result->addDetail(queryExpl); // explain field weight ComplexExplanationPtr fieldExpl(newLucene()); fieldExpl->setDescription(L"fieldWeight(" + field + L":" + query->toString(field) + L" in " + StringUtils::toString(doc) + L"), product of:"); ExplanationPtr tfExpl(boost::dynamic_pointer_cast(scorer(reader, true, false))->explain(doc)); fieldExpl->addDetail(tfExpl); fieldExpl->addDetail(idfExpl); ExplanationPtr fieldNormExpl(newLucene()); ByteArray fieldNorms(reader->norms(field)); double fieldNorm = fieldNorms ? Similarity::decodeNorm(fieldNorms[doc]) : 1.0; fieldNormExpl->setValue(fieldNorm); fieldNormExpl->setDescription(L"fieldNorm(field=" + field + L", doc=" + StringUtils::toString(doc) + L")"); fieldExpl->addDetail(fieldNormExpl); fieldExpl->setMatch(tfExpl->isMatch()); fieldExpl->setValue(tfExpl->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); result->addDetail(fieldExpl); result->setMatch(fieldExpl->getMatch()); // combine them result->setValue(queryExpl->getValue() * fieldExpl->getValue()); if (queryExpl->getValue() == 1.0) return fieldExpl; return result; } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/Spans.cpp000066400000000000000000000006631217574114600232200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Spans.h" namespace Lucene { Spans::~Spans() { } } LucenePlusPlus-rel_3.0.4/src/core/search/spans/TermSpans.cpp000066400000000000000000000047541217574114600240550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermSpans.h" #include "TermPositions.h" #include "Term.h" namespace Lucene { TermSpans::TermSpans(TermPositionsPtr positions, TermPtr term) { this->positions = positions; this->term = term; this->_doc = -1; this->freq = 0; this->count = 0; this->position = 0; } TermSpans::~TermSpans() { } bool TermSpans::next() { if (count == freq) { if (!positions->next()) { _doc = INT_MAX; return false; } _doc = positions->doc(); freq = positions->freq(); count = 0; } position = positions->nextPosition(); ++count; return true; } bool TermSpans::skipTo(int32_t target) { if (!positions->skipTo(target)) { _doc = INT_MAX; return false; } _doc = positions->doc(); freq = positions->freq(); count = 0; position = positions->nextPosition(); ++count; return true; } int32_t TermSpans::doc() { return _doc; } int32_t TermSpans::start() { return position; } int32_t TermSpans::end() { return position + 1; } Collection TermSpans::getPayload() { Collection payload(newCollection(ByteArray::newInstance(positions->getPayloadLength()))); payload[0] = positions->getPayload(payload[0], 0); return payload; } bool TermSpans::isPayloadAvailable() { return positions->isPayloadAvailable(); } String TermSpans::toString() { StringStream buffer; buffer << L"spans(" << term->toString() << L")@"; if (_doc == -1) buffer << L"START"; else if (_doc == 
INT_MAX) buffer << L"END"; else buffer << _doc << L"-" << position; return buffer.str(); } TermPositionsPtr TermSpans::getPositions() { return positions; } } LucenePlusPlus-rel_3.0.4/src/core/store/000077500000000000000000000000001217574114600201665ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/store/BufferedIndexInput.cpp000066400000000000000000000153101217574114600244240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BufferedIndexInput.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// Default buffer size. const int32_t BufferedIndexInput::BUFFER_SIZE = 1024; BufferedIndexInput::BufferedIndexInput(int32_t bufferSize) { this->bufferSize = bufferSize; bufferStart = 0; bufferLength = 0; bufferPosition = 0; } BufferedIndexInput::~BufferedIndexInput() { } uint8_t BufferedIndexInput::readByte() { if (bufferPosition >= bufferLength) refill(); return buffer[bufferPosition++]; } void BufferedIndexInput::setBufferSize(int32_t newSize) { if (newSize != bufferSize) { bufferSize = newSize; if (buffer) { // Resize the existing buffer and carefully save as many bytes as possible starting from the current bufferPosition ByteArray _newBuffer(ByteArray::newInstance(newSize)); int32_t leftInBuffer = bufferLength - bufferPosition; int32_t numToCopy = leftInBuffer > newSize ? 
newSize : leftInBuffer; MiscUtils::arrayCopy(buffer.get(), bufferPosition, _newBuffer.get(), 0, numToCopy); bufferStart += bufferPosition; bufferPosition = 0; bufferLength = numToCopy; newBuffer(_newBuffer); } } } void BufferedIndexInput::newBuffer(ByteArray newBuffer) { // Subclasses can do something here buffer = newBuffer; } int32_t BufferedIndexInput::getBufferSize() { return bufferSize; } void BufferedIndexInput::checkBufferSize(int32_t bufferSize) { if (bufferSize <= 0) boost::throw_exception(IllegalArgumentException(L"bufferSize must be greater than 0 (got " + StringUtils::toString(bufferSize) + L")")); } void BufferedIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { readBytes(b, offset, length, true); } void BufferedIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer) { if (length <= (bufferLength - bufferPosition)) { // the buffer contains enough data to satisfy this request if (length > 0) // to allow b to be null if length is 0 MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, length); bufferPosition += length; } else { // the buffer does not have enough data, first serve all we've got int32_t available = bufferLength - bufferPosition; if (available > 0) { MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, available); offset += available; length -= available; bufferPosition += available; } // and now, read the remaining 'length' bytes if (useBuffer && length < bufferSize) { // If the amount left to read is small enough, and we are allowed to use our buffer, // do it in the usual buffered way: fill the buffer and copy from it refill(); if (bufferLength < length) { // throw an exception when refill() could not read length bytes MiscUtils::arrayCopy(buffer.get(), 0, b, offset, bufferLength); boost::throw_exception(IOException(L"Read past EOF")); } else { MiscUtils::arrayCopy(buffer.get(), 0, b, offset, length); bufferPosition = length; } } else { // The amount left to read is larger than 
the buffer or we've been asked to not use // our buffer - there's no performance reason not to read it all at once. // Note that unlike the previous code of this function, there is no need to do a seek // here, because there's no need to reread what we had in the buffer. int64_t after = bufferStart + bufferPosition + length; if (after > this->length()) boost::throw_exception(IOException(L"Read past EOF")); readInternal(b, offset, length); bufferStart = after; bufferPosition = 0; bufferLength = 0; // trigger refill() on read } } } void BufferedIndexInput::refill() { int64_t start = bufferStart + bufferPosition; int64_t end = start + bufferSize; if (end > length()) // don't read past EOF end = length(); int32_t newLength = (int32_t)(end - start); if (newLength <= 0) boost::throw_exception(IOException(L"Read past EOF")); if (!buffer) { newBuffer(ByteArray::newInstance(bufferSize)); // allocate buffer lazily seekInternal(bufferStart); } readInternal(buffer.get(), 0, newLength); bufferLength = newLength; bufferStart = start; bufferPosition = 0; } void BufferedIndexInput::close() { bufferStart = 0; bufferLength = 0; bufferPosition = 0; } int64_t BufferedIndexInput::getFilePointer() { return bufferStart + bufferPosition; } void BufferedIndexInput::seek(int64_t pos) { if (pos >= bufferStart && pos < (bufferStart + bufferLength)) bufferPosition = (int32_t)(pos - bufferStart); // seek within buffer else { bufferStart = pos; bufferPosition = 0; bufferLength = 0; // trigger refill() on read() seekInternal(pos); } } LuceneObjectPtr BufferedIndexInput::clone(LuceneObjectPtr other) { BufferedIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(IndexInput::clone(other))); cloneIndexInput->bufferSize = bufferSize; cloneIndexInput->buffer.reset(); cloneIndexInput->bufferLength = 0; cloneIndexInput->bufferPosition = 0; cloneIndexInput->bufferStart = getFilePointer(); return cloneIndexInput; } } 
LucenePlusPlus-rel_3.0.4/src/core/store/BufferedIndexOutput.cpp000066400000000000000000000061741217574114600246350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BufferedIndexOutput.h" #include "MiscUtils.h" namespace Lucene { const int32_t BufferedIndexOutput::BUFFER_SIZE = 16384; BufferedIndexOutput::BufferedIndexOutput() { bufferStart = 0; bufferPosition = 0; buffer = ByteArray::newInstance(BUFFER_SIZE); } BufferedIndexOutput::~BufferedIndexOutput() { } void BufferedIndexOutput::writeByte(uint8_t b) { if (bufferPosition >= BUFFER_SIZE) flush(); buffer[bufferPosition++] = b; } void BufferedIndexOutput::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { int32_t bytesLeft = BUFFER_SIZE - bufferPosition; if (bytesLeft >= length) { // we add the data to the end of the buffer MiscUtils::arrayCopy(b, offset, buffer.get(), bufferPosition, length); bufferPosition += length; // if the buffer is full, flush it if (BUFFER_SIZE - bufferPosition == 0) flush(); } else if (length > BUFFER_SIZE) { // we flush the buffer if (bufferPosition > 0) flush(); // and write data at once flushBuffer(b, offset, length); bufferStart += length; } else { // we fill/flush the buffer (until the input is written) int32_t pos = 0; // position in the input data int32_t pieceLength; while (pos < length) { pieceLength = (length - pos < bytesLeft) ? 
length - pos : bytesLeft; MiscUtils::arrayCopy(b, pos + offset, buffer.get(), bufferPosition, pieceLength); pos += pieceLength; bufferPosition += pieceLength; // if the buffer is full, flush it bytesLeft = BUFFER_SIZE - bufferPosition; if (bytesLeft == 0) { flush(); bytesLeft = BUFFER_SIZE; } } } } void BufferedIndexOutput::flush() { flushBuffer(buffer.get(), bufferPosition); bufferStart += bufferPosition; bufferPosition = 0; } void BufferedIndexOutput::flushBuffer(const uint8_t* b, int32_t length) { flushBuffer(b, 0, length); } void BufferedIndexOutput::flushBuffer(const uint8_t* b, int32_t offset, int32_t length) { // override } void BufferedIndexOutput::close() { flush(); } int64_t BufferedIndexOutput::getFilePointer() { return bufferStart + bufferPosition; } void BufferedIndexOutput::seek(int64_t pos) { flush(); bufferStart = pos; } } LucenePlusPlus-rel_3.0.4/src/core/store/ChecksumIndexInput.cpp000066400000000000000000000034611217574114600244500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ChecksumIndexInput.h" namespace Lucene { ChecksumIndexInput::ChecksumIndexInput(IndexInputPtr main) { this->main = main; } ChecksumIndexInput::~ChecksumIndexInput() { } uint8_t ChecksumIndexInput::readByte() { uint8_t b = main->readByte(); checksum.process_byte(b); return b; } void ChecksumIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { main->readBytes(b, offset, length); checksum.process_bytes(b + offset, length); } int64_t ChecksumIndexInput::getChecksum() { return checksum.checksum(); } void ChecksumIndexInput::close() { main->close(); } int64_t ChecksumIndexInput::getFilePointer() { return main->getFilePointer(); } void ChecksumIndexInput::seek(int64_t pos) { boost::throw_exception(RuntimeException(L"Seek not allowed")); } int64_t ChecksumIndexInput::length() { return main->length(); } LuceneObjectPtr ChecksumIndexInput::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene(main)); ChecksumIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); cloneIndexInput->main = main; cloneIndexInput->checksum = checksum; return cloneIndexInput; } } LucenePlusPlus-rel_3.0.4/src/core/store/ChecksumIndexOutput.cpp000066400000000000000000000041271217574114600246510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ChecksumIndexOutput.h" namespace Lucene { ChecksumIndexOutput::ChecksumIndexOutput(IndexOutputPtr main) { this->main = main; } ChecksumIndexOutput::~ChecksumIndexOutput() { } void ChecksumIndexOutput::writeByte(uint8_t b) { checksum.process_byte(b); main->writeByte(b); } void ChecksumIndexOutput::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { checksum.process_bytes(b + offset, length); main->writeBytes(b, offset, length); } int64_t ChecksumIndexOutput::getChecksum() { return checksum.checksum(); } void ChecksumIndexOutput::flush() { main->flush(); } void ChecksumIndexOutput::close() { main->close(); } int64_t ChecksumIndexOutput::getFilePointer() { return main->getFilePointer(); } void ChecksumIndexOutput::seek(int64_t pos) { boost::throw_exception(RuntimeException(L"Seek not allowed")); } void ChecksumIndexOutput::prepareCommit() { int64_t checksum = getChecksum(); // Intentionally write a mismatched checksum. This is because we want to 1) test, as best we can, that we // are able to write a long to the file, but 2) not actually "commit" the file yet. This (prepare commit) // is phase 1 of a two-phase commit. int64_t pos = main->getFilePointer(); main->writeLong(checksum - 1); main->flush(); main->seek(pos); } void ChecksumIndexOutput::finishCommit() { main->writeLong(getChecksum()); } int64_t ChecksumIndexOutput::length() { return main->length(); } } LucenePlusPlus-rel_3.0.4/src/core/store/Directory.cpp000066400000000000000000000070731217574114600226450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Directory.h" #include "LockFactory.h" #include "BufferedIndexOutput.h" #include "IndexFileNameFilter.h" #include "IndexInput.h" #include "IndexOutput.h" namespace Lucene { Directory::Directory() { isOpen = true; } Directory::~Directory() { } void Directory::close() { // override } void Directory::sync(const String& name) { } IndexInputPtr Directory::openInput(const String& name, int32_t bufferSize) { return openInput(name); } LockPtr Directory::makeLock(const String& name) { return lockFactory->makeLock(name); } void Directory::clearLock(const String& name) { if (lockFactory) lockFactory->clearLock(name); } void Directory::setLockFactory(LockFactoryPtr lockFactory) { BOOST_ASSERT(lockFactory); this->lockFactory = lockFactory; this->lockFactory->setLockPrefix(getLockID()); } LockFactoryPtr Directory::getLockFactory() { return lockFactory; } String Directory::getLockID() { return toString(); } String Directory::toString() { return LuceneObject::toString() + L" lockFactory=" + getLockFactory()->toString(); } void Directory::copy(DirectoryPtr src, DirectoryPtr dest, bool closeDirSrc) { HashSet files(src->listAll()); ByteArray buf(ByteArray::newInstance(BufferedIndexOutput::BUFFER_SIZE)); for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { if (!IndexFileNameFilter::accept(L"", *file)) continue; IndexOutputPtr os; IndexInputPtr is; LuceneException finally; try { // create file in dest directory os = dest->createOutput(*file); // read current file is = src->openInput(*file); // and copy to dest directory int64_t len = is->length(); int64_t readCount = 0; while (readCount < len) { int32_t toRead = readCount + BufferedIndexOutput::BUFFER_SIZE > len ? 
(int32_t)(len - readCount) : BufferedIndexOutput::BUFFER_SIZE; is->readBytes(buf.get(), 0, toRead); os->writeBytes(buf.get(), toRead); readCount += toRead; } } catch (LuceneException& e) { finally = e; } // graceful cleanup try { if (os) os->close(); } catch (...) { } try { if (is) is->close(); } catch (...) { } finally.throwException(); } if (closeDirSrc) src->close(); } void Directory::ensureOpen() { if (!isOpen) boost::throw_exception(AlreadyClosedException(L"This directory is closed")); } } LucenePlusPlus-rel_3.0.4/src/core/store/FSDirectory.cpp000066400000000000000000000163331217574114600230750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "FSDirectory.h" #include "NativeFSLockFactory.h" #include "SimpleFSDirectory.h" #include "BufferedIndexInput.h" #include "LuceneThread.h" #include "FileUtils.h" #include "StringUtils.h" extern "C" { #include "../util/md5/md5.h" } namespace Lucene { /// Default read chunk size. This is a conditional default based on operating system. 
#ifdef LPP_BUILD_64 const int32_t FSDirectory::DEFAULT_READ_CHUNK_SIZE = INT_MAX; #else const int32_t FSDirectory::DEFAULT_READ_CHUNK_SIZE = 100 * 1024 * 1024; // 100mb #endif FSDirectory::FSDirectory(const String& path, LockFactoryPtr lockFactory) { checked = false; chunkSize = DEFAULT_READ_CHUNK_SIZE; // new ctors use always NativeFSLockFactory as default if (!lockFactory) lockFactory = newLucene(); directory = path; if (FileUtils::fileExists(directory) && !FileUtils::isDirectory(directory)) boost::throw_exception(NoSuchDirectoryException(L"File '" + directory + L"' exists but is not a directory")); setLockFactory(lockFactory); // for filesystem based LockFactory, delete the lockPrefix if the locks are placed // in index dir. if no index dir is given, set ourselves FSLockFactoryPtr lf(boost::dynamic_pointer_cast(lockFactory)); if (lf) { if (lf->getLockDir().empty()) { lf->setLockDir(directory); lf->setLockPrefix(L""); } else if (lf->getLockDir() == directory) lf->setLockPrefix(L""); } } FSDirectory::~FSDirectory() { } FSDirectoryPtr FSDirectory::open(const String& path) { return open(path, LockFactoryPtr()); } FSDirectoryPtr FSDirectory::open(const String& path, LockFactoryPtr lockFactory) { return newLucene(path, lockFactory); } void FSDirectory::createDir() { if (!checked) { if (!FileUtils::fileExists(directory) && !FileUtils::createDirectory(directory)) boost::throw_exception(IOException(L"Cannot create directory: " + directory)); checked = true; } } void FSDirectory::initOutput(const String& name) { ensureOpen(); createDir(); String path(FileUtils::joinPath(directory, name)); if (FileUtils::fileExists(path) && !FileUtils::removeFile(path)) // delete existing, if any boost::throw_exception(IOException(L"Cannot overwrite: " + name)); } HashSet FSDirectory::listAll(const String& dir) { if (!FileUtils::fileExists(dir)) boost::throw_exception(NoSuchDirectoryException(L"Directory '" + dir + L"' does not exist")); else if (!FileUtils::isDirectory(dir)) 
boost::throw_exception(NoSuchDirectoryException(L"File '" + dir + L"' exists but is not a directory")); HashSet result(HashSet::newInstance()); // Exclude subdirs if (!FileUtils::listDirectory(dir, true, result)) boost::throw_exception(IOException(L"Directory '" + dir + L"' exists and is a directory, but cannot be listed")); return result; } HashSet FSDirectory::listAll() { ensureOpen(); return listAll(directory); } bool FSDirectory::fileExists(const String& name) { ensureOpen(); return FileUtils::fileExists(FileUtils::joinPath(directory, name)); } uint64_t FSDirectory::fileModified(const String& name) { ensureOpen(); return FileUtils::fileModified(FileUtils::joinPath(directory, name)); } uint64_t FSDirectory::fileModified(const String& directory, const String& name) { return FileUtils::fileModified(FileUtils::joinPath(directory, name)); } void FSDirectory::touchFile(const String& name) { ensureOpen(); FileUtils::touchFile(FileUtils::joinPath(directory, name)); } void FSDirectory::deleteFile(const String& name) { ensureOpen(); if (!FileUtils::removeFile(FileUtils::joinPath(directory, name))) boost::throw_exception(IOException(L"Cannot delete: " + name)); } int64_t FSDirectory::fileLength(const String& name) { ensureOpen(); return FileUtils::fileLength(FileUtils::joinPath(directory, name)); } void FSDirectory::sync(const String& name) { ensureOpen(); String path(FileUtils::joinPath(directory, name)); bool success = false; for (int32_t retryCount = 0; retryCount < 5; ++retryCount) { std::ofstream syncFile; try { syncFile.open(StringUtils::toUTF8(path).c_str(), std::ios::binary | std::ios::in | std::ios::out); } catch (...) 
{ } if (syncFile.is_open()) { syncFile.close(); success = true; break; } LuceneThread::threadSleep(5); // pause 5 msec } if (!success) boost::throw_exception(IOException(L"Sync failure: " + path)); } IndexInputPtr FSDirectory::openInput(const String& name) { ensureOpen(); return openInput(name, BufferedIndexInput::BUFFER_SIZE); } IndexInputPtr FSDirectory::openInput(const String& name, int32_t bufferSize) { return Directory::openInput(name, bufferSize); } String FSDirectory::getLockID() { ensureOpen(); md5_state_t state; md5_byte_t digest[16]; md5_init(&state); md5_append(&state, (const md5_byte_t *)StringUtils::toUTF8(directory).c_str(), directory.size()); md5_finish(&state, digest); static const wchar_t* hexDigits = L"0123456789abcdef"; String lockID(L"lucene-"); for (int32_t i = 0; i < 16; ++i) { lockID += hexDigits[(digest[i] >> 4) & 0x0f]; lockID += hexDigits[digest[i] & 0x0f]; } return lockID; } void FSDirectory::close() { SyncLock syncLock(this); isOpen = false; } String FSDirectory::toString() { return getClassName() + L"@" + directory + L" lockFactory=" + getLockFactory()->toString(); } String FSDirectory::getFile() { ensureOpen(); return directory; } void FSDirectory::setReadChunkSize(int32_t chunkSize) { #ifndef LPP_BUILD_64 this->chunkSize = chunkSize; #endif } int32_t FSDirectory::getReadChunkSize() { return chunkSize; } } LucenePlusPlus-rel_3.0.4/src/core/store/FSLockFactory.cpp000066400000000000000000000015251217574114600233460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FSLockFactory.h" namespace Lucene { FSLockFactory::FSLockFactory() { } FSLockFactory::~FSLockFactory() { } void FSLockFactory::setLockDir(const String& lockDir) { if (!this->lockDir.empty()) boost::throw_exception(IllegalStateException(L"You can set the lock directory for this factory only once.")); this->lockDir = lockDir; } String FSLockFactory::getLockDir() { return lockDir; } } LucenePlusPlus-rel_3.0.4/src/core/store/FileSwitchDirectory.cpp000066400000000000000000000063271217574114600246300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FileSwitchDirectory.h" namespace Lucene { FileSwitchDirectory::FileSwitchDirectory(HashSet primaryExtensions, DirectoryPtr primaryDir, DirectoryPtr secondaryDir, bool doClose) { this->primaryExtensions = primaryExtensions; this->primaryDir = primaryDir; this->secondaryDir = secondaryDir; this->doClose = doClose; this->lockFactory = primaryDir->getLockFactory(); } FileSwitchDirectory::~FileSwitchDirectory() { } DirectoryPtr FileSwitchDirectory::getPrimaryDir() { return primaryDir; } DirectoryPtr FileSwitchDirectory::getSecondaryDir() { return secondaryDir; } void FileSwitchDirectory::close() { if (doClose) { LuceneException finally; try { secondaryDir->close(); } catch (LuceneException& e) { finally = e; } doClose = false; primaryDir->close(); finally.throwException(); } } HashSet FileSwitchDirectory::listAll() { HashSet primaryFiles(primaryDir->listAll()); HashSet secondaryFiles(secondaryDir->listAll()); HashSet files(HashSet::newInstance(primaryFiles.begin(), 
primaryFiles.end())); files.addAll(secondaryFiles.begin(), secondaryFiles.end()); return files; } String FileSwitchDirectory::getExtension(const String& name) { String::size_type i = name.find_last_of(L'.'); return i == String::npos ? L"" : name.substr(i + 1); } DirectoryPtr FileSwitchDirectory::getDirectory(const String& name) { return primaryExtensions.contains(getExtension(name)) ? primaryDir : secondaryDir; } bool FileSwitchDirectory::fileExists(const String& name) { return getDirectory(name)->fileExists(name); } uint64_t FileSwitchDirectory::fileModified(const String& name) { return getDirectory(name)->fileModified(name); } void FileSwitchDirectory::touchFile(const String& name) { getDirectory(name)->touchFile(name); } void FileSwitchDirectory::deleteFile(const String& name) { getDirectory(name)->deleteFile(name); } int64_t FileSwitchDirectory::fileLength(const String& name) { return getDirectory(name)->fileLength(name); } IndexOutputPtr FileSwitchDirectory::createOutput(const String& name) { return getDirectory(name)->createOutput(name); } void FileSwitchDirectory::sync(const String& name) { getDirectory(name)->sync(name); } IndexInputPtr FileSwitchDirectory::openInput(const String& name) { return getDirectory(name)->openInput(name); } } LucenePlusPlus-rel_3.0.4/src/core/store/IndexInput.cpp000066400000000000000000000106271217574114600227670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexInput.h" #include "UTF8Stream.h" #include "Reader.h" #include "StringUtils.h" namespace Lucene { IndexInput::IndexInput() { preUTF8Strings = false; } IndexInput::~IndexInput() { } void IndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer) { // default to ignoring useBuffer entirely readBytes(b, offset, length); } int32_t IndexInput::readInt() { int32_t i = (readByte() & 0xff) << 24; i |= (readByte() & 0xff) << 16; i |= (readByte() & 0xff) << 8; i |= (readByte() & 0xff); return i; } int32_t IndexInput::readVInt() { uint8_t b = readByte(); int32_t i = (b & 0x7f); for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { b = readByte(); i |= (b & 0x7f) << shift; } return i; } int64_t IndexInput::readLong() { int64_t i = (int64_t)readInt() << 32; i |= (readInt() & 0xffffffffLL); return i; } int64_t IndexInput::readVLong() { uint8_t b = readByte(); int64_t i = (b & 0x7f); for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { b = readByte(); i |= (int64_t)(b & 0x7f) << shift; } return i; } void IndexInput::setModifiedUTF8StringsMode() { preUTF8Strings = true; } String IndexInput::readString() { if (preUTF8Strings) return readModifiedUTF8String(); int32_t length = readVInt(); ByteArray bytes(ByteArray::newInstance(length)); readBytes(bytes.get(), 0, length); return StringUtils::toUnicode(bytes.get(), length); } String IndexInput::readModifiedUTF8String() { int32_t length = readVInt(); CharArray chars(CharArray::newInstance(length)); return String(chars.get(), readChars(chars.get(), 0, length)); } int32_t IndexInput::readChars(wchar_t* buffer, int32_t start, int32_t length) { Array chars(Array::newInstance(length)); for (int32_t i = 0; i < length; ++i) { uint8_t b = readByte(); if ((b & 0x80) == 0) chars[i] = (uint16_t)(b & 0x7f); else if ((b & 0xe0) != 0xe0) chars[i] = (uint16_t)(((b & 0x1f) << 6) | (readByte() & 0x3f)); else { 
uint32_t ch = ((b & 0x0f) << 12); ch |= (readByte() & 0x3f) << 6; ch |= (readByte() & 0x3f); chars[i] = (uint16_t)ch; } } UTF16DecoderPtr utf16Decoder(newLucene(chars.get(), chars.get() + length)); int32_t decodeLength = utf16Decoder->decode(buffer + start, length); return decodeLength == Reader::READER_EOF ? 0 : decodeLength; } void IndexInput::skipChars(int32_t length) { for (int32_t i = 0; i < length; ++i) { uint8_t b = readByte(); if ((b & 0x80) == 0) { // do nothing, we only need one byte } else if ((b & 0xe0) != 0xe0) readByte(); // read an additional byte else { // read two additional bytes readByte(); readByte(); } } } MapStringString IndexInput::readStringStringMap() { MapStringString map(MapStringString::newInstance()); int32_t count = readInt(); for (int32_t i = 0; i < count; ++i) { String key(readString()); String val(readString()); map.put(key, val); } return map; } LuceneObjectPtr IndexInput::clone(LuceneObjectPtr other) { IndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(LuceneObject::clone(other))); cloneIndexInput->preUTF8Strings = preUTF8Strings; return cloneIndexInput; } } LucenePlusPlus-rel_3.0.4/src/core/store/IndexOutput.cpp000066400000000000000000000071031217574114600231630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexOutput.h" #include "IndexInput.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t IndexOutput::COPY_BUFFER_SIZE = 16384; IndexOutput::~IndexOutput() { } void IndexOutput::writeBytes(const uint8_t* b, int32_t length) { writeBytes(b, 0, length); } void IndexOutput::writeInt(int32_t i) { writeByte((uint8_t)(i >> 24)); writeByte((uint8_t)(i >> 16)); writeByte((uint8_t)(i >> 8)); writeByte((uint8_t)i); } void IndexOutput::writeVInt(int32_t i) { while ((i & ~0x7f) != 0) { writeByte((uint8_t)((i & 0x7f) | 0x80)); i = MiscUtils::unsignedShift(i, 7); } writeByte((uint8_t)i); } void IndexOutput::writeLong(int64_t i) { writeInt((int32_t)(i >> 32)); writeInt((int32_t)i); } void IndexOutput::writeVLong(int64_t i) { while ((i & ~0x7f) != 0) { writeByte((uint8_t)((i & 0x7f) | 0x80)); i = MiscUtils::unsignedShift(i, (int64_t)7); } writeByte((uint8_t)i); } void IndexOutput::writeString(const String& s) { UTF8ResultPtr utf8Result(newLucene()); StringUtils::toUTF8(s.c_str(), s.length(), utf8Result); writeVInt(utf8Result->length); writeBytes(utf8Result->result.get(), utf8Result->length); } void IndexOutput::writeChars(const String& s, int32_t start, int32_t length) { int32_t end = start + length; for (int32_t i = start; i < end; ++i) { int32_t code = (int32_t)s[i]; if (code >= 0x01 && code <= 0x7f) writeByte((uint8_t)code); else if (((code >= 0x80) && (code <= 0x7ff)) || code == 0) { writeByte((uint8_t)(0xc0 | (code >> 6))); writeByte((uint8_t)(0x80 | (code & 0x3f))); } else { writeByte((uint8_t)(0xe0 | MiscUtils::unsignedShift(code, 12))); writeByte((uint8_t)(0x80 | ((code >> 6) & 0x3f))); writeByte((uint8_t)(0x80 | (code & 0x3f))); } } } void IndexOutput::copyBytes(IndexInputPtr input, int64_t numBytes) { BOOST_ASSERT(numBytes >= 0); int64_t left = numBytes; if (!copyBuffer) copyBuffer = 
ByteArray::newInstance(COPY_BUFFER_SIZE); while (left > 0) { int32_t toCopy = left > COPY_BUFFER_SIZE ? COPY_BUFFER_SIZE : (int32_t)left; input->readBytes(copyBuffer.get(), 0, toCopy); writeBytes(copyBuffer.get(), 0, toCopy); left -= toCopy; } } void IndexOutput::setLength(int64_t length) { } void IndexOutput::writeStringStringMap(MapStringString map) { if (!map) writeInt(0); else { writeInt(map.size()); for (MapStringString::iterator entry = map.begin(); entry != map.end(); ++entry) { writeString(entry->first); writeString(entry->second); } } } } LucenePlusPlus-rel_3.0.4/src/core/store/Lock.cpp000066400000000000000000000024361217574114600215670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Lock.h" #include "LuceneThread.h" namespace Lucene { /// How long {@link #obtain(int64_t)} waits, in milliseconds, in between attempts to acquire the lock. const int32_t Lock::LOCK_OBTAIN_WAIT_FOREVER = -1; /// Pass this value to {@link #obtain(int64_t)} to try forever to obtain the lock. 
const int32_t Lock::LOCK_POLL_INTERVAL = 1000; Lock::~Lock() { } bool Lock::obtain(int32_t lockWaitTimeout) { bool locked = obtain(); int32_t maxSleepCount = lockWaitTimeout / LOCK_POLL_INTERVAL; int32_t sleepCount = 0; while (!locked) { if (lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER && sleepCount++ >= maxSleepCount) boost::throw_exception(LockObtainFailedException(L"Lock obtain timed out")); LuceneThread::threadSleep(LOCK_POLL_INTERVAL); locked = obtain(); } return locked; } } LucenePlusPlus-rel_3.0.4/src/core/store/LockFactory.cpp000066400000000000000000000012171217574114600231130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LockFactory.h" namespace Lucene { LockFactory::~LockFactory() { } void LockFactory::setLockPrefix(const String& lockPrefix) { this->lockPrefix = lockPrefix; } String LockFactory::getLockPrefix() { return lockPrefix; } } LucenePlusPlus-rel_3.0.4/src/core/store/MMapDirectory.cpp000066400000000000000000000064621217574114600234210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MMapDirectory.h" #include "_MMapDirectory.h" #include "SimpleFSDirectory.h" #include "_SimpleFSDirectory.h" #include "MiscUtils.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { MMapDirectory::MMapDirectory(const String& path, LockFactoryPtr lockFactory) : FSDirectory(path, lockFactory) { } MMapDirectory::~MMapDirectory() { } IndexInputPtr MMapDirectory::openInput(const String& name, int32_t bufferSize) { ensureOpen(); return newLucene(FileUtils::joinPath(directory, name)); } IndexOutputPtr MMapDirectory::createOutput(const String& name) { initOutput(name); return newLucene(FileUtils::joinPath(directory, name)); } MMapIndexInput::MMapIndexInput(const String& path) { _length = path.empty() ? 0 : (int32_t)FileUtils::fileLength(path); bufferPosition = 0; if (!path.empty()) { try { file.open(StringUtils::toUTF8(path).c_str(), _length); } catch (...) { boost::throw_exception(FileNotFoundException(path)); } } isClone = false; } MMapIndexInput::~MMapIndexInput() { } uint8_t MMapIndexInput::readByte() { try { return file.data()[bufferPosition++]; } catch (...) { boost::throw_exception(IOException(L"Read past EOF")); return 0; } } void MMapIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { try { MiscUtils::arrayCopy(file.data(), bufferPosition, b, offset, length); bufferPosition += length; } catch (...) 
{ boost::throw_exception(IOException(L"Read past EOF")); } } int64_t MMapIndexInput::getFilePointer() { return bufferPosition; } void MMapIndexInput::seek(int64_t pos) { bufferPosition = (int32_t)pos; } int64_t MMapIndexInput::length() { return (int64_t)_length; } void MMapIndexInput::close() { if (isClone || !file.is_open()) return; _length = 0; bufferPosition = 0; file.close(); } LuceneObjectPtr MMapIndexInput::clone(LuceneObjectPtr other) { if (!file.is_open()) boost::throw_exception(AlreadyClosedException(L"MMapIndexInput already closed")); LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene()); MMapIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); cloneIndexInput->_length = _length; cloneIndexInput->file = file; cloneIndexInput->bufferPosition = bufferPosition; cloneIndexInput->isClone = true; return cloneIndexInput; } } LucenePlusPlus-rel_3.0.4/src/core/store/NativeFSLockFactory.cpp000066400000000000000000000156301217574114600245170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "NativeFSLockFactory.h" #include "_NativeFSLockFactory.h" #include "Random.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { NativeFSLockFactory::NativeFSLockFactory(const String& lockDirName) { setLockDir(lockDirName); } NativeFSLockFactory::~NativeFSLockFactory() { } LockPtr NativeFSLockFactory::makeLock(const String& lockName) { SyncLock syncLock(this); return newLucene(lockDir, lockPrefix.empty() ? 
lockName : lockPrefix + L"-" + lockName); } void NativeFSLockFactory::clearLock(const String& lockName) { // note that this isn't strictly required anymore because the existence of these files does not mean // they are locked, but still do this in case people really want to see the files go away if (FileUtils::isDirectory(lockDir)) { String lockPath(FileUtils::joinPath(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName)); if (FileUtils::fileExists(lockPath) && !FileUtils::removeFile(lockPath)) boost::throw_exception(IOException(L"Failed to delete: " + lockPath)); } } NativeFSLock::NativeFSLock(const String& lockDir, const String& lockFileName) { this->lockDir = lockDir; path = FileUtils::joinPath(lockDir, lockFileName); } NativeFSLock::~NativeFSLock() { try { release(); } catch (...) { } } SynchronizePtr NativeFSLock::LOCK_HELD_LOCK() { static SynchronizePtr _LOCK_HELD_LOCK; if (!_LOCK_HELD_LOCK) _LOCK_HELD_LOCK = newInstance(); return _LOCK_HELD_LOCK; } HashSet NativeFSLock::LOCK_HELD() { static HashSet _LOCK_HELD; if (!_LOCK_HELD) _LOCK_HELD = HashSet::newInstance(); return _LOCK_HELD; } bool NativeFSLock::lockExists() { SyncLock syncLock(this); return lock; } bool NativeFSLock::obtain() { SyncLock syncLock(this); if (lockExists()) // our instance is already locked return false; // ensure that lockdir exists and is a directory if (!FileUtils::fileExists(lockDir)) { if (!FileUtils::createDirectory(lockDir)) boost::throw_exception(IOException(L"Cannot create directory: " + lockDir)); } else if (!FileUtils::isDirectory(lockDir)) boost::throw_exception(IOException(L"Found regular file where directory expected: " + lockDir)); bool markedHeld = false; // make sure nobody else in-process has this lock held already and mark it held if not { SyncLock heldLock(LOCK_HELD_LOCK()); if (LOCK_HELD().contains(path)) // someone else already has the lock return false; else { // this "reserves" the fact that we are the one thread trying to obtain this lock, so we 
own the // only instance of a channel against this file LOCK_HELD().add(path); markedHeld = true; } } try { // we can get intermittent "access denied" here, so we treat this as failure to acquire the lock std::ofstream f(StringUtils::toUTF8(path).c_str(), std::ios::binary | std::ios::out); if (f.is_open()) { lock = newInstance(StringUtils::toUTF8(path).c_str()); lock->lock(); } } catch (...) { lock.reset(); } if (markedHeld && !lockExists()) { SyncLock heldLock(LOCK_HELD_LOCK()); LOCK_HELD().remove(path); } return lockExists(); } void NativeFSLock::release() { SyncLock syncLock(this); if (lockExists()) { try { lock->unlock(); lock.reset(); } catch (...) { } { SyncLock heldLock(LOCK_HELD_LOCK()); LOCK_HELD().remove(path); } // we don't care anymore if the file cannot be deleted because it's held up by another process // (eg. AntiVirus). NativeFSLock does not depend on the existence/absence of the lock file FileUtils::removeFile(path); } else { // if we don't hold the lock, and somebody still called release(), for example as a result of // calling IndexWriter.unlock(), we should attempt to obtain the lock and release it. If the // obtain fails, it means the lock cannot be released, and we should throw a proper exception // rather than silently failing/not doing anything. bool obtained = false; LuceneException finally; try { obtained = obtain(); if (!obtained) boost::throw_exception(LockReleaseFailedException(L"Cannot forcefully unlock a NativeFSLock which is held by another indexer component: " + path)); } catch (LuceneException& e) { finally = e; } if (obtained) release(); finally.throwException(); } } bool NativeFSLock::isLocked() { SyncLock syncLock(this); // the test for is islocked is not directly possible with native file locks // first a shortcut, if a lock reference in this instance is available if (lockExists()) return true; // look if lock file is present; if not, there can definitely be no lock! 
if (!FileUtils::fileExists(path)) return false; // try to obtain and release (if was locked) the lock try { bool obtained = obtain(); if (obtained) release(); return !obtained; } catch (LuceneException&) { return false; } } String NativeFSLock::toString() { return getClassName() + L"@" + path; } } LucenePlusPlus-rel_3.0.4/src/core/store/NoLockFactory.cpp000066400000000000000000000030741217574114600234130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NoLockFactory.h" #include "_NoLockFactory.h" namespace Lucene { NoLockFactory::~NoLockFactory() { } NoLockFactoryPtr NoLockFactory::getNoLockFactory() { static NoLockFactoryPtr singleton; if (!singleton) { singleton = newLucene(); CycleCheck::addStatic(singleton); } return singleton; } NoLockPtr NoLockFactory::getSingletonLock() { // Single instance returned whenever makeLock is called. static NoLockPtr singletonLock; if (!singletonLock) { singletonLock = newLucene(); CycleCheck::addStatic(singletonLock); } return singletonLock; } LockPtr NoLockFactory::makeLock(const String& lockName) { return getSingletonLock(); } void NoLockFactory::clearLock(const String& lockName) { } NoLock::~NoLock() { } bool NoLock::obtain() { return true; } void NoLock::release() { } bool NoLock::isLocked() { return false; } String NoLock::toString() { return getClassName(); } } LucenePlusPlus-rel_3.0.4/src/core/store/RAMDirectory.cpp000066400000000000000000000120731217574114600232010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RAMDirectory.h" #include "RAMFile.h" #include "RAMInputStream.h" #include "RAMOutputStream.h" #include "SingleInstanceLockFactory.h" #include "LuceneThread.h" #include "MiscUtils.h" namespace Lucene { RAMDirectory::RAMDirectory() { this->fileMap = MapStringRAMFile::newInstance(); this->_sizeInBytes = 0; this->copyDirectory = false; this->closeDir = false; setLockFactory(newLucene()); } RAMDirectory::RAMDirectory(DirectoryPtr dir) { this->fileMap = MapStringRAMFile::newInstance(); this->_sizeInBytes = 0; this->copyDirectory = true; this->_dirSource = dir; this->closeDir = false; setLockFactory(newLucene()); } RAMDirectory::RAMDirectory(DirectoryPtr dir, bool closeDir) { this->fileMap = MapStringRAMFile::newInstance(); this->_sizeInBytes = 0; this->copyDirectory = true; this->_dirSource = dir; this->closeDir = closeDir; setLockFactory(newLucene()); } RAMDirectory::~RAMDirectory() { } void RAMDirectory::initialize() { if (copyDirectory) Directory::copy(DirectoryPtr(_dirSource), shared_from_this(), closeDir); } HashSet RAMDirectory::listAll() { SyncLock syncLock(this); ensureOpen(); HashSet result(HashSet::newInstance()); for (MapStringRAMFile::iterator fileName = fileMap.begin(); fileName != fileMap.end(); ++fileName) result.add(fileName->first); return result; } bool RAMDirectory::fileExists(const String& name) { ensureOpen(); SyncLock syncLock(this); return fileMap.contains(name); } uint64_t RAMDirectory::fileModified(const String& name) { ensureOpen(); SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) boost::throw_exception(FileNotFoundException(name)); return ramFile->second->getLastModified(); } void RAMDirectory::touchFile(const String& name) { ensureOpen(); RAMFilePtr 
file; { SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) boost::throw_exception(FileNotFoundException(name)); file = ramFile->second; } int64_t ts1 = MiscUtils::currentTimeMillis(); while (ts1 == MiscUtils::currentTimeMillis()) LuceneThread::threadSleep(1); file->setLastModified(MiscUtils::currentTimeMillis()); } int64_t RAMDirectory::fileLength(const String& name) { ensureOpen(); SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) boost::throw_exception(FileNotFoundException(name)); return ramFile->second->getLength(); } int64_t RAMDirectory::sizeInBytes() { SyncLock syncLock(this); ensureOpen(); return _sizeInBytes; } void RAMDirectory::deleteFile(const String& name) { SyncLock syncLock(this); ensureOpen(); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) boost::throw_exception(FileNotFoundException(name)); _sizeInBytes -= ramFile->second->getSizeInBytes(); fileMap.remove(name); } IndexOutputPtr RAMDirectory::createOutput(const String& name) { ensureOpen(); RAMFilePtr file(newLucene(shared_from_this())); { SyncLock syncLock(this); MapStringRAMFile::iterator existing = fileMap.find(name); if (existing != fileMap.end()) { _sizeInBytes -= existing->second->getSizeInBytes(); existing->second->_directory.reset(); } fileMap.put(name, file); } return newLucene(file); } IndexInputPtr RAMDirectory::openInput(const String& name) { ensureOpen(); RAMFilePtr file; { SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) boost::throw_exception(FileNotFoundException(name)); file = ramFile->second; } return newLucene(file); } void RAMDirectory::close() { isOpen = false; fileMap.reset(); } } 
LucenePlusPlus-rel_3.0.4/src/core/store/RAMFile.cpp000066400000000000000000000045451217574114600221210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RAMFile.h" #include "RAMDirectory.h" #include "MiscUtils.h" namespace Lucene { RAMFile::RAMFile() { this->buffers = Collection::newInstance(); this->length = 0; this->sizeInBytes = 0; this->lastModified = MiscUtils::currentTimeMillis(); } RAMFile::RAMFile(RAMDirectoryPtr directory) { this->buffers = Collection::newInstance(); this->length = 0; this->sizeInBytes = 0; this->_directory = directory; this->lastModified = MiscUtils::currentTimeMillis(); } RAMFile::~RAMFile() { } int64_t RAMFile::getLength() { SyncLock syncLock(this); return length; } void RAMFile::setLength(int64_t length) { SyncLock syncLock(this); this->length = length; } int64_t RAMFile::getLastModified() { SyncLock syncLock(this); return lastModified; } void RAMFile::setLastModified(int64_t lastModified) { SyncLock syncLock(this); this->lastModified = lastModified; } ByteArray RAMFile::addBuffer(int32_t size) { ByteArray buffer(newBuffer(size)); { SyncLock syncLock(this); buffers.add(buffer); sizeInBytes += size; } RAMDirectoryPtr directory(_directory.lock()); if (directory) { SyncLock dirLock(directory); directory->_sizeInBytes += size; } return buffer; } ByteArray RAMFile::getBuffer(int32_t index) { SyncLock syncLock(this); return buffers[index]; } int32_t RAMFile::numBuffers() { SyncLock syncLock(this); return buffers.size(); } ByteArray RAMFile::newBuffer(int32_t size) { return ByteArray::newInstance(size); } int64_t RAMFile::getSizeInBytes() { SyncLock syncLock(this); return sizeInBytes; } } 
LucenePlusPlus-rel_3.0.4/src/core/store/RAMInputStream.cpp000066400000000000000000000105371217574114600235130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RAMInputStream.h" #include "RAMFile.h" #include "RAMOutputStream.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t RAMInputStream::BUFFER_SIZE = RAMOutputStream::BUFFER_SIZE; RAMInputStream::RAMInputStream() { _length = 0; // make sure that we switch to the first needed buffer lazily currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; } RAMInputStream::RAMInputStream(RAMFilePtr f) { file = f; _length = file->length; if (_length / BUFFER_SIZE >= INT_MAX) boost::throw_exception(IOException(L"Too large RAMFile: " + StringUtils::toString(_length))); // make sure that we switch to the first needed buffer lazily currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; } RAMInputStream::~RAMInputStream() { } void RAMInputStream::close() { // nothing to do here } int64_t RAMInputStream::length() { return _length; } uint8_t RAMInputStream::readByte() { if (bufferPosition >= bufferLength) { ++currentBufferIndex; switchCurrentBuffer(true); } return currentBuffer[bufferPosition++]; } void RAMInputStream::readBytes(uint8_t* b, int32_t offset, int32_t length) { while (length > 0) { if (bufferPosition >= bufferLength) { ++currentBufferIndex; switchCurrentBuffer(true); } int32_t remainInBuffer = bufferLength - bufferPosition; int32_t bytesToCopy = length < remainInBuffer ? 
length : remainInBuffer; MiscUtils::arrayCopy(currentBuffer.get(), bufferPosition, b, offset, bytesToCopy); offset += bytesToCopy; length -= bytesToCopy; bufferPosition += bytesToCopy; } } void RAMInputStream::switchCurrentBuffer(bool enforceEOF) { if (currentBufferIndex >= file->numBuffers()) { // end of file reached, no more buffers left if (enforceEOF) boost::throw_exception(IOException(L"Read past EOF")); else { // force eof if a read takes place at this position --currentBufferIndex; bufferPosition = BUFFER_SIZE; } } else { currentBuffer = file->getBuffer(currentBufferIndex); bufferPosition = 0; bufferStart = (int64_t)BUFFER_SIZE * (int64_t)currentBufferIndex; int64_t buflen = _length - bufferStart; bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int32_t)buflen; } } int64_t RAMInputStream::getFilePointer() { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; } void RAMInputStream::seek(int64_t pos) { if (!currentBuffer || (int32_t)pos < bufferStart || (int32_t)pos >= bufferStart + BUFFER_SIZE) { currentBufferIndex = (int32_t)(pos / BUFFER_SIZE); switchCurrentBuffer(false); } bufferPosition = (int32_t)(pos % BUFFER_SIZE); } LuceneObjectPtr RAMInputStream::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene()); RAMInputStreamPtr cloneInputStream(boost::dynamic_pointer_cast(clone)); cloneInputStream->file = file; cloneInputStream->_length = _length; cloneInputStream->currentBuffer = currentBuffer; cloneInputStream->currentBufferIndex = currentBufferIndex; cloneInputStream->bufferPosition = bufferPosition; cloneInputStream->bufferStart = bufferStart; cloneInputStream->bufferLength = bufferLength; return cloneInputStream; } } LucenePlusPlus-rel_3.0.4/src/core/store/RAMOutputStream.cpp000066400000000000000000000105131217574114600237060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RAMOutputStream.h" #include "RAMFile.h" #include "RAMDirectory.h" #include "MiscUtils.h" namespace Lucene { const int32_t RAMOutputStream::BUFFER_SIZE = 1024; RAMOutputStream::RAMOutputStream() { file = newLucene(RAMDirectoryPtr()); // make sure that we switch to the first needed buffer lazily currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; } RAMOutputStream::RAMOutputStream(RAMFilePtr f) { file = f; // make sure that we switch to the first needed buffer lazily currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; } RAMOutputStream::~RAMOutputStream() { } void RAMOutputStream::writeTo(IndexOutputPtr out) { flush(); int64_t end = file->length; int64_t pos = 0; int32_t buffer = 0; while (pos < end) { int32_t length = BUFFER_SIZE; int64_t nextPos = pos + length; if (nextPos > end) // at the last buffer length = (int32_t)(end - pos); out->writeBytes(file->getBuffer(buffer++).get(), length); pos = nextPos; } } void RAMOutputStream::reset() { currentBuffer.reset(); currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; file->setLength(0); } void RAMOutputStream::close() { flush(); } void RAMOutputStream::seek(int64_t pos) { // set the file length in case we seek back and flush() has not been called yet setFileLength(); if ((int64_t)pos < bufferStart || (int64_t)pos >= bufferStart + bufferLength) { currentBufferIndex = (int32_t)(pos / BUFFER_SIZE); switchCurrentBuffer(); } bufferPosition = (int32_t)(pos % BUFFER_SIZE); } int64_t RAMOutputStream::length() { return file->length; } void RAMOutputStream::writeByte(uint8_t b) { if (bufferPosition == bufferLength) { ++currentBufferIndex; switchCurrentBuffer(); } currentBuffer[bufferPosition++] = b; } void 
RAMOutputStream::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { while (length > 0) { BOOST_ASSERT(b != NULL); if (bufferPosition == bufferLength) { ++currentBufferIndex; switchCurrentBuffer(); } int32_t remainInBuffer = currentBuffer.size() - bufferPosition; int32_t bytesToCopy = length < remainInBuffer ? length : remainInBuffer; MiscUtils::arrayCopy(b, offset, currentBuffer.get(), bufferPosition, bytesToCopy); offset += bytesToCopy; length -= bytesToCopy; bufferPosition += bytesToCopy; } } void RAMOutputStream::switchCurrentBuffer() { if (currentBufferIndex == file->numBuffers()) currentBuffer = file->addBuffer(BUFFER_SIZE); else currentBuffer = file->getBuffer(currentBufferIndex); bufferPosition = 0; bufferStart = (int64_t)BUFFER_SIZE * (int64_t)currentBufferIndex; bufferLength = currentBuffer.size(); } void RAMOutputStream::setFileLength() { int64_t pointer = bufferStart + bufferPosition; if (pointer > file->length) file->setLength(pointer); } void RAMOutputStream::flush() { file->setLastModified(MiscUtils::currentTimeMillis()); setFileLength(); } int64_t RAMOutputStream::getFilePointer() { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; } int64_t RAMOutputStream::sizeInBytes() { return file->numBuffers() * BUFFER_SIZE; } } LucenePlusPlus-rel_3.0.4/src/core/store/SimpleFSDirectory.cpp000066400000000000000000000155661217574114600242560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "SimpleFSDirectory.h" #include "_SimpleFSDirectory.h" #include "IndexOutput.h" #include "FileReader.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { SimpleFSDirectory::SimpleFSDirectory(const String& path, LockFactoryPtr lockFactory) : FSDirectory(path, lockFactory) { } SimpleFSDirectory::~SimpleFSDirectory() { } IndexOutputPtr SimpleFSDirectory::createOutput(const String& name) { initOutput(name); return newLucene(FileUtils::joinPath(directory, name)); } IndexInputPtr SimpleFSDirectory::openInput(const String& name) { return FSDirectory::openInput(name); } IndexInputPtr SimpleFSDirectory::openInput(const String& name, int32_t bufferSize) { ensureOpen(); return newLucene(FileUtils::joinPath(directory, name), bufferSize, getReadChunkSize()); } const int32_t InputFile::FILE_EOF = FileReader::FILE_EOF; const int32_t InputFile::FILE_ERROR = FileReader::FILE_ERROR; InputFile::InputFile(const String& path) { file = newInstance(StringUtils::toUTF8(path).c_str(), std::ios::binary | std::ios::in); if (!file->is_open()) boost::throw_exception(FileNotFoundException(path)); position = 0; length = FileUtils::fileLength(path); } InputFile::~InputFile() { } void InputFile::setPosition(int64_t position) { this->position = position; file->seekg((std::streamoff)position); if (!file->good()) boost::throw_exception(IOException()); } int64_t InputFile::getPosition() { return position; } int64_t InputFile::getLength() { return length; } int32_t InputFile::read(uint8_t* b, int32_t offset, int32_t length) { try { if (file->eof()) return FILE_EOF; file->read((char*)b + offset, length); int32_t readCount = file->gcount(); position += readCount; return readCount; } catch (...) 
{ return FILE_ERROR; } } void InputFile::close() { if (file->is_open()) file->close(); } bool InputFile::isValid() { return (file && file->is_open() && file->good()); } SimpleFSIndexInput::SimpleFSIndexInput() { this->chunkSize = 0; this->isClone = false; } SimpleFSIndexInput::SimpleFSIndexInput(const String& path, int32_t bufferSize, int32_t chunkSize) : BufferedIndexInput(bufferSize) { this->file = newLucene(path); this->path = path; this->chunkSize = chunkSize; this->isClone = false; } SimpleFSIndexInput::~SimpleFSIndexInput() { } void SimpleFSIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) { SyncLock fileLock(file); int64_t position = getFilePointer(); if (position != file->getPosition()) file->setPosition(position); int32_t total = 0; while (total < length) { int32_t readLength = total + chunkSize > length ? length - total : chunkSize; int32_t i = file->read(b, offset + total, readLength); if (i == InputFile::FILE_EOF) boost::throw_exception(IOException(L"Read past EOF")); total += i; } } void SimpleFSIndexInput::seekInternal(int64_t pos) { } int64_t SimpleFSIndexInput::length() { return file->getLength(); } void SimpleFSIndexInput::close() { if (!isClone) file->close(); } bool SimpleFSIndexInput::isValid() { return file->isValid(); } LuceneObjectPtr SimpleFSIndexInput::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = BufferedIndexInput::clone(other ? 
other : newLucene()); SimpleFSIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); cloneIndexInput->path = path; cloneIndexInput->file = file; cloneIndexInput->chunkSize = chunkSize; cloneIndexInput->isClone = true; return cloneIndexInput; } OutputFile::OutputFile(const String& path) { this->path = path; file = newInstance(StringUtils::toUTF8(path).c_str(), std::ios::binary | std::ios::out); } OutputFile::~OutputFile() { } bool OutputFile::write(const uint8_t* b, int32_t offset, int32_t length) { if (!file->is_open()) return false; try { file->write((char*)b + offset, length); return file->good(); } catch (...) { return false; } } void OutputFile::close() { file.reset(); } void OutputFile::setPosition(int64_t position) { file->seekp((std::streamoff)position); if (!file->good()) boost::throw_exception(IOException()); } int64_t OutputFile::getLength() { return FileUtils::fileLength(path); } void OutputFile::setLength(int64_t length) { FileUtils::setFileLength(path, length); } void OutputFile::flush() { if (file->is_open()) file->flush(); } bool OutputFile::isValid() { return (file && file->is_open() && file->good()); } SimpleFSIndexOutput::SimpleFSIndexOutput(const String& path) { file = newLucene(path); isOpen = true; } SimpleFSIndexOutput::~SimpleFSIndexOutput() { } void SimpleFSIndexOutput::flushBuffer(const uint8_t* b, int32_t offset, int32_t length) { file->write(b, offset, length); file->flush(); } void SimpleFSIndexOutput::close() { if (isOpen) { BufferedIndexOutput::close(); file.reset(); isOpen = false; } } void SimpleFSIndexOutput::seek(int64_t pos) { BufferedIndexOutput::seek(pos); file->setPosition(pos); } int64_t SimpleFSIndexOutput::length() { return file->getLength(); } void SimpleFSIndexOutput::setLength(int64_t length) { file->setLength(length); } } 
LucenePlusPlus-rel_3.0.4/src/core/store/SimpleFSLockFactory.cpp000066400000000000000000000054751217574114600245300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "SimpleFSLockFactory.h" #include "_SimpleFSLockFactory.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { SimpleFSLockFactory::SimpleFSLockFactory() { } SimpleFSLockFactory::SimpleFSLockFactory(const String& lockDir) { setLockDir(lockDir); } SimpleFSLockFactory::~SimpleFSLockFactory() { } LockPtr SimpleFSLockFactory::makeLock(const String& lockName) { return newLucene(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName); } void SimpleFSLockFactory::clearLock(const String& lockName) { if (FileUtils::isDirectory(lockDir)) { String lockPath(FileUtils::joinPath(lockDir, lockPrefix.empty() ? 
lockName : lockPrefix + L"-" + lockName)); if (FileUtils::fileExists(lockPath) && !FileUtils::removeFile(lockPath)) boost::throw_exception(IOException(L"Cannot delete " + lockPath)); } } SimpleFSLock::SimpleFSLock(const String& lockDir, const String& lockFileName) { this->lockDir = lockDir; this->lockFile = lockFile; } SimpleFSLock::~SimpleFSLock() { } bool SimpleFSLock::obtain() { // Ensure that lockDir exists and is a directory if (!FileUtils::fileExists(lockDir)) { if (!FileUtils::createDirectory(lockDir)) boost::throw_exception(RuntimeException(L"Cannot create directory: " + lockDir)); } else if (!FileUtils::isDirectory(lockDir)) boost::throw_exception(RuntimeException(L"Found regular file where directory expected: " + lockDir)); std::ofstream f; try { f.open(StringUtils::toUTF8(FileUtils::joinPath(lockDir, lockFile)).c_str(), std::ios::binary | std::ios::out); } catch (...) { } return f.is_open(); } void SimpleFSLock::release() { String path(FileUtils::joinPath(lockDir, lockFile)); if (FileUtils::fileExists(path) && !FileUtils::removeFile(path)) boost::throw_exception(LockReleaseFailedException(L"failed to delete " + path)); } bool SimpleFSLock::isLocked() { return FileUtils::fileExists(FileUtils::joinPath(lockDir, lockFile)); } String SimpleFSLock::toString() { return getClassName() + L"@" + FileUtils::joinPath(lockDir, lockFile); } } LucenePlusPlus-rel_3.0.4/src/core/store/SingleInstanceLockFactory.cpp000066400000000000000000000034441217574114600257460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SingleInstanceLockFactory.h" #include "_SingleInstanceLockFactory.h" namespace Lucene { SingleInstanceLockFactory::SingleInstanceLockFactory() { locks = HashSet::newInstance(); } SingleInstanceLockFactory::~SingleInstanceLockFactory() { } LockPtr SingleInstanceLockFactory::makeLock(const String& lockName) { // We do not use the LockPrefix at all, because the private HashSet instance // effectively scopes the locking to this single Directory instance. return newLucene(locks, lockName); } void SingleInstanceLockFactory::clearLock(const String& lockName) { SyncLock syncLock(&locks); locks.remove(lockName); } SingleInstanceLock::SingleInstanceLock(HashSet locks, const String& lockName) { this->locks = locks; this->lockName = lockName; } SingleInstanceLock::~SingleInstanceLock() { } bool SingleInstanceLock::obtain() { SyncLock syncLock(&locks); return locks.add(lockName); } void SingleInstanceLock::release() { SyncLock syncLock(&locks); locks.remove(lockName); } bool SingleInstanceLock::isLocked() { SyncLock syncLock(&locks); return locks.contains(lockName); } String SingleInstanceLock::toString() { return lockName; } } LucenePlusPlus-rel_3.0.4/src/core/util/000077500000000000000000000000001217574114600200075ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/util/Attribute.cpp000066400000000000000000000014021217574114600224530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Attribute.h" namespace Lucene { Attribute::~Attribute() { } int32_t Attribute::hashCode() { return LuceneObject::hashCode(); } bool Attribute::equals(LuceneObjectPtr other) { return LuceneObject::equals(other); } LuceneObjectPtr Attribute::clone(LuceneObjectPtr other) { return LuceneObject::clone(other); } } LucenePlusPlus-rel_3.0.4/src/core/util/AttributeSource.cpp000066400000000000000000000202201217574114600236330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AttributeSource.h" #include "Attribute.h" namespace Lucene { AttributeFactory::AttributeFactory() { } AttributeFactory::~AttributeFactory() { } AttributePtr AttributeFactory::createAttributeInstance(const String& className) { return AttributePtr(); // override } AttributeFactoryPtr AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY() { static AttributeFactoryPtr _DEFAULT_ATTRIBUTE_FACTORY; if (!_DEFAULT_ATTRIBUTE_FACTORY) { _DEFAULT_ATTRIBUTE_FACTORY = newLucene(); CycleCheck::addStatic(_DEFAULT_ATTRIBUTE_FACTORY); } return _DEFAULT_ATTRIBUTE_FACTORY; } AttributeSource::AttributeSource() { this->attributes = MapStringAttribute::newInstance(); this->factory = AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY(); } AttributeSource::AttributeSource(AttributeSourcePtr input) { if (!input) boost::throw_exception(IllegalArgumentException(L"input AttributeSource must not be null")); this->attributes = input->attributes; this->factory = input->factory; } AttributeSource::AttributeSource(AttributeFactoryPtr factory) { this->attributes = MapStringAttribute::newInstance(); 
this->factory = factory;
    }

    AttributeSource::~AttributeSource()
    {
    }

    AttributeFactoryPtr AttributeSource::getAttributeFactory()
    {
        return this->factory;
    }

    void AttributeSource::addAttribute(const String& className, AttributePtr attrImpl)
    {
        // invalidate state to force recomputation in captureState()
        currentState.reset();
        attributes.put(className, attrImpl);
    }

    bool AttributeSource::hasAttributes()
    {
        return !attributes.empty();
    }

    AttributePtr AttributeSource::getAttribute(const String& className)
    {
        return attributes.get(className);
    }

    bool AttributeSource::hasAttribute(const String& className)
    {
        return attributes.contains(className);
    }

    // Builds the cached singly-linked snapshot chain over the attribute map.
    // Precondition (unchecked): attributes is non-empty - all callers in this
    // file guard with hasAttributes() first.
    void AttributeSource::computeCurrentState()
    {
        currentState = newLucene();
        AttributeSourceStatePtr c(currentState);
        MapStringAttribute::iterator attrImpl = attributes.begin();
        c->attribute = attrImpl->second;
        ++attrImpl;
        while (attrImpl != attributes.end())
        {
            c->next = newLucene();
            c = c->next;
            c->attribute = attrImpl->second;
            ++attrImpl;
        }
    }

    void AttributeSource::clearAttributes()
    {
        // Clears every registered attribute (the attributes themselves stay
        // registered; only their contents are reset).
        if (hasAttributes())
        {
            if (!currentState)
                computeCurrentState();
            for (MapStringAttribute::iterator attrImpl = attributes.begin(); attrImpl != attributes.end(); ++attrImpl)
                attrImpl->second->clear();
        }
    }

    // Returns a deep-cloned snapshot of the current attribute values, or a
    // null pointer when no attributes are registered.
    AttributeSourceStatePtr AttributeSource::captureState()
    {
        if (!hasAttributes())
            return AttributeSourceStatePtr();
        if (!currentState)
            computeCurrentState();
        return boost::dynamic_pointer_cast(currentState->clone());
    }

    // Copies a previously captured snapshot back into the matching attributes;
    // throws if the snapshot references an attribute this source lacks.
    void AttributeSource::restoreState(AttributeSourceStatePtr state)
    {
        if (!state)
            return;
        do
        {
            MapStringAttribute::iterator attrImpl = attributes.find(state->attribute->getClassName());
            if (attrImpl == attributes.end())
                boost::throw_exception(IllegalArgumentException(L"State contains an AttributeImpl that is not in this AttributeSource"));
            state->attribute->copyTo(attrImpl->second);
            state = state->next;
        }
        while (state);
    }

    int32_t AttributeSource::hashCode()
    {
        // Standard 31-based combination over all attribute hash codes.
        int32_t code = 0;
        for (MapStringAttribute::iterator attrImpl = attributes.begin(); attrImpl != attributes.end(); ++attrImpl)
            code = code * 31 + attrImpl->second->hashCode();
        return code;
    }

    bool AttributeSource::equals(LuceneObjectPtr other)
    {
        if (LuceneObject::equals(other))
            return true;
        AttributeSourcePtr otherAttributeSource = boost::dynamic_pointer_cast(other);
        if (otherAttributeSource)
        {
            if (hasAttributes())
            {
                if (!otherAttributeSource->hasAttributes())
                    return false;
                if (attributes.size() != otherAttributeSource->attributes.size())
                    return false;
                // it is only equal if all attribute impls are the same in the same order
                if (!currentState)
                    computeCurrentState();
                AttributeSourceStatePtr thisState(currentState);
                if (!otherAttributeSource->currentState)
                    otherAttributeSource->computeCurrentState();
                AttributeSourceStatePtr otherState(otherAttributeSource->currentState);
                while (thisState && otherState)
                {
                    if (otherState->attribute->getClassName() != thisState->attribute->getClassName() || !otherState->attribute->equals(thisState->attribute))
                        return false;
                    thisState = thisState->next;
                    otherState = otherState->next;
                }
                return true;
            }
            else
                return !otherAttributeSource->hasAttributes();
        }
        else
            return false;
    }

    String AttributeSource::toString()
    {
        // Renders as "(attr1,attr2,...)".
        StringStream buf;
        buf << L"(";
        if (hasAttributes())
        {
            if (!currentState)
                computeCurrentState();
            for (AttributeSourceStatePtr state(currentState); state; state = state->next)
            {
                if (state != currentState)
                    buf << L",";
                buf << state->attribute->toString();
            }
        }
        buf << ")";
        return buf.str();
    }

    // Returns a new source (same factory) holding deep clones of every
    // attribute currently registered here.
    AttributeSourcePtr AttributeSource::cloneAttributes()
    {
        AttributeSourcePtr clone(newLucene(this->factory));
        if (hasAttributes())
        {
            if (!currentState)
                computeCurrentState();
            for (AttributeSourceStatePtr state(currentState); state; state = state->next)
                clone->attributes.put(state->attribute->getClassName(), boost::dynamic_pointer_cast(state->attribute->clone()));
        }
        return clone;
    }

    Collection AttributeSource::getAttributes()
    {
        // Collects the live attribute instances (not clones) in state order.
        Collection attrImpls(Collection::newInstance());
        if (hasAttributes())
        {
            if (!currentState)
                computeCurrentState();
            for
(AttributeSourceStatePtr state(currentState); state; state = state->next) attrImpls.add(state->attribute); } return attrImpls; } DefaultAttributeFactory::~DefaultAttributeFactory() { } AttributePtr DefaultAttributeFactory::createAttributeInstance(const String& className) { return AttributePtr(); } AttributeSourceState::~AttributeSourceState() { } LuceneObjectPtr AttributeSourceState::clone(LuceneObjectPtr other) { AttributeSourceStatePtr clone(newLucene()); clone->attribute = boost::dynamic_pointer_cast(attribute->clone()); if (next) clone->next = boost::dynamic_pointer_cast(next->clone()); return clone; } } LucenePlusPlus-rel_3.0.4/src/core/util/Base64.cpp000066400000000000000000000101751217574114600215430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Base64.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const String Base64::BASE64_CHARS = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; Base64::~Base64() { } String Base64::encode(ByteArray bytes) { return encode(bytes.get(), bytes.size()); } String Base64::encode(const uint8_t* bytes, int32_t length) { String result; uint8_t byteArray3[3]; uint8_t byteArray4[4]; int32_t i = 0; while (length--) { byteArray3[i++] = *(bytes++); if (i == 3) { byteArray4[0] = (byteArray3[0] & 0xfc) >> 2; byteArray4[1] = ((byteArray3[0] & 0x03) << 4) + ((byteArray3[1] & 0xf0) >> 4); byteArray4[2] = ((byteArray3[1] & 0x0f) << 2) + ((byteArray3[2] & 0xc0) >> 6); byteArray4[3] = byteArray3[2] & 0x3f; for (i = 0; i < 4; ++i) result += BASE64_CHARS[byteArray4[i]]; i = 0; } } if (i != 0) { for (int32_t j = i; j < 3; ++j) byteArray3[j] = 0; byteArray4[0] = (byteArray3[0] & 0xfc) >> 2; byteArray4[1] = ((byteArray3[0] & 0x03) << 4) + ((byteArray3[1] & 0xf0) >> 4); byteArray4[2] = ((byteArray3[1] & 0x0f) << 2) + ((byteArray3[2] & 0xc0) >> 6); byteArray4[3] = byteArray3[2] & 0x3f; for (int32_t j = 0; j < i + 1; ++j) result += BASE64_CHARS[byteArray4[j]]; while (i++ < 3) result += L'='; } return result; } ByteArray Base64::decode(const String& str) { int32_t length = str.length(); uint8_t byteArray4[4]; uint8_t byteArray3[3]; int32_t i = 0; int32_t charIndex = 0; ByteArray result(ByteArray::newInstance(length / 2)); int32_t resultIndex = 0; while (length-- && str[charIndex] != L'=' && isBase64(str[charIndex])) { byteArray4[i++] = (uint8_t)str[charIndex++]; if (i == 4) { for (i = 0; i < 4; ++i) byteArray4[i] = BASE64_CHARS.find(byteArray4[i]); byteArray3[0] = (byteArray4[0] << 2) + ((byteArray4[1] & 0x30) >> 4); byteArray3[1] = ((byteArray4[1] & 0xf) << 4) + ((byteArray4[2] & 0x3c) >> 2); byteArray3[2] = ((byteArray4[2] & 0x3) << 6) + 
byteArray4[3]; for (i = 0; i < 3; ++i) { if (resultIndex >= result.size()) result.resize((int32_t)((double)result.size() * 1.5)); result[resultIndex++] = byteArray3[i]; } i = 0; } } if (i != 0) { for (int32_t j = i; j < 4; ++j) byteArray4[j] = 0; for (int32_t j = 0; j < 4; ++j) byteArray4[j] = BASE64_CHARS.find(byteArray4[j]); byteArray3[0] = (byteArray4[0] << 2) + ((byteArray4[1] & 0x30) >> 4); byteArray3[1] = ((byteArray4[1] & 0xf) << 4) + ((byteArray4[2] & 0x3c) >> 2); byteArray3[2] = ((byteArray4[2] & 0x3) << 6) + byteArray4[3]; for (int32_t j = 0; j < i - 1; ++j) { if (resultIndex >= result.size()) result.resize((int32_t)((double)result.size() * 1.5)); result[resultIndex++] = byteArray3[j]; } } result.resize(resultIndex); return result; } bool Base64::isBase64(wchar_t ch) { return (UnicodeUtil::isAlnum(ch) || ch == L'+' || ch == L'/'); } } LucenePlusPlus-rel_3.0.4/src/core/util/BitSet.cpp000066400000000000000000000214171217574114600217120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BitSet.h" #include "BitUtil.h" namespace Lucene { BitSet::BitSet(uint32_t size) : bitSet(size) { } BitSet::~BitSet() { } const uint64_t* BitSet::getBits() { return bitSet.empty() ? 
NULL : static_cast(&bitSet.m_bits[0]); } void BitSet::clear() { bitSet.clear(); } void BitSet::clear(uint32_t bitIndex) { if (bitIndex <= bitSet.size()) bitSet.set(bitIndex, false); } void BitSet::fastClear(uint32_t bitIndex) { bitSet.set(bitIndex, false); } void BitSet::clear(uint32_t fromIndex, uint32_t toIndex) { toIndex = std::min(toIndex, (uint32_t)bitSet.size()); for (bitset_type::size_type i = std::min(fromIndex, (uint32_t)bitSet.size()); i < toIndex; ++i) bitSet.set(i, false); } void BitSet::fastClear(uint32_t fromIndex, uint32_t toIndex) { for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) bitSet.set(i, false); } void BitSet::set(uint32_t bitIndex) { if (bitIndex >= bitSet.size()) resize(bitIndex + 1); bitSet.set(bitIndex, true); } void BitSet::fastSet(uint32_t bitIndex) { bitSet.set(bitIndex, true); } void BitSet::set(uint32_t bitIndex, bool value) { if (bitIndex >= bitSet.size()) resize(bitIndex + 1); bitSet.set(bitIndex, value); } void BitSet::fastSet(uint32_t bitIndex, bool value) { bitSet.set(bitIndex, value); } void BitSet::set(uint32_t fromIndex, uint32_t toIndex) { if (toIndex >= bitSet.size()) resize(toIndex + 1); for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) bitSet.set(i, true); } void BitSet::fastSet(uint32_t fromIndex, uint32_t toIndex) { for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) bitSet.set(i, true); } void BitSet::set(uint32_t fromIndex, uint32_t toIndex, bool value) { if (toIndex >= bitSet.size()) resize(toIndex + 1); for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) bitSet.set(i, value); } void BitSet::fastSet(uint32_t fromIndex, uint32_t toIndex, bool value) { for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) bitSet.set(i, value); } void BitSet::flip(uint32_t bitIndex) { if (bitIndex >= bitSet.size()) resize(bitIndex + 1); bitSet.flip(bitIndex); } void BitSet::fastFlip(uint32_t bitIndex) { bitSet.flip(bitIndex); } void BitSet::flip(uint32_t fromIndex, uint32_t toIndex) { if 
(toIndex >= bitSet.size())
        resize(toIndex + 1);
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i)
        bitSet.flip(i);
}

void BitSet::fastFlip(uint32_t fromIndex, uint32_t toIndex)
{
    // Unchecked variant: caller guarantees the range is valid.
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i)
        bitSet.flip(i);
}

uint32_t BitSet::size() const
{
    // Capacity in bits, rounded up to whole storage blocks.
    return bitSet.num_blocks() * sizeof(bitset_type::block_type) * 8;
}

uint32_t BitSet::numBlocks() const
{
    return bitSet.num_blocks();
}

bool BitSet::isEmpty() const
{
    // True when no bit is set (not when capacity is zero).
    return bitSet.none();
}

bool BitSet::get(uint32_t bitIndex) const
{
    // Out-of-range reads as false.
    return bitIndex < bitSet.size() ? bitSet.test(bitIndex) : false;
}

bool BitSet::fastGet(uint32_t bitIndex) const
{
    return bitSet.test(bitIndex);
}

int32_t BitSet::nextSetBit(uint32_t fromIndex) const
{
    // First set bit at or after fromIndex, or -1 if none.
    bitset_type::size_type next = fromIndex == 0 ? bitSet.find_first() : bitSet.find_next(fromIndex - 1);
    return next == bitset_type::npos ? -1 : next;
}

void BitSet::_and(BitSetPtr set)
{
    // Intersect block-wise; any blocks beyond the other set's length become 0.
    bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks());
    for (bitset_type::size_type i = 0; i < minBlocks; ++i)
        bitSet.m_bits[i] &= set->bitSet.m_bits[i];
    if (bitSet.num_blocks() > minBlocks)
        std::fill(bitSet.m_bits.begin() + minBlocks, bitSet.m_bits.end(), bitset_type::block_type(0));
}

void BitSet::_or(BitSetPtr set)
{
    // Union: grow to the other set's size, OR the shared blocks, then copy
    // the other set's tail (our tail was zero after the resize).
    bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks());
    if (set->bitSet.size() > bitSet.size())
        resize(set->bitSet.size());
    for (bitset_type::size_type i = 0; i < minBlocks; ++i)
        bitSet.m_bits[i] |= set->bitSet.m_bits[i];
    if (bitSet.num_blocks() > minBlocks)
        std::copy(set->bitSet.m_bits.begin() + minBlocks, set->bitSet.m_bits.end(), bitSet.m_bits.begin() + minBlocks);
}

void BitSet::_xor(BitSetPtr set)
{
    // Symmetric difference; tail handling mirrors _or (x ^ 0 == x).
    bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks());
    if (set->bitSet.size() > bitSet.size())
        resize(set->bitSet.size());
    for (bitset_type::size_type i = 0; i < minBlocks; ++i)
        bitSet.m_bits[i] ^= set->bitSet.m_bits[i];
    if (bitSet.num_blocks() > minBlocks)
        std::copy(set->bitSet.m_bits.begin() + minBlocks, set->bitSet.m_bits.end(), bitSet.m_bits.begin() + minBlocks);
}

void BitSet::andNot(BitSetPtr set)
{
    // Clear every bit that is set in the other set; no resize needed since
    // bits beyond the other set's length are unaffected.
    bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks());
    for (bitset_type::size_type i = 0; i < minBlocks; ++i)
        bitSet.m_bits[i] &= ~set->bitSet.m_bits[i];
}

bool BitSet::intersectsBitSet(BitSetPtr set) const
{
    return bitSet.intersects(set->bitSet);
}

uint32_t BitSet::cardinality()
{
    // Population count over the raw blocks via BitUtil.
    return bitSet.num_blocks() == 0 ? 0 : (uint32_t)BitUtil::pop_array((int64_t*)getBits(), 0, bitSet.num_blocks());
}

void BitSet::resize(uint32_t size)
{
    // Manually resizes the underlying dynamic_bitset storage, zero-filling
    // new blocks and masking off excess bits in the final block.
    bitset_type::size_type old_num_blocks = bitSet.num_blocks();
    bitset_type::size_type required_blocks = bitSet.calc_num_blocks(size);
    if (required_blocks != old_num_blocks)
        bitSet.m_bits.resize(required_blocks, bitset_type::block_type(0));
    bitSet.m_num_bits = size;
    uint64_t extra_bits = static_cast(bitSet.size() % bitSet.bits_per_block);
    if (extra_bits != 0)
        bitSet.m_bits.back() &= ~(~static_cast(0) << extra_bits);
}

bool BitSet::equals(LuceneObjectPtr other)
{
    if (LuceneObject::equals(other))
        return true;
    BitSetPtr otherBitSet(boost::dynamic_pointer_cast(other));
    if (!otherBitSet)
        return false;
    // Compare the longer set's extra blocks against zero, then the shared
    // blocks pairwise - so trailing zero blocks do not affect equality.
    BitSetPtr first = bitSet.num_blocks() < otherBitSet->bitSet.num_blocks() ? otherBitSet : shared_from_this();
    BitSetPtr second = bitSet.num_blocks() < otherBitSet->bitSet.num_blocks() ? shared_from_this() : otherBitSet;
    bitset_type::size_type firstLength = first->bitSet.num_blocks();
    bitset_type::size_type secondLength = second->bitSet.num_blocks();
    for (bitset_type::size_type i = secondLength; i < firstLength; ++i)
    {
        if (first->bitSet.m_bits[i] != 0)
            return false;
    }
    for (bitset_type::size_type i = 0; i < secondLength; ++i)
    {
        if (first->bitSet.m_bits[i] != second->bitSet.m_bits[i])
            return false;
    }
    return true;
}

int32_t BitSet::hashCode()
{
    // NOTE(review): the rotate below uses a signed ">> 63", which
    // sign-extends rather than rotating cleanly for negative hash values;
    // acceptable for a hash, but confirm it matches upstream intent.
    // Start with a zero hash and use a mix that results in zero if the input is zero.
// This effectively truncates trailing zeros without an explicit check. int64_t hash = 0; uint32_t maxSize = bitSet.num_blocks(); const uint64_t* bits = getBits(); for (uint32_t bit = 0; bit < maxSize; ++bit) { hash ^= bits[bit]; hash = (hash << 1) | (hash >> 63); // rotate left } // Fold leftmost bits into right and add a constant to prevent empty sets from // returning 0, which is too common. return (int32_t)((hash >> 32) ^ hash) + 0x98761234; } LuceneObjectPtr BitSet::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); BitSetPtr cloneBitSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneBitSet->bitSet = bitSet; return cloneBitSet; } } LucenePlusPlus-rel_3.0.4/src/core/util/BitUtil.cpp000066400000000000000000000434461217574114600221020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BitUtil.h" #include "MiscUtils.h" namespace Lucene { const uint8_t BitUtil::ntzTable[] = { 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 }; BitUtil::~BitUtil() { } int32_t BitUtil::pop(int64_t x) { x = x - (MiscUtils::unsignedShift(x, (int64_t)1) & 0x5555555555555555LL); x = (x & 0x3333333333333333LL) + (MiscUtils::unsignedShift(x, (int64_t)2) & 0x3333333333333333LL); x = (x + MiscUtils::unsignedShift(x, (int64_t)4)) & 0x0f0f0f0f0f0f0f0fLL; x = x + MiscUtils::unsignedShift(x, (int64_t)8); x = x + MiscUtils::unsignedShift(x, (int64_t)16); x = x + MiscUtils::unsignedShift(x, (int64_t)32); return (int32_t)x & 0x7f; } int64_t BitUtil::pop_array(const int64_t* A, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, A[i], A[i + 1]); int64_t twosB; CSA(twosB, ones, ones, A[i + 2], A[i + 3]); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, A[i + 4], A[i + 5]); CSA(twosB, ones, ones, A[i + 6], A[i + 7]); int64_t foursB; 
CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += pop(eights); } // Handle trailing words in a binary-search manner. // Derived from the loop above by setting specific elements to 0. if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, A[i], A[i + 1]); int64_t twosB; CSA(twosB, ones, ones, A[i + 2], A[i + 3]); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, A[i], A[i + 1]); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) tot += pop(A[i]); tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } int64_t BitUtil::pop_intersect(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] & B[i + 2]), (A[i + 3] & B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, (A[i + 4] & B[i + 4]), (A[i + 5] & B[i + 5])); CSA(twosB, ones, ones, (A[i + 6] & B[i + 6]), (A[i + 7] & B[i + 7])); int64_t foursB; CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += pop(eights); } if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] & B[i + 2]), (A[i + 3] & B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, 
(A[i] & B[i]), (A[i + 1] & B[i + 1])); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) tot += pop((A[i] & B[i])); tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } int64_t BitUtil::pop_union(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] | B[i + 2]), (A[i + 3] | B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, (A[i + 4] | B[i + 4]), (A[i + 5] | B[i + 5])); CSA(twosB, ones, ones, (A[i + 6] | B[i + 6]), (A[i + 7] | B[i + 7])); int64_t foursB; CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += pop(eights); } if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] | B[i + 2]), (A[i + 3] | B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) tot += pop((A[i] | B[i])); tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } int64_t BitUtil::pop_andnot(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = 
wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] & ~B[i + 2]), (A[i + 3] & ~B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, (A[i + 4] & ~B[i + 4]), (A[i + 5] & ~B[i + 5])); CSA(twosB, ones, ones, (A[i + 6] & ~B[i + 6]), (A[i + 7] & ~B[i + 7])); int64_t foursB; CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += pop(eights); } if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] & ~B[i + 2]), (A[i + 3] & ~B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) tot += pop((A[i] & ~B[i])); tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } int64_t BitUtil::pop_xor(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] ^ B[i + 2]), (A[i + 3] ^ B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, (A[i + 4] ^ B[i + 4]), (A[i + 5] ^ B[i + 5])); CSA(twosB, ones, ones, (A[i + 6] ^ B[i + 6]), (A[i + 7] ^ B[i + 7])); int64_t foursB; CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += 
pop(eights); } if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] ^ B[i + 2]), (A[i + 3] ^ B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) tot += pop((A[i] ^ B[i])); tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } void BitUtil::CSA(int64_t& h, int64_t& l, int64_t a, int64_t b, int64_t c) { int64_t u = a ^ b; h = (a & b) | (u & c); l = u ^ c; } int32_t BitUtil::ntz(int64_t val) { // A full binary search to determine the low byte was slower than a linear search for nextSetBit(). // This is most likely because the implementation of nextSetBit() shifts bits to the right, increasing // the probability that the first non-zero byte is in the rhs. // This implementation does a single binary search at the top level only so that all other bit shifting // can be done on ints instead of longs to remain friendly to 32 bit architectures. In addition, the // case of a non-zero first byte is checked for first because it is the most common in dense bit arrays. int32_t lower = (int32_t)val; int32_t lowByte = lower & 0xff; if (lowByte != 0) return ntzTable[lowByte]; if (lower != 0) { lowByte = MiscUtils::unsignedShift(lower, 8) & 0xff; if (lowByte != 0) return ntzTable[lowByte] + 8; lowByte = MiscUtils::unsignedShift(lower, 16) & 0xff; if (lowByte != 0) return ntzTable[lowByte] + 16; // no need to mask off low byte for the last byte in the 32 bit word // no need to check for zero on the last byte either. 
return ntzTable[MiscUtils::unsignedShift(lower, 24)] + 24;
    }
    else
    {
        // grab upper 32 bits
        int32_t upper = (int32_t)(val >> 32);
        lowByte = upper & 0xff;
        if (lowByte != 0)
            return ntzTable[lowByte] + 32;
        lowByte = MiscUtils::unsignedShift(upper, 8) & 0xff;
        if (lowByte != 0)
            return ntzTable[lowByte] + 40;
        lowByte = MiscUtils::unsignedShift(upper, 16) & 0xff;
        if (lowByte != 0)
            return ntzTable[lowByte] + 48;
        // no need to mask off low byte for the last byte in the 32 bit word
        // no need to check for zero on the last byte either.
        return ntzTable[MiscUtils::unsignedShift(upper, 24)] + 56;
    }
}

// Number of trailing zeros of a 32-bit value, via byte-wise table lookup.
int32_t BitUtil::ntz(int32_t val)
{
    // This implementation does a single binary search at the top level only. In addition, the case
    // of a non-zero first byte is checked for first because it is the most common in dense bit arrays.
    int32_t lowByte = val & 0xff;
    if (lowByte != 0)
        return ntzTable[lowByte];
    lowByte = MiscUtils::unsignedShift(val, 8) & 0xff;
    if (lowByte != 0)
        return ntzTable[lowByte] + 8;
    lowByte = MiscUtils::unsignedShift(val, 16) & 0xff;
    if (lowByte != 0)
        return ntzTable[lowByte] + 16;
    // no need to mask off low byte for the last byte.
    // no need to check for zero on the last byte either.
    return ntzTable[MiscUtils::unsignedShift(val, 24)] + 24;
}

// Alternative trailing-zero count: halve the search window repeatedly
// (16, then 8 bits), finishing with a table lookup on the final byte.
int32_t BitUtil::ntz2(int64_t x)
{
    int32_t n = 0;
    int32_t y = (int32_t)x;
    if (y == 0) // the only 64 bit shift necessary
    {
        n += 32;
        y = (int32_t)MiscUtils::unsignedShift(x, (int64_t)32);
    }
    if ((y & 0x0000ffff) == 0)
    {
        n += 16;
        y = MiscUtils::unsignedShift(y, 16);
    }
    if ((y & 0x000000ff) == 0)
    {
        n += 8;
        y = MiscUtils::unsignedShift(y, 8);
    }
    return (ntzTable[y & 0xff]) + n;
}

// Fully table-free trailing-zero count: binary search all the way down,
// correcting with the final bit at the end.
int32_t BitUtil::ntz3(int64_t x)
{
    int32_t n = 1;
    // do the first step as a long, all others as ints.
int32_t y = (int32_t)x; if (y == 0) { n += 32; y = (int32_t)MiscUtils::unsignedShift(x, (int64_t)32); } if ((y & 0x0000ffff) == 0) { n += 16; y = MiscUtils::unsignedShift(y, 16); } if ((y & 0x000000ff) == 0) { n += 8; y = MiscUtils::unsignedShift(y, 8); } if ((y & 0x0000000f) == 0) { n += 4; y = MiscUtils::unsignedShift(y, 4); } if ((y & 0x00000003) == 0) { n += 2; y = MiscUtils::unsignedShift(y, 2); } return n - (y & 1); } bool BitUtil::isPowerOfTwo(int32_t v) { return ((v & (v - 1)) == 0); } bool BitUtil::isPowerOfTwo(int64_t v) { return ((v & (v - 1)) == 0); } int32_t BitUtil::nextHighestPowerOfTwo(int32_t v) { --v; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; return ++v; } int64_t BitUtil::nextHighestPowerOfTwo(int64_t v) { --v; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v |= v >> 32; return ++v; } } LucenePlusPlus-rel_3.0.4/src/core/util/BitVector.cpp000066400000000000000000000210141217574114600224120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BitVector.h" #include "Directory.h" #include "IndexInput.h" #include "IndexOutput.h" #include "TestPoint.h" #include "MiscUtils.h" namespace Lucene { const uint8_t BitVector::BYTE_COUNTS[] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; BitVector::BitVector(int32_t n) { _size = n; bits = ByteArray::newInstance((_size >> 3) + 1); MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); _count = 0; } BitVector::BitVector(ByteArray bits, int32_t size) { this->bits = bits; this->_size = size; this->_count = -1; } BitVector::BitVector(DirectoryPtr d, const String& name) { IndexInputPtr input(d->openInput(name)); LuceneException finally; try { _size = input->readInt(); // read size if (_size == -1) readDgaps(input); else readBits(input); } catch (LuceneException& e) { finally = e; } input->close(); finally.throwException(); } BitVector::~BitVector() { } LuceneObjectPtr BitVector::clone(LuceneObjectPtr other) { ByteArray copyBits(ByteArray::newInstance(bits.size())); MiscUtils::arrayCopy(bits.get(), 0, copyBits.get(), 0, bits.size()); BitVectorPtr clone = newLucene(copyBits, _size); clone->_count = _count; return clone; } void BitVector::set(int32_t bit) 
{ if (bit >= _size) boost::throw_exception(IndexOutOfBoundsException()); bits[bit >> 3] |= 1 << (bit & 7); _count = -1; } bool BitVector::getAndSet(int32_t bit) { if (bit >= _size) boost::throw_exception(IndexOutOfBoundsException()); int32_t pos = (bit >> 3); int32_t v = bits[pos]; int32_t flag = 1 << (bit & 7); if ((flag & v) != 0) return true; else { bits[pos] = (uint8_t)(v | flag); if (_count != -1) ++_count; return false; } } void BitVector::clear(int32_t bit) { if (bit >= _size) boost::throw_exception(IndexOutOfBoundsException()); bits[bit >> 3] &= ~(1 << (bit & 7)); _count = -1; } bool BitVector::get(int32_t bit) { BOOST_ASSERT(bit >= 0 && bit < _size); return (bits[bit >> 3] & (1 << (bit & 7))) != 0; } int32_t BitVector::size() { return _size; } int32_t BitVector::count() { // if the vector has been modified if (_count == -1) { int32_t c = 0; int32_t end = bits.size(); for (int32_t i = 0; i < end; ++i) c += BYTE_COUNTS[bits[i] & 0xff]; // sum bits per byte _count = c; } return _count; } int32_t BitVector::getRecomputedCount() { int32_t c = 0; int32_t end = bits.size(); for (int32_t i = 0; i < end; ++i) c += BYTE_COUNTS[bits[i] & 0xff]; // sum bits per byte return c; } void BitVector::write(DirectoryPtr d, const String& name) { TestScope testScope(L"BitVector", L"write"); IndexOutputPtr output(d->createOutput(name)); LuceneException finally; try { if (isSparse()) writeDgaps(output); // sparse bit-set more efficiently saved as d-gaps. 
else writeBits(output); } catch (LuceneException& e) { finally = e; } output->close(); finally.throwException(); } void BitVector::writeBits(IndexOutputPtr output) { output->writeInt(size()); // write size output->writeInt(count()); // write count output->writeBytes(bits.get(), bits.size()); } void BitVector::writeDgaps(IndexOutputPtr output) { output->writeInt(-1); // mark using d-gaps output->writeInt(size()); // write size output->writeInt(count()); // write count int32_t last = 0; int32_t n = count(); int32_t m = bits.size(); for (int32_t i = 0; i < m && n > 0; ++i) { if (bits[i] != 0) { output->writeVInt(i-last); output->writeByte(bits[i]); last = i; n -= BYTE_COUNTS[bits[i] & 0xff]; } } } bool BitVector::isSparse() { // note: order of comparisons below set to favor smaller values (no binary range search.) // note: adding 4 because we start with ((int) -1) to indicate d-gaps format. // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore // multiplying count by (8+8) or (8+16) or (8+24) etc.: // - first 8 for writing bits[i] (1 byte vs. 1 bit), and // - second part for writing the byte-number d-gap as vint. // note: factor is for read/write of byte-arrays being faster than vints. 
int32_t factor = 10; if (bits.size() < (1 << 7)) return factor * (4 + (8 + 8) * count()) < size(); if (bits.size() < (1 << 14)) return factor * (4 + (8 + 16) * count()) < size(); if (bits.size() < (1 << 21)) return factor * (4 + (8 + 24) * count()) < size(); if (bits.size() < (1 << 28)) return factor * (4 + (8 + 32) * count()) < size(); return factor * (4 + (8 + 40) * count()) < size(); } void BitVector::readBits(IndexInputPtr input) { _count = input->readInt(); // read count bits = ByteArray::newInstance((_size >> 3) + 1); // allocate bits MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); input->readBytes(bits.get(), 0, bits.size()); } void BitVector::readDgaps(IndexInputPtr input) { _size = input->readInt(); // (re)read size _count = input->readInt(); // read count bits = ByteArray::newInstance((_size >> 3) + 1); // allocate bits MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); int32_t last = 0; int32_t n = count(); while (n > 0) { last += input->readVInt(); bits[last] = input->readByte(); n -= BYTE_COUNTS[bits[last] & 0xff]; } } BitVectorPtr BitVector::subset(int32_t start, int32_t end) { if (start < 0 || end > size() || end < start) boost::throw_exception(IndexOutOfBoundsException()); // Special case -- return empty vector is start == end if (end == start) return newLucene(0); ByteArray bits(ByteArray::newInstance(MiscUtils::unsignedShift(end - start - 1, 3) + 1)); int32_t s = MiscUtils::unsignedShift(start, 3); for (int32_t i = 0; i < bits.size(); ++i) { int32_t cur = 0xff & this->bits[i + s]; int32_t next = i + s + 1 >= this->bits.size() ? 
0 : 0xff & this->bits[i + s + 1]; bits[i] = (uint8_t)(MiscUtils::unsignedShift(cur, (start & 7)) | ((next << (8 - (start & 7))))); } int32_t bitsToClear = (bits.size() * 8 - (end - start)) % 8; bits[bits.size() - 1] &= ~(0xff << (8 - bitsToClear)); return newLucene(bits, end - start); } } LucenePlusPlus-rel_3.0.4/src/core/util/BufferedReader.cpp000066400000000000000000000066341217574114600233710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BufferedReader.h" #include "MiscUtils.h" namespace Lucene { const int32_t BufferedReader::READER_BUFFER = 8192; BufferedReader::BufferedReader(ReaderPtr reader, int32_t size) { this->reader = reader; this->bufferSize = size; this->bufferLength = 0; this->bufferPosition = 0; } BufferedReader::~BufferedReader() { } int32_t BufferedReader::read() { if (bufferPosition >= bufferLength) { if (refill() == READER_EOF) return READER_EOF; } return buffer[bufferPosition++]; } int32_t BufferedReader::peek() { if (bufferPosition >= bufferLength) { if (refill() == READER_EOF) return READER_EOF; } return buffer[bufferPosition]; } int32_t BufferedReader::read(wchar_t* b, int32_t offset, int32_t length) { if (length == 0) return 0; int32_t remaining = length; while (remaining > 0) { int32_t available = bufferLength - bufferPosition; if (remaining <= available) { // the buffer contains enough data to satisfy this request MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, remaining); bufferPosition += remaining; remaining = 0; } else if (available > 0) { // the buffer does not have enough data, first serve all we've got MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, 
available); bufferPosition += available; offset += available; remaining -= available; } else if (refill() == READER_EOF) { length -= remaining; break; } } return length == 0 ? READER_EOF : length; } bool BufferedReader::readLine(String& line) { line.clear(); wchar_t ch = (wchar_t)read(); while (ch != (wchar_t)READER_EOF && ch != L'\r' && ch != L'\n') { line += ch; ch = (wchar_t)read(); } if (ch == '\r' && (wchar_t)peek() == L'\n') read(); return (!line.empty() || ch != (wchar_t)READER_EOF); } int32_t BufferedReader::refill() { if (!buffer) buffer = CharArray::newInstance(bufferSize); // allocate buffer lazily int32_t readLength = reader->read(buffer.get(), 0, bufferSize); bufferLength = readLength == READER_EOF ? 0 : readLength; bufferPosition = 0; return readLength; } void BufferedReader::close() { reader->close(); bufferLength = 0; bufferPosition = 0; } bool BufferedReader::markSupported() { return false; } void BufferedReader::reset() { reader->reset(); bufferLength = 0; bufferPosition = 0; } } LucenePlusPlus-rel_3.0.4/src/core/util/CharFolder.cpp000066400000000000000000000027421217574114600225310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharFolder.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { bool CharFolder::lowerCache = CharFolder::fillLower(); bool CharFolder::upperCache = CharFolder::fillUpper(); wchar_t CharFolder::lowerChars[CHAR_MAX - CHAR_MIN + 1]; wchar_t CharFolder::upperChars[CHAR_MAX - CHAR_MIN + 1]; CharFolder::~CharFolder() { } wchar_t CharFolder::toLower(wchar_t ch) { return (ch > CHAR_MIN && ch < CHAR_MAX) ? 
lowerChars[ch - CHAR_MIN] : UnicodeUtil::toLower(ch); } wchar_t CharFolder::toUpper(wchar_t ch) { return (ch > CHAR_MIN && ch < CHAR_MAX) ? upperChars[ch - CHAR_MIN] : UnicodeUtil::toUpper(ch); } bool CharFolder::fillLower() { for (int32_t index = CHAR_MIN; index < CHAR_MAX; ++index) lowerChars[index - CHAR_MIN] = UnicodeUtil::toLower((wchar_t)index); return true; } bool CharFolder::fillUpper() { for (int32_t index = CHAR_MIN; index < CHAR_MAX; ++index) upperChars[index - CHAR_MIN] = UnicodeUtil::toUpper((wchar_t)index); return true; } } LucenePlusPlus-rel_3.0.4/src/core/util/Collator.cpp000066400000000000000000000014401217574114600222710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "Collator.h" namespace Lucene { Collator::Collator(std::locale locale) : collate(std::use_facet< std::collate >(locale)) { } Collator::~Collator() { } int32_t Collator::compare(const String& first, const String& second) { return collate.compare(first.c_str(), first.c_str() + first.length(), second.c_str(), second.c_str() + second.length()); } } LucenePlusPlus-rel_3.0.4/src/core/util/Constants.cpp000066400000000000000000000027631217574114600224770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Constants.h" namespace Lucene { #if defined(linux) || defined(__linux) || defined(__linux__) String Constants::OS_NAME = L"Linux"; #elif defined(sun) || defined(__sun) String Constants::OS_NAME = L"Sun"; #elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(_WIN64) || defined(__WIN64__) || defined(WIN64) String Constants::OS_NAME = L"Windows"; #elif defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__) String Constants::OS_NAME = L"Mac"; #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) String Constants::OS_NAME = L"BSD"; #endif String Constants::LUCENE_MAIN_VERSION = L"3.0.3.4"; String Constants::LUCENE_VERSION = L"3.0.3"; Constants::Constants() { // private } Constants::~Constants() { } LuceneVersion::LuceneVersion() { // private } LuceneVersion::~LuceneVersion() { } bool LuceneVersion::onOrAfter(LuceneVersion::Version first, LuceneVersion::Version second) { return (first >= second); } } LucenePlusPlus-rel_3.0.4/src/core/util/CycleCheck.cpp000066400000000000000000000043611217574114600225140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "CycleCheck.h" namespace Lucene { MapStringInt CycleCheck::cycleMap; Set CycleCheck::staticRefs; CycleCheck::~CycleCheck() { } void CycleCheck::addRef(const String& className, int32_t ref) { if (!cycleMap) cycleMap = MapStringInt::newInstance(); SyncLock lockRef(&cycleMap); MapStringInt::iterator classRef = cycleMap.find(className); if (classRef == cycleMap.end()) cycleMap.put(className, 1); else { classRef->second += ref; if (classRef->second < 0) boost::throw_exception(RuntimeException(L"invalid class reference")); } } void CycleCheck::addStatic(LuceneObjectPtr* staticRef) { #ifdef LPP_USE_CYCLIC_CHECK if (!staticRefs) staticRefs = Set::newInstance(); staticRefs.add(staticRef); #endif } void CycleCheck::dumpRefs() { SyncLock lockRef(&cycleMap); // destroy all registered statics if (staticRefs) { for (Set::iterator staticRef = staticRefs.begin(); staticRef != staticRefs.end(); ++staticRef) (*staticRef)->reset(); } if (cycleMap) { bool reportCycles = true; for (MapStringInt::iterator classRef = cycleMap.begin(); classRef != cycleMap.end(); ++classRef) { if (classRef->second > 0) { if (reportCycles) { std::wcout << L"Cyclic references detected!\n"; reportCycles = false; } std::wcout << classRef->first << L": " << classRef->second << L"\n"; } } } } } LucenePlusPlus-rel_3.0.4/src/core/util/DocIdBitSet.cpp000066400000000000000000000043751217574114600226210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocIdBitSet.h" #include "_DocIdBitSet.h" #include "BitSet.h" namespace Lucene { DocIdBitSet::DocIdBitSet() { } DocIdBitSet::DocIdBitSet(BitSetPtr bitSet) { this->bitSet = bitSet; } DocIdBitSet::~DocIdBitSet() { } DocIdSetIteratorPtr DocIdBitSet::iterator() { return newLucene(bitSet); } bool DocIdBitSet::isCacheable() { return true; } BitSetPtr DocIdBitSet::getBitSet() { return bitSet; } bool DocIdBitSet::equals(LuceneObjectPtr other) { if (DocIdSet::equals(other)) return true; DocIdBitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); return bitSet->equals(otherBitSet->bitSet); } int32_t DocIdBitSet::hashCode() { return bitSet->hashCode(); } LuceneObjectPtr DocIdBitSet::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); DocIdBitSetPtr cloneBitSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneBitSet->bitSet = boost::dynamic_pointer_cast(bitSet->clone()); return cloneBitSet; } DocIdBitSetIterator::DocIdBitSetIterator(BitSetPtr bitSet) { this->bitSet = bitSet; this->docId = -1; } DocIdBitSetIterator::~DocIdBitSetIterator() { } int32_t DocIdBitSetIterator::docID() { return docId; } int32_t DocIdBitSetIterator::nextDoc() { int32_t doc = bitSet->nextSetBit(docId + 1); docId = doc == -1 ? NO_MORE_DOCS : doc; return docId; } int32_t DocIdBitSetIterator::advance(int32_t target) { int32_t doc = bitSet->nextSetBit(target); docId = doc == -1 ? NO_MORE_DOCS : doc; return docId; } } LucenePlusPlus-rel_3.0.4/src/core/util/FieldCacheSanityChecker.cpp000066400000000000000000000260731217574114600251470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheSanityChecker.h" #include "_FieldCacheSanityChecker.h" #include "FieldCache.h" #include "IndexReader.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { FieldCacheSanityChecker::FieldCacheSanityChecker() { } FieldCacheSanityChecker::~FieldCacheSanityChecker() { } Collection FieldCacheSanityChecker::checkSanity(FieldCachePtr cache) { return checkSanity(cache->getCacheEntries()); } Collection FieldCacheSanityChecker::checkSanity(Collection cacheEntries) { FieldCacheSanityCheckerPtr sanityChecker(newLucene()); return sanityChecker->check(cacheEntries); } Collection FieldCacheSanityChecker::check(Collection cacheEntries) { if (!cacheEntries || cacheEntries.empty()) return Collection::newInstance(); // Maps the (valId) identityhashCode of cache values to sets of CacheEntry instances MapSetIntFieldCacheEntry valIdToItems(MapSetIntFieldCacheEntry::map_type::newInstance()); // Maps ReaderField keys to Sets of ValueIds MapSetReaderFieldInt readerFieldToValIds(MapSetReaderFieldInt::map_type::newInstance()); // Any keys that we know result in more then one valId SetReaderField valMismatchKeys(SetReaderField::newInstance()); // iterate over all the cacheEntries to get the mappings we'll need for (int32_t i = 0; i < cacheEntries.size(); ++i) { FieldCacheEntryPtr item(cacheEntries[i]); boost::any val(item->getValue()); if (VariantUtils::typeOf(val)) continue; ReaderFieldPtr rf(newLucene(item->getReaderKey(), item->getFieldName())); int32_t valId = VariantUtils::hashCode(val); // indirect mapping, so the MapOfSet will dedup identical valIds for us valIdToItems.put(valId, item); if (1 < readerFieldToValIds.put(rf, valId)) valMismatchKeys.add(rf); } Collection insanity(Collection::newInstance()); Collection mismatch(checkValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys)); insanity.addAll(mismatch.begin(), mismatch.end()); 
Collection subreaders(checkSubreaders(valIdToItems, readerFieldToValIds)); insanity.addAll(subreaders.begin(), subreaders.end()); return insanity; } Collection FieldCacheSanityChecker::checkValueMismatch(MapSetIntFieldCacheEntry valIdToItems, MapSetReaderFieldInt readerFieldToValIds, SetReaderField valMismatchKeys) { Collection insanity(Collection::newInstance()); if (!valMismatchKeys.empty()) { // we have multiple values for some ReaderFields MapSetReaderFieldInt::map_type rfMap = readerFieldToValIds.getMap(); MapSetIntFieldCacheEntry::map_type valMap = valIdToItems.getMap(); for (SetReaderField::iterator rf = valMismatchKeys.begin(); rf != valMismatchKeys.end(); ++rf) { Collection badEntries(Collection::newInstance()); MapSetReaderFieldInt::set_type values(rfMap.get(*rf)); for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { MapSetIntFieldCacheEntry::set_type cacheEntries(valMap.get(*value)); for (MapSetIntFieldCacheEntry::set_type::iterator cacheEntry = cacheEntries.begin(); cacheEntry != cacheEntries.end(); ++cacheEntry) badEntries.add(*cacheEntry); } insanity.add(newLucene(VALUEMISMATCH, L"Multiple distinct value objects for " + (*rf)->toString(), badEntries)); } } return insanity; } Collection FieldCacheSanityChecker::checkSubreaders(MapSetIntFieldCacheEntry valIdToItems, MapSetReaderFieldInt readerFieldToValIds) { Collection insanity(Collection::newInstance()); MapReaderFieldSetReaderField badChildren(MapReaderFieldSetReaderField::newInstance()); MapSetReaderFieldReaderField badKids(badChildren); // wrapper MapSetIntFieldCacheEntry::map_type viToItemSets = valIdToItems.getMap(); MapSetReaderFieldInt::map_type rfToValIdSets = readerFieldToValIds.getMap(); SetReaderField seen(SetReaderField::newInstance()); for (MapSetReaderFieldInt::map_type::iterator rf = rfToValIdSets.begin(); rf != rfToValIdSets.end(); ++rf) { if (seen.contains(rf->first)) continue; Collection 
kids(getAllDecendentReaderKeys(rf->first->readerKey)); for (Collection::iterator kidKey = kids.begin(); kidKey != kids.end(); ++kidKey) { ReaderFieldPtr kid(newLucene(*kidKey, rf->first->fieldName)); if (badChildren.contains(kid)) { // we've already process this kid as RF and found other problems track those problems as our own badKids.put(rf->first, kid); badKids.putAll(rf->first, badChildren.get(kid)); badChildren.remove(kid); } else if (rfToValIdSets.contains(kid)) { // we have cache entries for the kid badKids.put(rf->first, kid); } seen.add(kid); } seen.add(rf->first); } // every mapping in badKids represents an Insanity for (MapReaderFieldSetReaderField::iterator parent = badChildren.begin(); parent != badChildren.end(); ++parent) { SetReaderField kids = parent->second; Collection badEntries(Collection::newInstance()); // put parent entries in first MapSetReaderFieldInt::set_type values(rfToValIdSets.get(parent->first)); for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { MapSetIntFieldCacheEntry::set_type cacheEntries(viToItemSets.get(*value)); badEntries.addAll(cacheEntries.begin(), cacheEntries.end()); } // now the entries for the descendants for (SetReaderField::iterator kid = kids.begin(); kid != kids.end(); ++kid) { MapSetReaderFieldInt::set_type values(rfToValIdSets.get(*kid)); for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { MapSetIntFieldCacheEntry::set_type cacheEntries(viToItemSets.get(*value)); badEntries.addAll(cacheEntries.begin(), cacheEntries.end()); } } insanity.add(newLucene(SUBREADER, L"Found caches for descendants of " + parent->first->toString(), badEntries)); } return insanity; } Collection FieldCacheSanityChecker::getAllDecendentReaderKeys(LuceneObjectPtr seed) { Collection all(Collection::newInstance()); // will grow as we iter all.add(seed); for (int32_t i = 0; i < all.size(); ++i) { IndexReaderPtr 
indexReader(boost::dynamic_pointer_cast(all[i])); if (indexReader) { Collection subs(indexReader->getSequentialSubReaders()); for (int32_t j = 0; subs && j < subs.size(); ++j) all.add(subs[j]->getFieldCacheKey()); } } // need to remove the first, because it was the seed all.remove(all.begin()); return all; } ReaderField::ReaderField(LuceneObjectPtr readerKey, const String& fieldName) { this->readerKey = readerKey; this->fieldName = fieldName; } ReaderField::~ReaderField() { } int32_t ReaderField::hashCode() { return readerKey->hashCode() * StringUtils::hashCode(fieldName); } bool ReaderField::equals(LuceneObjectPtr other) { ReaderFieldPtr otherReaderField(boost::dynamic_pointer_cast(other)); if (!otherReaderField) return false; return (readerKey->equals(otherReaderField->readerKey) && fieldName == otherReaderField->fieldName); } String ReaderField::toString() { return readerKey->toString() + L"+" + fieldName; } Insanity::Insanity(FieldCacheSanityChecker::InsanityType type, const String& msg, Collection entries) { if (!entries || entries.empty()) boost::throw_exception(IllegalArgumentException(L"Insanity requires non-null/non-empty CacheEntry[]")); this->type = type; this->msg = msg; this->entries = entries; } Insanity::~Insanity() { } FieldCacheSanityChecker::InsanityType Insanity::getType() { return type; } String Insanity::getMsg() { return msg; } Collection Insanity::getCacheEntries() { return entries; } String Insanity::toString() { StringStream buffer; switch (type) { case FieldCacheSanityChecker::SUBREADER: buffer << L"SUBREADER: "; break; case FieldCacheSanityChecker::VALUEMISMATCH: buffer << L"VALUEMISMATCH: "; break; case FieldCacheSanityChecker::EXPECTED: buffer << L"EXPECTED: "; break; } buffer << msg << L"\n"; for (Collection::iterator ce = entries.begin(); ce != entries.end(); ++ce) buffer << L"\t" << (*ce)->toString() << L"\n"; return buffer.str(); } } 
LucenePlusPlus-rel_3.0.4/src/core/util/FileReader.cpp000066400000000000000000000041421217574114600225160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "FileReader.h" #include "MiscUtils.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t FileReader::FILE_EOF = Reader::READER_EOF; const int32_t FileReader::FILE_ERROR = -1; FileReader::FileReader(const String& fileName) { this->file = newInstance(StringUtils::toUTF8(fileName).c_str(), std::ios::binary | std::ios::in); if (!file->is_open()) boost::throw_exception(FileNotFoundException(fileName)); _length = FileUtils::fileLength(fileName); } FileReader::~FileReader() { } int32_t FileReader::read() { wchar_t buffer; return read(&buffer, 0, 1) == FILE_EOF ? FILE_EOF : buffer; } int32_t FileReader::read(wchar_t* buffer, int32_t offset, int32_t length) { try { if (file->eof()) return FILE_EOF; if (!fileBuffer) fileBuffer = ByteArray::newInstance(length); if (length > fileBuffer.size()) fileBuffer.resize(length); file->read((char*)fileBuffer.get(), length); int32_t readLength = file->gcount(); MiscUtils::arrayCopy(fileBuffer.get(), 0, buffer, offset, readLength); return readLength == 0 ? FILE_EOF : readLength; } catch (...) 
{ return FILE_ERROR; } } void FileReader::close() { file->close(); } bool FileReader::markSupported() { return false; } void FileReader::reset() { file->clear(); file->seekg((std::streamoff)0); } int64_t FileReader::length() { return _length; } } LucenePlusPlus-rel_3.0.4/src/core/util/FileUtils.cpp000066400000000000000000000145631217574114600224240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include #include "LuceneThread.h" #include "StringUtils.h" #include "FileUtils.h" #if defined(_WIN32) || defined(_WIN64) #include #include #include #else #include #endif namespace Lucene { namespace FileUtils { bool fileExists(const String& path) { try { return boost::filesystem::exists(path.c_str()); } catch (...) { return false; } } uint64_t fileModified(const String& path) { try { return (uint64_t)boost::filesystem::last_write_time(path.c_str()); } catch (...) { return 0; } } bool touchFile(const String& path) { try { boost::filesystem::last_write_time(path.c_str(), time(NULL)); return true; } catch (...) { return false; } } int64_t fileLength(const String& path) { try { int64_t fileSize = (int64_t)boost::filesystem::file_size(path.c_str()); for (int32_t i = 0; fileSize == 0 && i < 100; ++i) { LuceneThread::threadYield(); fileSize = (int64_t)boost::filesystem::file_size(path.c_str()); } return fileSize; } catch (...) 
{ return 0; } } bool setFileLength(const String& path, int64_t length) { try { if (!fileExists(path)) return false; #if defined(_WIN32) || defined(_WIN64) int32_t fd = _wopen(path.c_str(), _O_WRONLY | _O_CREAT | _O_BINARY, _S_IWRITE); return _chsize(fd, (long)length) == 0; #else return truncate(StringUtils::toUTF8(path).c_str(), (off_t)length) == 0; #endif } catch (...) { return false; } } bool removeFile(const String& path) { try { return boost::filesystem::remove(path.c_str()); } catch (...) { return false; } } bool copyFile(const String& source, const String& dest) { try { boost::filesystem::copy_file(source.c_str(), dest.c_str()); return true; } catch (...) { return false; } } bool createDirectory(const String& path) { try { return boost::filesystem::create_directory(path.c_str()); } catch (...) { return false; } } bool removeDirectory(const String& path) { try { boost::filesystem::remove_all(path.c_str()); return true; } catch (...) { return false; } } bool isDirectory(const String& path) { try { return boost::filesystem::is_directory(path.c_str()); } catch (...) { return false; } } bool listDirectory(const String& path, bool filesOnly, HashSet dirList) { try { for (boost::filesystem::directory_iterator dir(path.c_str()); dir != boost::filesystem::directory_iterator(); ++dir) { if (!filesOnly || !boost::filesystem::is_directory(dir->status())) dirList.add(dir->path().filename().wstring().c_str()); } return true; } catch (...) { return false; } } bool copyDirectory(const String& source, const String& dest) { try { HashSet dirList(HashSet::newInstance()); if (!listDirectory(source, true, dirList)) return false; createDirectory(dest); for (HashSet::iterator file = dirList.begin(); file != dirList.end(); ++file) copyFile(joinPath(source, *file), joinPath(dest, *file)); return true; } catch (...) 
{ return false; } } String joinPath(const String& path, const String& file) { try { boost::filesystem::path join(path.c_str()); join /= file.c_str(); return join.wstring().c_str(); } catch (...) { return path; } } String extractPath(const String& path) { try { boost::filesystem::wpath parentPath(path.c_str()); return parentPath.parent_path().wstring().c_str(); } catch (...) { return path; } } String extractFile(const String& path) { try { boost::filesystem::wpath fileName(path.c_str()); return fileName.filename().wstring().c_str(); } catch (...) { return path; } } } } LucenePlusPlus-rel_3.0.4/src/core/util/InfoStream.cpp000066400000000000000000000022241217574114600225620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "InfoStream.h" #include "StringUtils.h" namespace Lucene { InfoStream::InfoStream() { } InfoStream::~InfoStream() { } InfoStreamFile::InfoStreamFile(const String& path) : file(StringUtils::toUTF8(path).c_str()) { } InfoStreamFile::~InfoStreamFile() { } InfoStreamFile& InfoStreamFile::operator<< (const String& t) { file << t; return *this; } InfoStreamOut::~InfoStreamOut() { } InfoStreamOut& InfoStreamOut::operator<< (const String& t) { std::wcout << t; return *this; } InfoStreamNull::~InfoStreamNull() { } InfoStreamNull& InfoStreamNull::operator<< (const String& t) { return *this; } } LucenePlusPlus-rel_3.0.4/src/core/util/InputStreamReader.cpp000066400000000000000000000023721217574114600241150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InputStreamReader.h" #include "BufferedReader.h" #include "UTF8Stream.h" namespace Lucene { InputStreamReader::InputStreamReader(ReaderPtr reader) { this->reader = reader; this->decoder = newLucene(newLucene(reader, 1024)); } InputStreamReader::~InputStreamReader() { } int32_t InputStreamReader::read() { int32_t buffer; return read((wchar_t*)&buffer, 0, 1) == READER_EOF ? READER_EOF : buffer; } int32_t InputStreamReader::read(wchar_t* b, int32_t offset, int32_t length) { return decoder->decode(b + offset, length); } void InputStreamReader::close() { reader->close(); } bool InputStreamReader::markSupported() { return false; } void InputStreamReader::reset() { reader->reset(); } } LucenePlusPlus-rel_3.0.4/src/core/util/LuceneAllocator.cpp000066400000000000000000000033741217574114600235760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneAllocator.h" #ifdef LPP_USE_NEDMALLOC extern "C" { #include "nedmalloc/nedmalloc.h" } #endif namespace Lucene { void* AllocMemory(size_t size) { #if defined(LPP_USE_NEDMALLOC) return nedalloc::nedmalloc(size); #elif (defined(_WIN32) || defined(_WIN64)) && !defined(NDEBUG) return _malloc_dbg(size, _NORMAL_BLOCK, __FILE__, __LINE__); #else return malloc(size); #endif } void* ReallocMemory(void* memory, size_t size) { if (memory == NULL) return AllocMemory(size); if (size == 0) { FreeMemory(memory); return NULL; } #if defined(LPP_USE_NEDMALLOC) return nedalloc::nedrealloc(memory, size); #elif defined(_WIN32) && !defined(NDEBUG) return _realloc_dbg(memory, size, _NORMAL_BLOCK, __FILE__, __LINE__); #else return realloc(memory, size); #endif } void FreeMemory(void* memory) { if (memory == NULL) return; #if defined(LPP_USE_NEDMALLOC) nedalloc::nedfree(memory); #elif defined(_WIN32) && !defined(NDEBUG) _free_dbg(memory, _NORMAL_BLOCK); #else free(memory); #endif } void ReleaseThreadCache() { #if defined(LPP_USE_NEDMALLOC) nedalloc::neddisablethreadcache(0); #endif } } LucenePlusPlus-rel_3.0.4/src/core/util/LuceneException.cpp000066400000000000000000000105251217574114600236100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneException.h" namespace Lucene { LuceneException::LuceneException(const String& error, ExceptionType type) throw() { this->error = error; this->type = type; } LuceneException::~LuceneException() throw() { } LuceneException::ExceptionType LuceneException::getType() const { return type; } String LuceneException::getError() const { return error; } bool LuceneException::isNull() const { return (type == Null); } void LuceneException::throwException() { switch (type) { case LuceneException::AlreadyClosed: boost::throw_exception(AlreadyClosedException(error, type)); case LuceneException::Compression: boost::throw_exception(CompressionException(error, type)); case LuceneException::CorruptIndex: boost::throw_exception(CorruptIndexException(error, type)); case LuceneException::FieldReader: boost::throw_exception(FieldReaderException(error, type)); case LuceneException::FileNotFound: boost::throw_exception(FileNotFoundException(error, type)); case LuceneException::IllegalArgument: boost::throw_exception(IllegalArgumentException(error, type)); case LuceneException::IllegalState: boost::throw_exception(IllegalStateException(error, type)); case LuceneException::IndexOutOfBounds: boost::throw_exception(IndexOutOfBoundsException(error, type)); case LuceneException::IO: boost::throw_exception(IOException(error, type)); case LuceneException::LockObtainFailed: boost::throw_exception(LockObtainFailedException(error, type)); case LuceneException::LockReleaseFailed: boost::throw_exception(LockReleaseFailedException(error, type)); case LuceneException::Lookahead: boost::throw_exception(LookaheadSuccess(error, type)); case LuceneException::MergeAborted: boost::throw_exception(MergeAbortedException(error, type)); case LuceneException::Merge: boost::throw_exception(MergeException(error, type)); case LuceneException::NoSuchDirectory: 
boost::throw_exception(NoSuchDirectoryException(error, type)); case LuceneException::NullPointer: boost::throw_exception(NullPointerException(error, type)); case LuceneException::NumberFormat: boost::throw_exception(NumberFormatException(error, type)); case LuceneException::OutOfMemory: boost::throw_exception(OutOfMemoryError(error, type)); case LuceneException::Parse: boost::throw_exception(ParseException(error, type)); case LuceneException::QueryParser: boost::throw_exception(QueryParserError(error, type)); case LuceneException::Runtime: boost::throw_exception(RuntimeException(error, type)); case LuceneException::StaleReader: boost::throw_exception(StaleReaderException(error, type)); case LuceneException::StopFillCache: boost::throw_exception(StopFillCacheException(error, type)); case LuceneException::Temporary: boost::throw_exception(TemporaryException(error, type)); case LuceneException::TimeExceeded: boost::throw_exception(TimeExceededException(error, type)); case LuceneException::TooManyClauses: boost::throw_exception(TooManyClausesException(error, type)); case LuceneException::UnsupportedOperation: boost::throw_exception(UnsupportedOperationException(error, type)); case LuceneException::Null: // silence static analyzer break; } } } LucenePlusPlus-rel_3.0.4/src/core/util/LuceneObject.cpp000066400000000000000000000023431217574114600230570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneObject.h" #include "StringUtils.h" namespace Lucene { LuceneObject::LuceneObject() { } LuceneObject::~LuceneObject() { } void LuceneObject::initialize() { // override } LuceneObjectPtr LuceneObject::clone(LuceneObjectPtr other) { if (!other) boost::throw_exception(UnsupportedOperationException(L"clone must not be null")); return other; } int32_t LuceneObject::hashCode() { return (int32_t)(int64_t)this; } bool LuceneObject::equals(LuceneObjectPtr other) { return (other && this == other.get()); } int32_t LuceneObject::compareTo(LuceneObjectPtr other) { return (int32_t)(this - other.get()); } String LuceneObject::toString() { return StringUtils::toString(hashCode()); } } LucenePlusPlus-rel_3.0.4/src/core/util/LuceneSignal.cpp000066400000000000000000000027331217574114600230710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneSignal.h" #include "Synchronize.h" namespace Lucene { LuceneSignal::LuceneSignal(SynchronizePtr objectLock) { this->objectLock = objectLock; } LuceneSignal::~LuceneSignal() { } void LuceneSignal::createSignal(LuceneSignalPtr& signal, SynchronizePtr objectLock) { static boost::mutex lockMutex; boost::mutex::scoped_lock syncLock(lockMutex); if (!signal) signal = newInstance(objectLock); } void LuceneSignal::wait(int32_t timeout) { int32_t relockCount = objectLock ? 
objectLock->unlockAll() : 0; boost::mutex::scoped_lock waitLock(waitMutex); while (!signalCondition.timed_wait(waitMutex, boost::posix_time::milliseconds(timeout))) { if (timeout != 0 || signalCondition.timed_wait(waitMutex, boost::posix_time::milliseconds(10))) break; } for (int32_t relock = 0; relock < relockCount; ++relock) objectLock->lock(); } void LuceneSignal::notifyAll() { signalCondition.notify_all(); } } LucenePlusPlus-rel_3.0.4/src/core/util/LuceneSync.cpp000066400000000000000000000022721217574114600225660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneSync.h" #include "Synchronize.h" #include "LuceneSignal.h" namespace Lucene { LuceneSync::~LuceneSync() { } SynchronizePtr LuceneSync::getSync() { Synchronize::createSync(objectLock); return objectLock; } LuceneSignalPtr LuceneSync::getSignal() { LuceneSignal::createSignal(objectSignal, getSync()); return objectSignal; } void LuceneSync::lock(int32_t timeout) { getSync()->lock(); } void LuceneSync::unlock() { getSync()->unlock(); } bool LuceneSync::holdsLock() { return getSync()->holdsLock(); } void LuceneSync::wait(int32_t timeout) { getSignal()->wait(timeout); } void LuceneSync::notifyAll() { getSignal()->notifyAll(); } } LucenePlusPlus-rel_3.0.4/src/core/util/LuceneThread.cpp000066400000000000000000000061331217574114600230610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "LuceneThread.h" namespace Lucene { #if defined(_WIN32) || defined(_WIN64) const int32_t LuceneThread::MAX_PRIORITY = THREAD_PRIORITY_HIGHEST; const int32_t LuceneThread::NORM_PRIORITY = THREAD_PRIORITY_NORMAL; const int32_t LuceneThread::MIN_PRIORITY = THREAD_PRIORITY_LOWEST; #else const int32_t LuceneThread::MAX_PRIORITY = 2; const int32_t LuceneThread::NORM_PRIORITY = 0; const int32_t LuceneThread::MIN_PRIORITY = -2; #endif LuceneThread::LuceneThread() { running = false; } LuceneThread::~LuceneThread() { } void LuceneThread::start() { setRunning(false); thread = newInstance(LuceneThread::runThread, this); setRunning(true); } void LuceneThread::runThread(LuceneThread* thread) { LuceneThreadPtr threadObject(thread->shared_from_this()); try { threadObject->run(); } catch (...) { } threadObject->setRunning(false); threadObject.reset(); ReleaseThreadCache(); } void LuceneThread::setRunning(bool running) { SyncLock syncLock(this); this->running = running; } bool LuceneThread::isRunning() { SyncLock syncLock(this); return running; } bool LuceneThread::isAlive() { return (thread && isRunning()); } void LuceneThread::setPriority(int32_t priority) { #if defined(_WIN32) || defined(_WIN64) if (thread) SetThreadPriority(thread->native_handle(), priority); #endif } int32_t LuceneThread::getPriority() { #if defined(_WIN32) || defined(_WIN64) return thread ? 
GetThreadPriority(thread->native_handle()) : NORM_PRIORITY; #else return NORM_PRIORITY; #endif } void LuceneThread::yield() { if (thread) thread->yield(); } bool LuceneThread::join(int32_t timeout) { while (isAlive() && !thread->timed_join(boost::posix_time::milliseconds(timeout))) { if (timeout != 0) return false; if (thread->timed_join(boost::posix_time::milliseconds(10))) return true; } return true; } int64_t LuceneThread::currentId() { #if defined(_WIN32) || defined(_WIN64) return (int64_t)GetCurrentThreadId(); #else return (int64_t)pthread_self(); #endif } void LuceneThread::threadSleep(int32_t time) { boost::this_thread::sleep(boost::posix_time::milliseconds(time)); } void LuceneThread::threadYield() { boost::this_thread::yield(); } } LucenePlusPlus-rel_3.0.4/src/core/util/MiscUtils.cpp000066400000000000000000000113721217574114600224330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MiscUtils.h" #include "LuceneObject.h" namespace Lucene { const uint32_t MiscUtils::SINGLE_EXPONENT_MASK = 0x7f800000; const uint32_t MiscUtils::SINGLE_MANTISSA_MASK = 0x007fffff; const uint32_t MiscUtils::SINGLE_NAN_BITS = (MiscUtils::SINGLE_EXPONENT_MASK | 0x00400000); const uint64_t MiscUtils::DOUBLE_SIGN_MASK = 0x8000000000000000LL; const uint64_t MiscUtils::DOUBLE_EXPONENT_MASK = 0x7ff0000000000000LL; const uint64_t MiscUtils::DOUBLE_MANTISSA_MASK = 0x000fffffffffffffLL; const uint64_t MiscUtils::DOUBLE_NAN_BITS = DOUBLE_EXPONENT_MASK | 0x0008000000000000LL; uint64_t MiscUtils::getTimeMillis(boost::posix_time::ptime time) { return boost::posix_time::time_duration(time - boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1))).total_milliseconds(); } uint64_t MiscUtils::currentTimeMillis() { return getTimeMillis(boost::posix_time::microsec_clock::universal_time()); } int32_t MiscUtils::getNextSize(int32_t targetSize) { return (targetSize >> 3) + (targetSize < 9 ? 3 : 6) + targetSize; } int32_t MiscUtils::getShrinkSize(int32_t currentSize, int32_t targetSize) { int32_t newSize = getNextSize(targetSize); return (newSize < currentSize / 2) ? newSize : currentSize; } int32_t MiscUtils::bytesDifference(uint8_t* bytes1, int32_t len1, uint8_t* bytes2, int32_t len2) { int32_t len = std::min(len1, len2); for (int32_t i = 0; i < len; ++i) { if (bytes1[i] != bytes2[i]) return i; } return len; } int32_t MiscUtils::hashCode(const wchar_t* array, int32_t start, int32_t end) { return hashCode(array + start, array + end, hashNumeric); } int32_t MiscUtils::hashCode(const uint8_t* array, int32_t start, int32_t end) { return hashCode(array + start, array + end, hashNumeric); } int32_t MiscUtils::hashCode(bool value) { return value ? 
1231 : 1237; } int32_t MiscUtils::doubleToIntBits(double value) { int32_t intValue = 0; float floatValue = (float)value; std::memcpy(&intValue, &floatValue, sizeof(float)); if ((intValue & SINGLE_EXPONENT_MASK) == SINGLE_EXPONENT_MASK) { if (intValue & SINGLE_MANTISSA_MASK) return SINGLE_NAN_BITS; } return intValue; } int32_t MiscUtils::doubleToRawIntBits(double value) { int32_t intValue = 0; float floatValue = (float)value; std::memcpy(&intValue, &floatValue, sizeof(float)); return intValue; } double MiscUtils::intBitsToDouble(int32_t bits) { float floatValue = 0; std::memcpy(&floatValue, &bits, sizeof(int32_t)); return (double)floatValue; } int64_t MiscUtils::doubleToLongBits(double value) { int64_t longValue = 0; std::memcpy(&longValue, &value, sizeof(double)); if ((longValue & DOUBLE_EXPONENT_MASK) == DOUBLE_EXPONENT_MASK) { if (longValue & DOUBLE_MANTISSA_MASK) return DOUBLE_NAN_BITS; } return longValue; } int64_t MiscUtils::doubleToRawLongBits(double value) { int64_t longValue = 0; std::memcpy(&longValue, &value, sizeof(double)); return longValue; } double MiscUtils::longBitsToDouble(int64_t bits) { double doubleValue = 0; std::memcpy(&doubleValue, &bits, sizeof(int64_t)); return doubleValue; } bool MiscUtils::isInfinite(double value) { return (value == std::numeric_limits::infinity() || value == -std::numeric_limits::infinity()); } bool MiscUtils::isNaN(double value) { return (value != value); } bool MiscUtils::equalTypes(LuceneObjectPtr first, LuceneObjectPtr second) { return (typeid(*first) == typeid(*second)); } int64_t MiscUtils::unsignedShift(int64_t num, int64_t shift) { return (shift & 0x3f) == 0 ? num : (((uint64_t)num >> 1) & 0x7fffffffffffffffLL) >> ((shift & 0x3f) - 1); } int32_t MiscUtils::unsignedShift(int32_t num, int32_t shift) { return (shift & 0x1f) == 0 ? 
num : (((uint32_t)num >> 1) & 0x7fffffff) >> ((shift & 0x1f) - 1); } } LucenePlusPlus-rel_3.0.4/src/core/util/NumericUtils.cpp000066400000000000000000000250221217574114600231370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericUtils.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// The default precision step used by {@link NumericField}, {@link NumericTokenStream}, {@link NumericRangeQuery}, /// and {@link NumericRangeFilter} as default. const int32_t NumericUtils::PRECISION_STEP_DEFAULT = 4; /// Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + /// shift in the first character. const wchar_t NumericUtils::SHIFT_START_LONG = (wchar_t)0x20; /// The maximum term length (used for char[] buffer size) for encoding long values. /// @see #longToPrefixCoded(long,int,char[]) const int32_t NumericUtils::BUF_SIZE_LONG = 63 / 7 + 2; /// Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + /// shift in the first character. const wchar_t NumericUtils::SHIFT_START_INT = (wchar_t)0x60; /// The maximum term length (used for char[] buffer size) for encoding int values. 
/// @see #intToPrefixCoded(int,int,char[]) const int32_t NumericUtils::BUF_SIZE_INT = 31 / 7 + 2; NumericUtils::~NumericUtils() { } int32_t NumericUtils::longToPrefixCoded(int64_t val, int32_t shift, CharArray buffer) { if (shift > 63 || shift < 0) boost::throw_exception(IllegalArgumentException(L"Illegal shift value, must be 0..63")); int32_t nChars = (63 - shift) / 7 + 1; int32_t len = nChars + 1; buffer[0] = (wchar_t)(SHIFT_START_LONG + shift); int64_t sortableBits = val ^ 0x8000000000000000LL; sortableBits = MiscUtils::unsignedShift(sortableBits, (int64_t)shift); while (nChars >= 1) { // Store 7 bits per character for good efficiency when UTF-8 encoding. The whole number is // right-justified so that lucene can prefix-encode the terms more efficiently. buffer[nChars--] = (wchar_t)(sortableBits & 0x7f); sortableBits = MiscUtils::unsignedShift(sortableBits, (int64_t)7); } return len; } String NumericUtils::longToPrefixCoded(int64_t val, int32_t shift) { CharArray buffer(CharArray::newInstance(BUF_SIZE_LONG)); int32_t len = longToPrefixCoded(val, shift, buffer); return String(buffer.get(), len); } String NumericUtils::longToPrefixCoded(int64_t val) { return longToPrefixCoded(val, 0); } int32_t NumericUtils::intToPrefixCoded(int32_t val, int32_t shift, CharArray buffer) { if (shift > 31 || shift < 0) boost::throw_exception(IllegalArgumentException(L"Illegal shift value, must be 0..31")); int32_t nChars = (31 - shift) / 7 + 1; int32_t len = nChars + 1; buffer[0] = (wchar_t)(SHIFT_START_INT + shift); int32_t sortableBits = val ^ 0x80000000; sortableBits = MiscUtils::unsignedShift(sortableBits, shift); while (nChars >= 1) { // Store 7 bits per character for good efficiency when UTF-8 encoding. The whole number is // right-justified so that lucene can prefix-encode the terms more efficiently. 
buffer[nChars--] = (wchar_t)(sortableBits & 0x7f); sortableBits = MiscUtils::unsignedShift(sortableBits, 7); } return len; } String NumericUtils::intToPrefixCoded(int32_t val, int32_t shift) { CharArray buffer(CharArray::newInstance(BUF_SIZE_INT)); int32_t len = intToPrefixCoded(val, shift, buffer); return String(buffer.get(), len); } String NumericUtils::intToPrefixCoded(int32_t val) { return intToPrefixCoded(val, 0); } int64_t NumericUtils::prefixCodedToLong(const String& prefixCoded) { int32_t shift = prefixCoded[0] - SHIFT_START_LONG; if (shift > 63 || shift < 0) boost::throw_exception(NumberFormatException(L"Invalid shift value in prefixCoded string (is encoded value really a LONG?)")); int64_t sortableBits = 0; for (int32_t i = 1, len = prefixCoded.length(); i < len; ++i) { sortableBits <<= 7; wchar_t ch = prefixCoded[i]; if (ch > 0x7f) { boost::throw_exception(NumberFormatException(L"Invalid prefixCoded numerical value representation (char " + StringUtils::toString(ch, 16) + L" at position " + StringUtils::toString(i) + L" is invalid)")); } sortableBits |= (int64_t)ch; } return (sortableBits << shift) ^ 0x8000000000000000LL; } int32_t NumericUtils::prefixCodedToInt(const String& prefixCoded) { int32_t shift = prefixCoded[0] - SHIFT_START_INT; if (shift > 31 || shift < 0) boost::throw_exception(NumberFormatException(L"Invalid shift value in prefixCoded string (is encoded value really a INT?)")); int32_t sortableBits = 0; for (int32_t i = 1, len = prefixCoded.length(); i < len; ++i) { sortableBits <<= 7; wchar_t ch = prefixCoded[i]; if (ch > 0x7f) { boost::throw_exception(NumberFormatException(L"Invalid prefixCoded numerical value representation (char " + StringUtils::toString(ch, 16) + L" at position " + StringUtils::toString(i) + L" is invalid)")); } sortableBits |= (int32_t)ch; } return (sortableBits << shift) ^ 0x80000000; } int64_t NumericUtils::doubleToSortableLong(double val) { int64_t f = MiscUtils::doubleToRawLongBits(val); if (f < 0) f ^= 
0x7fffffffffffffffLL; return f; } String NumericUtils::doubleToPrefixCoded(double val) { return longToPrefixCoded(doubleToSortableLong(val)); } double NumericUtils::sortableLongToDouble(int64_t val) { if (val < 0) val ^= 0x7fffffffffffffffLL; return MiscUtils::longBitsToDouble(val); } double NumericUtils::prefixCodedToDouble(const String& val) { return sortableLongToDouble(prefixCodedToLong(val)); } void NumericUtils::splitLongRange(LongRangeBuilderPtr builder, int32_t precisionStep, int64_t minBound, int64_t maxBound) { splitRange(builder, 64, precisionStep, minBound, maxBound); } void NumericUtils::splitIntRange(IntRangeBuilderPtr builder, int32_t precisionStep, int32_t minBound, int32_t maxBound) { splitRange(builder, 32, precisionStep, (int64_t)minBound, (int64_t)maxBound); } void NumericUtils::splitRange(LuceneObjectPtr builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound) { if (precisionStep < 1) boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); if (minBound > maxBound) return; for (int32_t shift = 0; ; shift += precisionStep) { // calculate new bounds for inner precision int64_t diff = (int64_t)1 << (shift + precisionStep); int64_t mask = (((int64_t)1 << precisionStep) - (int64_t)1) << shift; bool hasLower = ((minBound & mask) != 0); bool hasUpper = ((maxBound & mask) != mask); int64_t nextMinBound = ((hasLower ? (minBound + diff) : minBound) & ~mask); int64_t nextMaxBound = ((hasUpper ? (maxBound - diff) : maxBound) & ~mask); bool lowerWrapped = nextMinBound < minBound; bool upperWrapped = nextMaxBound > maxBound; if (shift + precisionStep >= valSize || nextMinBound>nextMaxBound || lowerWrapped || upperWrapped) { // We are in the lowest precision or the next precision is not available. 
addRange(builder, valSize, minBound, maxBound, shift); break; // exit the split recursion loop } if (hasLower) addRange(builder, valSize, minBound, minBound | mask, shift); if (hasUpper) addRange(builder, valSize, maxBound & ~mask, maxBound, shift); // recurse to next precision minBound = nextMinBound; maxBound = nextMaxBound; } } void NumericUtils::addRange(LuceneObjectPtr builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift) { // for the max bound set all lower bits (that were shifted away): this is important for testing or other // usages of the splitted range (eg. to reconstruct the full range). The prefixEncoding will remove the // bits anyway, so they do not hurt! maxBound |= ((int64_t)1 << shift) - (int64_t)1; // delegate to correct range builder switch (valSize) { case 64: boost::dynamic_pointer_cast(builder)->addRange(minBound, maxBound, shift); break; case 32: boost::dynamic_pointer_cast(builder)->addRange((int32_t)minBound, (int32_t)maxBound, shift); break; default: boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64.")); } } LongRangeBuilder::~LongRangeBuilder() { } void LongRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { boost::throw_exception(UnsupportedOperationException()); } void LongRangeBuilder::addRange(int64_t min, int64_t max, int32_t shift) { addRange(NumericUtils::longToPrefixCoded(min, shift), NumericUtils::longToPrefixCoded(max, shift)); } IntRangeBuilder::~IntRangeBuilder() { } void IntRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { boost::throw_exception(UnsupportedOperationException()); } void IntRangeBuilder::addRange(int32_t min, int32_t max, int32_t shift) { addRange(NumericUtils::intToPrefixCoded(min, shift), NumericUtils::intToPrefixCoded(max, shift)); } } 
LucenePlusPlus-rel_3.0.4/src/core/util/OpenBitSet.cpp000066400000000000000000000436151217574114600225400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OpenBitSet.h" #include "OpenBitSetIterator.h" #include "BitUtil.h" #include "MiscUtils.h" namespace Lucene { OpenBitSet::OpenBitSet(int64_t numBits) { bits = LongArray::newInstance(bits2words(numBits)); MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0LL); wlen = bits.size(); } OpenBitSet::OpenBitSet(LongArray bits, int32_t numWords) { this->bits = bits; this->wlen = numWords; } OpenBitSet::~OpenBitSet() { } DocIdSetIteratorPtr OpenBitSet::iterator() { return newLucene(bits, wlen); } bool OpenBitSet::isCacheable() { return true; } int64_t OpenBitSet::capacity() { return bits.size() << 6; } int64_t OpenBitSet::size() { return capacity(); } bool OpenBitSet::isEmpty() { return (cardinality() == 0); } LongArray OpenBitSet::getBits() { return bits; } void OpenBitSet::setBits(LongArray bits) { this->bits = bits; } int32_t OpenBitSet::getNumWords() { return wlen; } void OpenBitSet::setNumWords(int32_t numWords) { this->wlen = numWords; } bool OpenBitSet::get(int32_t index) { int32_t i = index >> 6; // div 64 // signed shift will keep a negative index and force an array-index-out-of-bounds-exception, // removing the need for an explicit check. 
if (i >= bits.size()) return false; int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; return ((bits[i] & bitmask) != 0); } bool OpenBitSet::fastGet(int32_t index) { int32_t i = index >> 6; // div 64 // signed shift will keep a negative index and force an array-index-out-of-bounds-exception, // removing the need for an explicit check. int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; return ((bits[i] & bitmask) != 0); } bool OpenBitSet::get(int64_t index) { int32_t i = (int32_t)(index >> 6); // div 64 if (i >= bits.size()) return false; int32_t bit = ((int32_t)index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; return ((bits[i] & bitmask) != 0); } bool OpenBitSet::fastGet(int64_t index) { int32_t i = (int32_t)(index >> 6); // div 64 int32_t bit = ((int32_t)index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; return ((bits[i] & bitmask) != 0); } int32_t OpenBitSet::getBit(int32_t index) { int32_t i = index >> 6; // div 64 int32_t bit = (index & 0x3f); // mod 64 return (int32_t)MiscUtils::unsignedShift(bits[i], (int64_t)bit) & 0x01; } void OpenBitSet::set(int64_t index) { int32_t wordNum = expandingWordNum(index); int32_t bit = (int32_t)index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] |= bitmask; } void OpenBitSet::fastSet(int32_t index) { int32_t wordNum = index >> 6; // div 64 int32_t bit = index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] |= bitmask; } void OpenBitSet::fastSet(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); int32_t bit = (int32_t)index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] |= bitmask; } void OpenBitSet::set(int64_t startIndex, int64_t endIndex) { if (endIndex <= startIndex) return; int32_t startWord = (int32_t)(startIndex >> 6); // since endIndex is one past the end, this is index of the last word to be changed int32_t endWord = expandingWordNum(endIndex - 1); int64_t startmask = -1LL << (startIndex & 0x3f); int64_t endmask = MiscUtils::unsignedShift(-1LL, 
-endIndex); if (startWord == endWord) { bits[startWord] |= (startmask & endmask); return; } bits[startWord] |= startmask; MiscUtils::arrayFill(bits.get(), startWord + 1, endWord, -1LL); bits[endWord] |= endmask; } int32_t OpenBitSet::expandingWordNum(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); if (wordNum >= wlen) { ensureCapacity(index + 1); wlen = wordNum + 1; } return wordNum; } void OpenBitSet::fastClear(int32_t index) { int32_t wordNum = index >> 6; int32_t bit = (index & 0x03f); int64_t bitmask = 1LL << bit; bits[wordNum] &= ~bitmask; } void OpenBitSet::fastClear(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); int32_t bit = (int32_t)index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] &= ~bitmask; } void OpenBitSet::clear(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); if (wordNum >= wlen) return; int32_t bit = (int32_t)index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] &= ~bitmask; } void OpenBitSet::clear(int32_t startIndex, int32_t endIndex) { if (endIndex <= startIndex) return; int32_t startWord = (startIndex >> 6); if (startWord >= wlen) return; // since endIndex is one past the end, this is index of the last word to be changed. int32_t endWord = ((endIndex - 1) >> 6); int64_t startmask = -1LL << (startIndex & 0x3f); int64_t endmask = MiscUtils::unsignedShift(-1LL, (int64_t)-endIndex); // invert masks since we are clearing startmask = ~startmask; endmask = ~endmask; if (startWord == endWord) { bits[startWord] &= (startmask | endmask); return; } bits[startWord] &= startmask; int32_t middle = std::min(wlen, endWord); MiscUtils::arrayFill(bits.get(), startWord + 1, middle, 0LL); if (endWord < wlen) bits[endWord] &= endmask; } void OpenBitSet::clear(int64_t startIndex, int64_t endIndex) { if (endIndex <= startIndex) return; int32_t startWord = (int32_t)(startIndex>>6); if (startWord >= wlen) return; // since endIndex is one past the end, this is index of the last word to be changed. 
int32_t endWord = (int32_t)((endIndex - 1) >> 6); int64_t startmask = -1LL << (startIndex & 0x3f); int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); // invert masks since we are clearing startmask = ~startmask; endmask = ~endmask; if (startWord == endWord) { bits[startWord] &= (startmask | endmask); return; } bits[startWord] &= startmask; int32_t middle = std::min(wlen, endWord); MiscUtils::arrayFill(bits.get(), startWord + 1, middle, 0LL); if (endWord < wlen) bits[endWord] &= endmask; } bool OpenBitSet::getAndSet(int32_t index) { int32_t wordNum = index >> 6; // div 64 int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; bool val = ((bits[wordNum] & bitmask) != 0); bits[wordNum] |= bitmask; return val; } bool OpenBitSet::getAndSet(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); // div 64 int32_t bit = (int32_t)index & 0x3f; // mod 64 int64_t bitmask = 1LL << bit; bool val = ((bits[wordNum] & bitmask) != 0); bits[wordNum] |= bitmask; return val; } void OpenBitSet::fastFlip(int32_t index) { int32_t wordNum = index >> 6; // div 64 int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; } void OpenBitSet::fastFlip(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); // div 64 int32_t bit = (int32_t)index & 0x3f; // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; } void OpenBitSet::flip(int64_t index) { int32_t wordNum = expandingWordNum(index); int32_t bit = (int32_t)index & 0x3f; // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; } bool OpenBitSet::flipAndGet(int32_t index) { int32_t wordNum = index >> 6; // div 64 int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; return ((bits[wordNum] & bitmask) != 0); } bool OpenBitSet::flipAndGet(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); // div 64 int32_t bit = (int32_t)index & 0x3f; // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; return 
((bits[wordNum] & bitmask) != 0); } void OpenBitSet::flip(int64_t startIndex, int64_t endIndex) { if (endIndex <= startIndex) return; int32_t startWord = (int32_t)(startIndex >> 6); // since endIndex is one past the end, this is index of the last word to be changed. int32_t endWord = expandingWordNum(endIndex - 1); int64_t startmask = -1LL << (startIndex & 0x3f); int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); if (startWord == endWord) { bits[startWord] ^= (startmask & endmask); return; } bits[startWord] ^= startmask; for (int32_t i = startWord + 1; i < endWord; ++i) bits[i] = ~bits[i]; bits[endWord] ^= endmask; } int64_t OpenBitSet::cardinality() { return BitUtil::pop_array(bits.get(), 0, wlen); } int64_t OpenBitSet::intersectionCount(OpenBitSetPtr a, OpenBitSetPtr b) { return BitUtil::pop_intersect(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); } int64_t OpenBitSet::unionCount(OpenBitSetPtr a, OpenBitSetPtr b) { int64_t tot = BitUtil::pop_union(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); if (a->wlen < b->wlen) tot += BitUtil::pop_array(b->bits.get(), a->wlen, b->wlen - a->wlen); else if (a->wlen > b->wlen) tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); return tot; } int64_t OpenBitSet::andNotCount(OpenBitSetPtr a, OpenBitSetPtr b) { int64_t tot = BitUtil::pop_andnot(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); if (a->wlen > b->wlen) tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); return tot; } int64_t OpenBitSet::xorCount(OpenBitSetPtr a, OpenBitSetPtr b) { int64_t tot = BitUtil::pop_xor(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); if (a->wlen < b->wlen) tot += BitUtil::pop_array(b->bits.get(), a->wlen, b->wlen - a->wlen); else if (a->wlen > b->wlen) tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); return tot; } int32_t OpenBitSet::nextSetBit(int32_t index) { int32_t i = MiscUtils::unsignedShift(index, 6); if (i >= 
wlen) return -1; int32_t subIndex = (index & 0x3f); // index within the word int64_t word = MiscUtils::unsignedShift(bits[i], (int64_t)subIndex); // skip all the bits to the right of index if (word != 0) return (i << 6) + subIndex + BitUtil::ntz(word); while (++i < wlen) { word = bits[i]; if (word != 0) return (i << 6) + BitUtil::ntz(word); } return -1; } int64_t OpenBitSet::nextSetBit(int64_t index) { int32_t i = (int32_t)(index >> 6); if (i >= wlen) return -1; int32_t subIndex = (int32_t)index & 0x3f; // index within the word int64_t word = bits[i] >> subIndex; // skip all the bits to the right of index if (word != 0) return ((int64_t)i << 6) + (subIndex + BitUtil::ntz(word)); while (++i < wlen) { word = bits[i]; if (word != 0) return ((int64_t)i << 6) + BitUtil::ntz(word); } return -1; } LuceneObjectPtr OpenBitSet::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); OpenBitSetPtr cloneSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneSet->wlen = wlen; cloneSet->bits = LongArray::newInstance(bits.size()); MiscUtils::arrayCopy(bits.get(), 0, cloneSet->bits.get(), 0, bits.size()); return cloneSet; } void OpenBitSet::intersect(OpenBitSetPtr other) { int32_t newLen= std::min(this->wlen, other->wlen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; // testing against zero can be more efficient int32_t pos = newLen; while (--pos >= 0) thisArr[pos] &= otherArr[pos]; if (this->wlen > newLen) { // fill zeros from the new shorter length to the old length MiscUtils::arrayFill(bits.get(), newLen, this->wlen, 0LL); } this->wlen = newLen; } void OpenBitSet::_union(OpenBitSetPtr other) { int32_t newLen = std::max(wlen, other->wlen); ensureCapacityWords(newLen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; int32_t pos = std::min(wlen, other->wlen); while (--pos >= 0) thisArr[pos] |= otherArr[pos]; if (this->wlen < newLen) MiscUtils::arrayCopy(otherArr.get(), this->wlen, thisArr.get(), 
this->wlen, newLen - this->wlen); this->wlen = newLen; } void OpenBitSet::remove(OpenBitSetPtr other) { int32_t idx = std::min(wlen, other->wlen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; while (--idx >= 0) thisArr[idx] &= ~otherArr[idx]; } void OpenBitSet::_xor(OpenBitSetPtr other) { int32_t newLen = std::max(wlen, other->wlen); ensureCapacityWords(newLen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; int32_t pos = std::min(wlen, other->wlen); while (--pos >= 0) thisArr[pos] ^= otherArr[pos]; if (this->wlen < newLen) MiscUtils::arrayCopy(otherArr.get(), this->wlen, thisArr.get(), this->wlen, newLen - this->wlen); this->wlen = newLen; } void OpenBitSet::_and(OpenBitSetPtr other) { intersect(other); } void OpenBitSet::_or(OpenBitSetPtr other) { _union(other); } void OpenBitSet::andNot(OpenBitSetPtr other) { remove(other); } bool OpenBitSet::intersects(OpenBitSetPtr other) { int32_t pos = std::min(this->wlen, other->wlen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; while (--pos >= 0) { if ((thisArr[pos] & otherArr[pos]) !=0 ) return true; } return false; } void OpenBitSet::ensureCapacityWords(int32_t numWords) { int32_t length = bits.size(); if (length < numWords) { bits.resize(MiscUtils::getNextSize(numWords)); MiscUtils::arrayFill(bits.get(), length, bits.size(), 0LL); } } void OpenBitSet::ensureCapacity(int64_t numBits) { ensureCapacityWords(bits2words(numBits)); } void OpenBitSet::trimTrailingZeros() { int32_t idx = wlen - 1; while (idx >= 0 && bits[idx] == 0) --idx; wlen = idx + 1; } int32_t OpenBitSet::bits2words(int64_t numBits) { return (int32_t)(MiscUtils::unsignedShift(numBits - 1, (int64_t)6) + 1); } bool OpenBitSet::equals(LuceneObjectPtr other) { if (LuceneObject::equals(other)) return true; OpenBitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); if (!otherBitSet) return false; OpenBitSetPtr a; OpenBitSetPtr b = otherBitSet; // make a the larger set if (b->wlen > this->wlen) { a 
= b; b = shared_from_this(); } else a = shared_from_this(); // check for any set bits out of the range of b for (int32_t i = a->wlen - 1; i >= b->wlen; --i) { if (a->bits[i] !=0 ) return false; } for (int32_t i = b->wlen - 1; i >= 0; --i) { if (a->bits[i] != b->bits[i]) return false; } return true; } int32_t OpenBitSet::hashCode() { // Start with a zero hash and use a mix that results in zero if the input is zero. // This effectively truncates trailing zeros without an explicit check. int64_t hash = 0; for (int32_t i = bits.size(); --i >= 0;) { hash ^= bits[i]; hash = (hash << 1) | MiscUtils::unsignedShift(hash, (int64_t)63); // rotate left } // Fold leftmost bits into right and add a constant to prevent empty sets from // returning 0, which is too common. return (int32_t)((hash >> 32) ^ hash) + 0x98761234; } } LucenePlusPlus-rel_3.0.4/src/core/util/OpenBitSetDISI.cpp000066400000000000000000000033251217574114600232030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OpenBitSetDISI.h" namespace Lucene { OpenBitSetDISI::OpenBitSetDISI(DocIdSetIteratorPtr disi, int32_t maxSize) : OpenBitSet(maxSize) { inPlaceOr(disi); } OpenBitSetDISI::OpenBitSetDISI(int32_t maxSize) : OpenBitSet(maxSize) { } OpenBitSetDISI::~OpenBitSetDISI() { } void OpenBitSetDISI::inPlaceOr(DocIdSetIteratorPtr disi) { int32_t doc; int32_t _size = size(); while ((doc = disi->nextDoc()) < _size) set(doc); } void OpenBitSetDISI::inPlaceAnd(DocIdSetIteratorPtr disi) { int32_t bitSetDoc = nextSetBit((int32_t)0); int32_t disiDoc; while (bitSetDoc != -1 && (disiDoc = disi->advance(bitSetDoc)) != DocIdSetIterator::NO_MORE_DOCS) { clear(bitSetDoc, disiDoc); bitSetDoc = nextSetBit(disiDoc + 1); } if (bitSetDoc != -1) clear((int64_t)bitSetDoc, size()); } void OpenBitSetDISI::inPlaceNot(DocIdSetIteratorPtr disi) { int32_t doc; int32_t _size = size(); while ((doc = disi->nextDoc()) < _size) clear(doc); } void OpenBitSetDISI::inPlaceXor(DocIdSetIteratorPtr disi) { int32_t doc; int32_t _size = size(); while ((doc = disi->nextDoc()) < _size) flip(doc); } } LucenePlusPlus-rel_3.0.4/src/core/util/OpenBitSetIterator.cpp000066400000000000000000000141311217574114600242410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OpenBitSetIterator.h" #include "OpenBitSet.h" #include "MiscUtils.h" namespace Lucene { /// The General Idea: instead of having an array per byte that has the offsets of the /// next set bit, that array could be packed inside a 32 bit integer (8 4 bit numbers). 
/// That should be faster than accessing an array for each index, and the total array /// size is kept smaller (256*sizeof(int32_t))=1K const int32_t OpenBitSetIterator::bitlist[] = { 0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41, 0x42, 0x421, 0x43, 0x431, 0x432, 0x4321, 0x5, 0x51, 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321, 0x54, 0x541, 0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6, 0x61, 0x62, 0x621, 0x63, 0x631, 0x632, 0x6321, 0x64, 0x641, 0x642, 0x6421, 0x643, 0x6431, 0x6432, 0x64321, 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532, 0x65321, 0x654, 0x6541, 0x6542, 0x65421, 0x6543, 0x65431, 0x65432, 0x654321, 0x7, 0x71, 0x72, 0x721, 0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742, 0x7421, 0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752, 0x7521, 0x753, 0x7531, 0x7532, 0x75321, 0x754, 0x7541, 0x7542, 0x75421, 0x7543, 0x75431, 0x75432, 0x754321, 0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632, 0x76321, 0x764, 0x7641, 0x7642, 0x76421, 0x7643, 0x76431, 0x76432, 0x764321, 0x765, 0x7651, 0x7652, 0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654, 0x76541, 0x76542, 0x765421, 0x76543, 0x765431, 0x765432, 0x7654321, 0x8, 0x81, 0x82, 0x821, 0x83, 0x831, 0x832, 0x8321, 0x84, 0x841, 0x842, 0x8421, 0x843, 0x8431, 0x8432, 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853, 0x8531, 0x8532, 0x85321, 0x854, 0x8541, 0x8542, 0x85421, 0x8543, 0x85431, 0x85432, 0x854321, 0x86, 0x861, 0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864, 0x8641, 0x8642, 0x86421, 0x8643, 0x86431, 0x86432, 0x864321, 0x865, 0x8651, 0x8652, 0x86521, 0x8653, 0x86531, 0x86532, 0x865321, 0x8654, 0x86541, 0x86542, 0x865421, 0x86543, 0x865431, 0x865432, 0x8654321, 0x87, 0x871, 0x872, 0x8721, 0x873, 0x8731, 0x8732, 0x87321, 0x874, 0x8741, 0x8742, 0x87421, 0x8743, 0x87431, 0x87432, 0x874321, 0x875, 0x8751, 0x8752, 0x87521, 0x8753, 0x87531, 0x87532, 0x875321, 0x8754, 0x87541, 0x87542, 0x875421, 0x87543, 0x875431, 0x875432, 0x8754321, 0x876, 0x8761, 0x8762, 0x87621, 0x8763, 0x87631, 0x87632, 0x876321, 
0x8764, 0x87641, 0x87642, 0x876421, 0x87643, 0x876431, 0x876432, 0x8764321, 0x8765, 0x87651, 0x87652, 0x876521, 0x87653, 0x876531, 0x876532, 0x8765321, 0x87654, 0x876541, 0x876542, 0x8765421, 0x876543, 0x8765431, 0x8765432, 0x87654321 }; OpenBitSetIterator::OpenBitSetIterator(OpenBitSetPtr bitSet) { arr = bitSet->getBits(); words = bitSet->getNumWords(); i = -1; word = 0; wordShift = 0; indexArray = 0; curDocId = -1; } OpenBitSetIterator::OpenBitSetIterator(LongArray bits, int32_t numWords) { arr = bits; words = numWords; i = -1; word = 0; wordShift = 0; indexArray = 0; curDocId = -1; } OpenBitSetIterator::~OpenBitSetIterator() { } void OpenBitSetIterator::shift() { if ((int32_t)word == 0) { wordShift += 32; word = MiscUtils::unsignedShift(word, (int64_t)32); } if ((word & 0x0000ffff) == 0) { wordShift += 16; word = MiscUtils::unsignedShift(word, (int64_t)16); } if ((word & 0x000000ff) == 0) { wordShift += 8; word = MiscUtils::unsignedShift(word, (int64_t)8); } indexArray = bitlist[(int32_t)word & 0xff]; } int32_t OpenBitSetIterator::nextDoc() { if (indexArray == 0) { if (word != 0) { word = MiscUtils::unsignedShift(word, (int64_t)8); wordShift += 8; } while (word == 0) { if (++i >= words) { curDocId = NO_MORE_DOCS; return curDocId; } word = arr[i]; wordShift = -1; // loop invariant code motion should move this } // after the first time, should I go with a linear search, or stick with the binary search in shift? 
shift(); } int32_t bitIndex = (indexArray & 0x0f) + wordShift; indexArray = MiscUtils::unsignedShift(indexArray, 4); curDocId = (i << 6) + bitIndex; return curDocId; } int32_t OpenBitSetIterator::advance(int32_t target) { indexArray = 0; i = target >> 6; if (i >= words) { word = 0; // setup so next() will also return -1 curDocId = NO_MORE_DOCS; return curDocId; } wordShift = target & 0x3f; word = MiscUtils::unsignedShift(arr[i], (int64_t)wordShift); if (word != 0) --wordShift; // compensate for 1 based arrIndex else { while (word == 0) { if (++i >= words) { curDocId = NO_MORE_DOCS; return curDocId; } word = arr[i]; } wordShift = -1; } shift(); int32_t bitIndex = (indexArray & 0x0f) + wordShift; indexArray = MiscUtils::unsignedShift(indexArray, 4); curDocId = (i << 6) + bitIndex; return curDocId; } int32_t OpenBitSetIterator::docID() { return curDocId; } } LucenePlusPlus-rel_3.0.4/src/core/util/Random.cpp000066400000000000000000000027041217574114600217360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Random.h" #include "MiscUtils.h" namespace Lucene { Random::Random() { this->seed = (int64_t)MiscUtils::currentTimeMillis(); } Random::Random(int64_t seed) { this->seed = seed; } Random::~Random() { } void Random::setSeed(int64_t seed) { this->seed = (seed ^ 0x5deece66dLL) & (((int64_t)1 << 48) - 1); } int32_t Random::nextInt(int32_t limit) { if ((limit & -limit) == limit) return (int32_t)((limit * (int64_t)next(31)) >> 31); int32_t bits = 0; int32_t val = 0; do { bits = next(31); val = bits % limit; } while (bits - val + (limit - 1) < 0); return val; } double Random::nextDouble() { return ((double)(((int64_t)next(26) << 27) + next(27)) / (double)((int64_t)1 << 53)); } int32_t Random::next(int32_t bits) { seed = (seed * 0x5deece66dLL + 0xb) & (((int64_t)1 << 48) - 1); return (int32_t)(seed >> (48 - bits)); } } LucenePlusPlus-rel_3.0.4/src/core/util/Reader.cpp000066400000000000000000000020301217574114600217100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Reader.h" namespace Lucene { const int32_t Reader::READER_EOF = -1; Reader::Reader() { } Reader::~Reader() { } int32_t Reader::read() { wchar_t buffer; return read(&buffer, 0, 1) == READER_EOF ? 
READER_EOF : buffer; } int64_t Reader::skip(int64_t n) { return 0; // override } bool Reader::markSupported() { return false; // override } void Reader::mark(int32_t readAheadLimit) { // override } void Reader::reset() { // override } int64_t Reader::length() { return 0; // override } } LucenePlusPlus-rel_3.0.4/src/core/util/ReaderUtil.cpp000066400000000000000000000042421217574114600225550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReaderUtil.h" #include "IndexReader.h" namespace Lucene { ReaderUtil::~ReaderUtil() { } void ReaderUtil::gatherSubReaders(Collection allSubReaders, IndexReaderPtr reader) { Collection subReaders(reader->getSequentialSubReaders()); if (!subReaders) { // Add the reader itself, and do not recurse allSubReaders.add(reader); } else { for (Collection::iterator subReader = subReaders.begin(); subReader != subReaders.end(); ++subReader) gatherSubReaders(allSubReaders, *subReader); } } IndexReaderPtr ReaderUtil::subReader(int32_t doc, IndexReaderPtr reader) { Collection subReaders(Collection::newInstance()); ReaderUtil::gatherSubReaders(subReaders, reader); Collection docStarts(Collection::newInstance(subReaders.size())); int32_t maxDoc = 0; for (int32_t i = 0; i < subReaders.size(); ++i) { docStarts[i] = maxDoc; maxDoc += subReaders[i]->maxDoc(); } return subReaders[ReaderUtil::subIndex(doc, docStarts)]; } IndexReaderPtr ReaderUtil::subReader(IndexReaderPtr reader, int32_t subIndex) { Collection subReaders(Collection::newInstance()); ReaderUtil::gatherSubReaders(subReaders, reader); return subReaders[subIndex]; } int32_t ReaderUtil::subIndex(int32_t n, Collection docStarts) { // Binary search to 
locate reader Collection::iterator index = std::upper_bound(docStarts.begin(), docStarts.end(), n); return (std::distance(docStarts.begin(), index) - 1); } } LucenePlusPlus-rel_3.0.4/src/core/util/ScorerDocQueue.cpp000066400000000000000000000107631217574114600234120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ScorerDocQueue.h" #include "_ScorerDocQueue.h" #include "Scorer.h" #include "MiscUtils.h" namespace Lucene { ScorerDocQueue::ScorerDocQueue(int32_t maxSize) { this->_size = 0; int32_t heapSize = maxSize + 1; heap = Collection::newInstance(heapSize); this->maxSize = maxSize; topHSD = heap[1]; // initially null } ScorerDocQueue::~ScorerDocQueue() { } void ScorerDocQueue::put(ScorerPtr scorer) { heap[++_size] = newLucene(scorer); upHeap(); } bool ScorerDocQueue::insert(ScorerPtr scorer) { if (_size < maxSize) { put(scorer); return true; } else { int32_t docNr = scorer->docID(); if ((_size > 0) && (!(docNr < topHSD->doc))) // heap[1] is top() { heap[1] = newLucene(scorer, docNr); downHeap(); return true; } else return false; } } ScorerPtr ScorerDocQueue::top() { return topHSD->scorer; } int32_t ScorerDocQueue::topDoc() { return topHSD->doc; } double ScorerDocQueue::topScore() { return topHSD->scorer->score(); } bool ScorerDocQueue::topNextAndAdjustElsePop() { return checkAdjustElsePop(topHSD->scorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS); } bool ScorerDocQueue::topSkipToAndAdjustElsePop(int32_t target) { return checkAdjustElsePop(topHSD->scorer->advance(target) != DocIdSetIterator::NO_MORE_DOCS); } bool ScorerDocQueue::checkAdjustElsePop(bool cond) { if (cond) // see also adjustTop topHSD->doc = 
topHSD->scorer->docID(); else // see also popNoResult { heap[1] = heap[_size]; // move last to first heap[_size--].reset(); } downHeap(); return cond; } ScorerPtr ScorerDocQueue::pop() { ScorerPtr result(topHSD->scorer); popNoResult(); return result; } void ScorerDocQueue::popNoResult() { heap[1] = heap[_size]; // move last to first heap[_size--].reset(); downHeap(); // adjust heap } void ScorerDocQueue::adjustTop() { topHSD->adjust(); downHeap(); } int32_t ScorerDocQueue::size() { return _size; } void ScorerDocQueue::clear() { for (int32_t i = 0; i <= _size; ++i) heap[i].reset(); _size = 0; } void ScorerDocQueue::upHeap() { int32_t i = _size; HeapedScorerDocPtr node(heap[i]); // save bottom node int32_t j = MiscUtils::unsignedShift(i, 1); while ((j > 0) && (node->doc < heap[j]->doc)) { heap[i] = heap[j]; // shift parents down i = j; j = MiscUtils::unsignedShift(j, 1); } heap[i] = node; // install saved node topHSD = heap[1]; } void ScorerDocQueue::downHeap() { int32_t i = 1; HeapedScorerDocPtr node(heap[i]); // save top node int32_t j = i << 1; // find smaller child int32_t k = j + 1; if ((k <= _size) && (heap[k]->doc < heap[j]->doc)) j = k; while ((j <= _size) && (heap[j]->doc < node->doc)) { heap[i] = heap[j]; // shift up child i = j; j = i << 1; k = j + 1; if (k <= _size && (heap[k]->doc < heap[j]->doc)) j = k; } heap[i] = node; // install saved node topHSD = heap[1]; } HeapedScorerDoc::HeapedScorerDoc(ScorerPtr scorer) { this->scorer = scorer; this->doc = scorer->docID(); } HeapedScorerDoc::HeapedScorerDoc(ScorerPtr scorer, int32_t doc) { this->scorer = scorer; this->doc = doc; } HeapedScorerDoc::~HeapedScorerDoc() { } void HeapedScorerDoc::adjust() { doc = scorer->docID(); } } LucenePlusPlus-rel_3.0.4/src/core/util/SmallDouble.cpp000066400000000000000000000027561217574114600227300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SmallDouble.h" #include "MiscUtils.h" namespace Lucene { SmallDouble::~SmallDouble() { } uint8_t SmallDouble::doubleToByte(double f) { if (f < 0.0) // round negatives up to zero f = 0.0; if (f == 0.0) // zero is a special case return 0; int32_t bits = MiscUtils::doubleToIntBits(f); int32_t mantissa = (bits & 0xffffff) >> 21; int32_t exponent = (((bits >> 24) & 0x7f) - 63) + 15; if (exponent > 31) // overflow: use max value { exponent = 31; mantissa = 7; } if (exponent < 0) // underflow: use min value { exponent = 0; mantissa = 1; } return (uint8_t)((exponent << 3) | mantissa); // pack into a uint8_t } double SmallDouble::byteToDouble(uint8_t b) { if (b == 0) // zero is a special case return 0.0; int32_t mantissa = b & 7; int32_t exponent = (b >> 3) & 31; int32_t bits = ((exponent + (63 - 15)) << 24) | (mantissa << 21); return MiscUtils::intBitsToDouble(bits); } } LucenePlusPlus-rel_3.0.4/src/core/util/SortedVIntList.cpp000066400000000000000000000122661217574114600234170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SortedVIntList.h" #include "_SortedVIntList.h" #include "BitSet.h" #include "OpenBitSet.h" #include "DocIdSetIterator.h" #include "MiscUtils.h" namespace Lucene { /// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, a SortedVIntList representing the /// index numbers of the set bits will be smaller than that BitSet. 
const int32_t SortedVIntList::BITS2VINTLIST_SIZE = 8; const int32_t SortedVIntList::VB1 = 0x7f; const int32_t SortedVIntList::BIT_SHIFT = 7; const int32_t SortedVIntList::MAX_BYTES_PER_INT = (31 / SortedVIntList::BIT_SHIFT) + 1; SortedVIntList::SortedVIntList(Collection sortedInts) { lastInt = 0; initBytes(); for (int32_t i = 0; i < sortedInts.size(); ++i) addInt(sortedInts[i]); bytes.resize(lastBytePos); } SortedVIntList::SortedVIntList(Collection sortedInts, int32_t inputSize) { lastInt = 0; initBytes(); for (int32_t i = 0; i < inputSize; ++i) addInt(sortedInts[i]); bytes.resize(lastBytePos); } SortedVIntList::SortedVIntList(BitSetPtr bits) { lastInt = 0; initBytes(); int32_t nextInt = bits->nextSetBit(0); while (nextInt != -1) { addInt(nextInt); nextInt = bits->nextSetBit(nextInt + 1); } bytes.resize(lastBytePos); } SortedVIntList::SortedVIntList(OpenBitSetPtr bits) { lastInt = 0; initBytes(); int32_t nextInt = bits->nextSetBit((int32_t)0); while (nextInt != -1) { addInt(nextInt); nextInt = bits->nextSetBit(nextInt + 1); } bytes.resize(lastBytePos); } SortedVIntList::SortedVIntList(DocIdSetIteratorPtr docIdSetIterator) { lastInt = 0; initBytes(); int32_t doc; while ((doc = docIdSetIterator->nextDoc()) != DocIdSetIterator::NO_MORE_DOCS) addInt(doc); bytes.resize(lastBytePos); } SortedVIntList::~SortedVIntList() { } void SortedVIntList::initBytes() { _size = 0; bytes = ByteArray::newInstance(128); // initial byte size lastBytePos = 0; } void SortedVIntList::addInt(int32_t nextInt) { int32_t diff = nextInt - lastInt; if (diff < 0) boost::throw_exception(IllegalArgumentException(L"Input not sorted or first element negative.")); if (!bytes || (lastBytePos + MAX_BYTES_PER_INT) > bytes.size()) { // biggest possible int does not fit bytes.resize((bytes.size() * 2) + MAX_BYTES_PER_INT); } // See IndexOutput.writeVInt() while ((diff & ~VB1) != 0) // The high bit of the next byte needs to be set. 
{ bytes[lastBytePos++] = (uint8_t)((diff & VB1) | ~VB1); diff = MiscUtils::unsignedShift(diff, BIT_SHIFT); } bytes[lastBytePos++] = (uint8_t)diff; // Last byte, high bit not set. ++_size; lastInt = nextInt; } int32_t SortedVIntList::size() { return _size; } int32_t SortedVIntList::getByteSize() { return bytes ? bytes.size() : 0; } bool SortedVIntList::isCacheable() { return true; } DocIdSetIteratorPtr SortedVIntList::iterator() { return newLucene(shared_from_this()); } SortedDocIdSetIterator::SortedDocIdSetIterator(SortedVIntListPtr list) { _list = list; bytePos = 0; lastInt = 0; doc = -1; } SortedDocIdSetIterator::~SortedDocIdSetIterator() { } void SortedDocIdSetIterator::advance() { SortedVIntListPtr list(_list); // See IndexInput.readVInt() uint8_t b = list->bytes[bytePos++]; lastInt += b & list->VB1; for (int32_t s = list->BIT_SHIFT; (b & ~list->VB1) != 0; s += list->BIT_SHIFT) { b = list->bytes[bytePos++]; lastInt += (b & list->VB1) << s; } } int32_t SortedDocIdSetIterator::docID() { return doc; } int32_t SortedDocIdSetIterator::nextDoc() { SortedVIntListPtr list(_list); if (bytePos >= list->lastBytePos) doc = NO_MORE_DOCS; else { advance(); doc = lastInt; } return doc; } int32_t SortedDocIdSetIterator::advance(int32_t target) { SortedVIntListPtr list(_list); while (bytePos < list->lastBytePos) { advance(); if (lastInt >= target) { doc = lastInt; return doc; } } doc = NO_MORE_DOCS; return doc; } } LucenePlusPlus-rel_3.0.4/src/core/util/StringReader.cpp000066400000000000000000000025641217574114600231130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StringReader.h" namespace Lucene { StringReader::StringReader(const String& str) { this->str = str; this->position = 0; } StringReader::~StringReader() { } int32_t StringReader::read() { return position == (int32_t)str.length() ? READER_EOF : (int32_t)str[position++]; } int32_t StringReader::read(wchar_t* buffer, int32_t offset, int32_t length) { if (position >= (int32_t)str.length()) return READER_EOF; int32_t readChars = std::min(length, (int32_t)str.length() - position); std::wcsncpy(buffer + offset, str.c_str() + position, readChars); position += readChars; return readChars; } void StringReader::close() { str.clear(); } bool StringReader::markSupported() { return false; } void StringReader::reset() { position = 0; } int64_t StringReader::length() { return str.length(); } } LucenePlusPlus-rel_3.0.4/src/core/util/StringUtils.cpp000066400000000000000000000157271217574114600230160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "StringUtils.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "UTF8Stream.h" #include "Reader.h" #include "CharFolder.h" namespace Lucene { /// Maximum length of UTF encoding. const int32_t StringUtils::MAX_ENCODING_UTF8_SIZE = 4; /// Default character radix. 
const int32_t StringUtils::CHARACTER_MAX_RADIX = 36; int32_t StringUtils::toUnicode(const uint8_t* utf8, int32_t length, CharArray unicode) { if (length == 0) return 0; UTF8Decoder utf8Decoder(utf8, utf8 + length); int32_t decodeLength = utf8Decoder.decode(unicode.get(), unicode.size()); return decodeLength == Reader::READER_EOF ? 0 : decodeLength; } int32_t StringUtils::toUnicode(const uint8_t* utf8, int32_t length, UnicodeResultPtr unicodeResult) { if (length == 0) unicodeResult->length = 0; else { if (length > unicodeResult->result.size()) unicodeResult->result.resize(length); unicodeResult->length = toUnicode(utf8, length, unicodeResult->result); } return unicodeResult->length; } String StringUtils::toUnicode(const uint8_t* utf8, int32_t length) { if (length == 0) return L""; CharArray unicode(CharArray::newInstance(length)); int32_t result = toUnicode(utf8, length, unicode); return String(unicode.get(), result); } String StringUtils::toUnicode(const SingleString& s) { return s.empty() ? L"" : toUnicode((uint8_t*)s.c_str(), s.length()); } int32_t StringUtils::toUTF8(const wchar_t* unicode, int32_t length, ByteArray utf8) { if (length == 0) return 0; UTF8Encoder utf8Encoder(unicode, unicode + length); int32_t encodeLength = utf8Encoder.encode(utf8.get(), utf8.size()); return encodeLength == Reader::READER_EOF ? 
0 : encodeLength; } int32_t StringUtils::toUTF8(const wchar_t* unicode, int32_t length, UTF8ResultPtr utf8Result) { if (length == 0) utf8Result->length = 0; else { if (length * MAX_ENCODING_UTF8_SIZE > utf8Result->result.size()) utf8Result->result.resize(length * MAX_ENCODING_UTF8_SIZE); utf8Result->length = toUTF8(unicode, length, utf8Result->result); } return utf8Result->length; } SingleString StringUtils::toUTF8(const wchar_t* unicode, int32_t length) { if (length == 0) return ""; ByteArray utf8(ByteArray::newInstance(length * MAX_ENCODING_UTF8_SIZE)); int32_t result = toUTF8(unicode, length, utf8); return SingleString((char*)utf8.get(), result); } SingleString StringUtils::toUTF8(const String& s) { return s.empty() ? "" : toUTF8(s.c_str(), s.size()); } void StringUtils::toLower(String& str) { CharFolder::toLower(str.begin(), str.end()); } String StringUtils::toLower(const String& str) { String lowerStr(str); toLower(lowerStr); return lowerStr; } void StringUtils::toUpper(String& str) { CharFolder::toUpper(str.begin(), str.end()); } String StringUtils::toUpper(const String& str) { String upperStr(str); toUpper(upperStr); return upperStr; } int32_t StringUtils::compareCase(const String& first, const String& second) { return (toLower(first) == toLower(second)); } Collection StringUtils::split(const String& str, const String& delim) { std::vector tokens; boost::split(tokens, str, boost::is_any_of(delim.c_str())); return Collection::newInstance(tokens.begin(), tokens.end()); } int32_t StringUtils::toInt(const String& value) { if (value.empty()) boost::throw_exception(NumberFormatException()); if (value.size() > 1 && value[0] == L'-' && !UnicodeUtil::isDigit(value[1])) boost::throw_exception(NumberFormatException()); if (value[0] != L'-' && !UnicodeUtil::isDigit(value[0])) boost::throw_exception(NumberFormatException()); return (int32_t)std::wcstol(value.c_str(), NULL, 10); } int64_t StringUtils::toLong(const String& value) { if (value.empty()) 
boost::throw_exception(NumberFormatException()); if (value.size() > 1 && value[0] == L'-' && !UnicodeUtil::isDigit(value[1])) boost::throw_exception(NumberFormatException()); if (value[0] != L'-' && !UnicodeUtil::isDigit(value[0])) boost::throw_exception(NumberFormatException()); #if defined(_WIN32) || defined(_WIN64) return _wcstoi64(value.c_str(), 0, 10); #else return wcstoll(value.c_str(), 0, 10); #endif } int64_t StringUtils::toLong(const String& value, int32_t base) { int64_t longValue = 0; for (String::const_iterator ptr = value.begin(); ptr != value.end(); ++ptr) longValue = UnicodeUtil::isDigit(*ptr) ? (base * longValue) + (*ptr - L'0') : (base * longValue) + (*ptr - L'a' + 10); return longValue; } double StringUtils::toDouble(const String& value) { if (value.empty()) boost::throw_exception(NumberFormatException()); if (value.length() > 1 && (value[0] == L'-' || value[0] == L'.') && !UnicodeUtil::isDigit(value[1])) boost::throw_exception(NumberFormatException()); if (value[0] != L'-' && value[0] != L'.' 
&& !UnicodeUtil::isDigit(value[0])) boost::throw_exception(NumberFormatException()); return std::wcstod(value.c_str(), NULL); } int32_t StringUtils::hashCode(const String& value) { int32_t hashCode = 0; for (String::const_iterator ptr = value.begin(); ptr != value.end(); ++ptr) hashCode = hashCode * 31 + *ptr; return hashCode; } String StringUtils::toString(int64_t value, int32_t base) { static const wchar_t* digits = L"0123456789abcdefghijklmnopqrstuvwxyz"; int32_t bufferSize = (sizeof(int32_t) << 3) + 1; CharArray baseOutput(CharArray::newInstance(bufferSize)); wchar_t* ptr = baseOutput.get() + bufferSize - 1; *ptr = L'\0'; do { *--ptr = digits[value % base]; value /= base; } while (ptr > baseOutput.get() && value > 0); return String(ptr, (baseOutput.get() + bufferSize - 1) - ptr); } } LucenePlusPlus-rel_3.0.4/src/core/util/Synchronize.cpp000066400000000000000000000036341217574114600230340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "Synchronize.h" #include "LuceneThread.h" namespace Lucene { Synchronize::Synchronize() { lockThread = 0; recursionCount = 0; } Synchronize::~Synchronize() { } void Synchronize::createSync(SynchronizePtr& sync) { static boost::mutex lockMutex; boost::mutex::scoped_lock syncLock(lockMutex); if (!sync) sync = newInstance(); } void Synchronize::lock(int32_t timeout) { if (timeout > 0) mutexSynchronize.timed_lock(boost::posix_time::milliseconds(timeout)); else mutexSynchronize.lock(); lockThread = LuceneThread::currentId(); ++recursionCount; } void Synchronize::unlock() { if (--recursionCount == 0) lockThread = 0; mutexSynchronize.unlock(); } int32_t Synchronize::unlockAll() { int32_t count = recursionCount; for (int32_t unlock = 0; unlock < count; ++unlock) this->unlock(); return count; } bool Synchronize::holdsLock() { return (lockThread == LuceneThread::currentId() && recursionCount > 0); } SyncLock::SyncLock(SynchronizePtr sync, int32_t timeout) { this->sync = sync; lock(timeout); } SyncLock::~SyncLock() { if (sync) sync->unlock(); } void SyncLock::lock(int32_t timeout) { if (sync) sync->lock(timeout); } } LucenePlusPlus-rel_3.0.4/src/core/util/TestPoint.cpp000066400000000000000000000036041217574114600224470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TestPoint.h" namespace Lucene { MapStringInt TestPoint::testMethods = MapStringInt::newInstance(); bool TestPoint::enable = false; TestPoint::~TestPoint() { } void TestPoint::enableTestPoints() { enable = true; } void TestPoint::clear() { SyncLock syncLock(&testMethods); testMethods.clear(); } void TestPoint::setTestPoint(const String& object, const String& method, bool point) { if (enable) { SyncLock syncLock(&testMethods); testMethods.put(object + L":" + method, point); testMethods.put(method, point); } } bool TestPoint::getTestPoint(const String& object, const String& method) { SyncLock syncLock(&testMethods); MapStringInt::iterator testMethod = testMethods.find(object + L":" + method); return testMethod == testMethods.end() ? false : (testMethod->second != 0); } bool TestPoint::getTestPoint(const String& method) { SyncLock syncLock(&testMethods); MapStringInt::iterator testMethod = testMethods.find(method); return testMethod == testMethods.end() ? false : (testMethod->second != 0); } TestScope::TestScope(const String& object, const String& method) { this->object = object; this->method = method; TestPoint::setTestPoint(object, method, true); } TestScope::~TestScope() { TestPoint::setTestPoint(object, method, false); } } LucenePlusPlus-rel_3.0.4/src/core/util/ThreadPool.cpp000066400000000000000000000022351217574114600225560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ThreadPool.h" namespace Lucene { Future::~Future() { } const int32_t ThreadPool::THREADPOOL_SIZE = 5; ThreadPool::ThreadPool() { work.reset(new boost::asio::io_service::work(io_service)); for (int32_t i = 0; i < THREADPOOL_SIZE; ++i) threadGroup.create_thread(boost::bind(&boost::asio::io_service::run, &io_service)); } ThreadPool::~ThreadPool() { work.reset(); // stop all threads threadGroup.join_all(); // wait for all competition } ThreadPoolPtr ThreadPool::getInstance() { static ThreadPoolPtr threadPool; if (!threadPool) { threadPool = newLucene(); CycleCheck::addStatic(threadPool); } return threadPool; } } LucenePlusPlus-rel_3.0.4/src/core/util/UTF8Stream.cpp000066400000000000000000000303741217574114600224240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "UTF8Stream.h" #include "Reader.h" namespace Lucene { const uint16_t UTF8Base::LEAD_SURROGATE_MIN = 0xd800u; const uint16_t UTF8Base::LEAD_SURROGATE_MAX = 0xdbffu; const uint16_t UTF8Base::TRAIL_SURROGATE_MIN = 0xdc00u; const uint16_t UTF8Base::TRAIL_SURROGATE_MAX = 0xdfffu; const uint16_t UTF8Base::LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); const uint32_t UTF8Base::SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; // Maximum valid value for a Unicode code point const uint32_t UTF8Base::CODE_POINT_MAX = 0x0010ffffu; #ifdef LPP_UNICODE_CHAR_SIZE_2 const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0xfffd; const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0xffff; #else const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0x0001fffd; const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0x0001ffff; #endif UTF8Base::~UTF8Base() { } inline uint8_t UTF8Base::mask8(uint32_t b) { return static_cast(0xff & b); } inline uint16_t UTF8Base::mask16(uint32_t c) { return static_cast(0xffff & c); } inline bool UTF8Base::isTrail(uint32_t b) { return ((mask8(b) >> 6) == 0x2); } inline bool UTF8Base::isSurrogate(uint32_t cp) { return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); } inline bool UTF8Base::isLeadSurrogate(uint32_t cp) { return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); } inline bool UTF8Base::isTrailSurrogate(uint32_t cp) { return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); } inline bool UTF8Base::isValidCodePoint(uint32_t cp) { return (cp <= CODE_POINT_MAX && !isSurrogate(cp) && cp != 0xfffe && cp != 0xffff); } inline bool UTF8Base::isOverlongSequence(uint32_t cp, int32_t length) { if (cp < 0x80) { if (length != 1) return true; } else if (cp < 0x800) { if (length != 2) return true; } else if (cp < 0x10000) { if (length != 3) return true; } return false; } 
UTF8Encoder::UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd) { this->unicodeBegin = unicodeBegin; this->unicodeEnd = unicodeEnd; } UTF8Encoder::~UTF8Encoder() { } uint32_t UTF8Encoder::readNext() { return unicodeBegin == unicodeEnd ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*unicodeBegin++; } inline uint8_t* UTF8Encoder::appendChar(uint8_t* utf8, uint32_t cp) { if (cp < 0x80) // one octet *(utf8++) = static_cast(cp); else if (cp < 0x800) // two octets { *(utf8++) = static_cast((cp >> 6) | 0xc0); *(utf8++) = static_cast((cp & 0x3f) | 0x80); } else if (cp < 0x10000) // three octets { *(utf8++) = static_cast((cp >> 12) | 0xe0); *(utf8++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(utf8++) = static_cast((cp & 0x3f) | 0x80); } else // four octets { *(utf8++) = static_cast((cp >> 18) | 0xf0); *(utf8++) = static_cast(((cp >> 12) & 0x3f) | 0x80); *(utf8++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(utf8++) = static_cast((cp & 0x3f) | 0x80); } return utf8; } int32_t UTF8Encoder::utf16to8(uint8_t* utf8, int32_t length) { uint8_t* start = utf8; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { uint32_t cp = mask16(next); if (isLeadSurrogate(cp)) { next = readNext(); if (next == UNICODE_TERMINATOR) return 0; uint32_t trail_surrogate = mask16(next); if (!isTrailSurrogate(trail_surrogate)) return 0; cp = (cp << 10) + trail_surrogate + SURROGATE_OFFSET; } else if (isTrailSurrogate(cp)) return 0; if (!isValidCodePoint(cp)) return 0; utf8 = appendChar(utf8, cp); if ((utf8 - start) >= length) break; next = readNext(); } return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? 
Reader::READER_EOF : (utf8 - start); } int32_t UTF8Encoder::utf32to8(uint8_t* utf8, int32_t length) { uint8_t* start = utf8; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { if (!isValidCodePoint(next)) return 0; utf8 = appendChar(utf8, next); if ((utf8 - start) >= length) break; next = readNext(); } return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start); } int32_t UTF8Encoder::encode(uint8_t* utf8, int32_t length) { #ifdef LPP_UNICODE_CHAR_SIZE_2 return utf16to8(utf8, length); #else return utf32to8(utf8, length); #endif } UTF8EncoderStream::UTF8EncoderStream(ReaderPtr reader) : UTF8Encoder(NULL, NULL) { this->reader = reader; } UTF8EncoderStream::~UTF8EncoderStream() { } uint32_t UTF8EncoderStream::readNext() { int32_t next = reader->read(); return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next; } UTF8Decoder::UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End) { this->utf8Begin = utf8Begin; this->utf8End = utf8End; } UTF8Decoder::~UTF8Decoder() { } uint32_t UTF8Decoder::readNext() { return utf8Begin == utf8End ? 
(uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf8Begin++; } inline int32_t UTF8Decoder::sequenceLength(uint32_t cp) { uint8_t lead = mask8(cp); if (lead < 0x80) return 1; else if ((lead >> 5) == 0x6) return 2; else if ((lead >> 4) == 0xe) return 3; else if ((lead >> 3) == 0x1e) return 4; return 0; } inline bool UTF8Decoder::getSequence(uint32_t& cp, int32_t length) { cp = mask8(cp); if (length == 1) return true; uint32_t next = readNext(); if (next == UNICODE_TERMINATOR) return false; if (!isTrail(next)) return false; if (length == 2) { cp = ((cp << 6) & 0x7ff) + (next & 0x3f); return true; } if (length == 3) cp = ((cp << 12) & 0xffff) + ((mask8(next) << 6) & 0xfff); else cp = ((cp << 18) & 0x1fffff) + ((mask8(next) << 12) & 0x3ffff); next = readNext(); if (next == UNICODE_TERMINATOR) return false; if (!isTrail(next)) return false; if (length == 3) { cp += next & 0x3f; return true; } cp += (mask8(next) << 6) & 0xfff; next = readNext(); if (next == UNICODE_TERMINATOR) return false; if (!isTrail(next)) return false; cp += next & 0x3f; return true; } inline bool UTF8Decoder::isValidNext(uint32_t& cp) { // Determine the sequence length based on the lead octet int32_t length = sequenceLength(cp); if (length < 1 || length > 4) return false; // Now that we have a valid sequence length, get trail octets and calculate the code point if (!getSequence(cp, length)) return false; // Decoding succeeded, now security checks return (isValidCodePoint(cp) && !isOverlongSequence(cp, length)); } int32_t UTF8Decoder::utf8to16(wchar_t* unicode, int32_t length) { int32_t position = 0; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { if (!isValidNext(next)) return 0; if (next > 0xffff) // make a surrogate pair { unicode[position++] = static_cast((next >> 10) + LEAD_OFFSET); unicode[position++] = static_cast((next & 0x3ff) + TRAIL_SURROGATE_MIN); } else unicode[position++] = static_cast(next); if (position >= length) break; next = readNext(); } return (position == 0 && next == 
UNICODE_TERMINATOR) ? Reader::READER_EOF : position; } int32_t UTF8Decoder::utf8to32(wchar_t* unicode, int32_t length) { int32_t position = 0; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { if (!isValidNext(next)) return 0; unicode[position++] = static_cast(next); if (position >= length) break; next = readNext(); } return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; } int32_t UTF8Decoder::decode(wchar_t* unicode, int32_t length) { #ifdef LPP_UNICODE_CHAR_SIZE_2 return utf8to16(unicode, length); #else return utf8to32(unicode, length); #endif } UTF8DecoderStream::UTF8DecoderStream(ReaderPtr reader) : UTF8Decoder(NULL, NULL) { this->reader = reader; } UTF8DecoderStream::~UTF8DecoderStream() { } uint32_t UTF8DecoderStream::readNext() { int32_t next = reader->read(); return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next; } UTF16Decoder::UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End) { this->utf16Begin = utf16Begin; this->utf16End = utf16End; } UTF16Decoder::~UTF16Decoder() { } uint32_t UTF16Decoder::readNext() { return utf16Begin == utf16End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf16Begin++; } int32_t UTF16Decoder::utf16to32(wchar_t* unicode, int32_t length) { int32_t position = 0; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { uint32_t cp = mask16(next); if (isLeadSurrogate(cp)) { next = readNext(); if (next == UNICODE_TERMINATOR) return 0; uint32_t trail_surrogate = mask16(next); if (!isTrailSurrogate(trail_surrogate)) return 0; unicode[position++] = static_cast(((cp - LEAD_SURROGATE_MIN) << 10) + (trail_surrogate - TRAIL_SURROGATE_MIN) + 0x0010000); } else if (isTrailSurrogate(cp)) return 0; else unicode[position++] = static_cast(cp); if (position >= length) break; next = readNext(); } return (position == 0 && next == UNICODE_TERMINATOR) ? 
Reader::READER_EOF : position; } int32_t UTF16Decoder::utf16to16(wchar_t* unicode, int32_t length) { int32_t position = 0; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { unicode[position++] = static_cast(next); if (position >= length) break; next = readNext(); } return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; } int32_t UTF16Decoder::decode(wchar_t* unicode, int32_t length) { #ifdef LPP_UNICODE_CHAR_SIZE_2 return utf16to16(unicode, length); #else return utf16to32(unicode, length); #endif } } LucenePlusPlus-rel_3.0.4/src/core/util/UnicodeUtils.cpp000066400000000000000000000031541217574114600231250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "unicode/guniprop.h" namespace Lucene { UnicodeUtil::~UnicodeUtil() { } bool UnicodeUtil::isAlnum(wchar_t c) { return g_unichar_isalnum(c); } bool UnicodeUtil::isAlpha(wchar_t c) { return g_unichar_isalpha(c); } bool UnicodeUtil::isDigit(wchar_t c) { return g_unichar_isdigit(c); } bool UnicodeUtil::isSpace(wchar_t c) { return g_unichar_isspace(c); } bool UnicodeUtil::isUpper(wchar_t c) { return g_unichar_isupper(c); } bool UnicodeUtil::isLower(wchar_t c) { return g_unichar_islower(c); } bool UnicodeUtil::isOther(wchar_t c) { return (g_unichar_type(c) == G_UNICODE_OTHER_LETTER); } bool UnicodeUtil::isNonSpacing(wchar_t c) { return (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK); } wchar_t UnicodeUtil::toUpper(wchar_t c) { return (wchar_t)g_unichar_toupper(c); } wchar_t UnicodeUtil::toLower(wchar_t c) { return (wchar_t)g_unichar_tolower(c); } UTF8Result::~UTF8Result() { } 
UnicodeResult::~UnicodeResult() { } } LucenePlusPlus-rel_3.0.4/src/core/util/md5/000077500000000000000000000000001217574114600204745ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/util/md5/md5.c000066400000000000000000000302221217574114600213240ustar00rootroot00000000000000/* Copyright (C) 1999, 2000, 2002 Aladdin Enterprises. All rights reserved. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. L. Peter Deutsch ghost@aladdin.com */ /* $Id: md5.c,v 1.6 2002/04/13 19:20:28 lpd Exp $ */ /* Independent implementation of MD5 (RFC 1321). This code implements the MD5 Algorithm defined in RFC 1321, whose text is available at http://www.ietf.org/rfc/rfc1321.txt The code is derived from the text of the RFC, including the test suite (section A.5) but excluding the rest of Appendix A. It does not include any code or documentation that is identified in the RFC as being copyrighted. The original and principal author of md5.c is L. Peter Deutsch . Other authors are noted in the change history that follows (in reverse chronological order): 2002-04-13 lpd Clarified derivation from RFC 1321; now handles byte order either statically or dynamically; added missing #include in library. 
2002-03-11 lpd Corrected argument list for main(), and added int return type, in test program and T value program. 2002-02-21 lpd Added missing #include in test program. 2000-07-03 lpd Patched to eliminate warnings about "constant is unsigned in ANSI C, signed in traditional"; made test program self-checking. 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5). 1999-05-03 lpd Original version. */ #include "md5.h" #include #undef BYTE_ORDER /* 1 = big-endian, -1 = little-endian, 0 = unknown */ #ifdef ARCH_IS_BIG_ENDIAN # define BYTE_ORDER (ARCH_IS_BIG_ENDIAN ? 1 : -1) #else # define BYTE_ORDER 0 #endif #define T_MASK ((md5_word_t)~0) #define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) #define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9) #define T3 0x242070db #define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111) #define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050) #define T6 0x4787c62a #define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec) #define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe) #define T9 0x698098d8 #define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850) #define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e) #define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841) #define T13 0x6b901122 #define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c) #define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71) #define T16 0x49b40821 #define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d) #define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf) #define T19 0x265e5a51 #define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855) #define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2) #define T22 0x02441453 #define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e) #define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437) #define T25 0x21e1cde6 #define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829) #define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278) #define T28 0x455a14ed #define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa) #define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07) 
#define T31 0x676f02d9 #define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375) #define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd) #define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e) #define T35 0x6d9d6122 #define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3) #define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb) #define T38 0x4bdecfa9 #define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f) #define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f) #define T41 0x289b7ec6 #define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805) #define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a) #define T44 0x04881d05 #define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6) #define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a) #define T47 0x1fa27cf8 #define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a) #define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb) #define T50 0x432aff97 #define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58) #define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6) #define T53 0x655b59c3 #define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d) #define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82) #define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e) #define T57 0x6fa87e4f #define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f) #define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb) #define T60 0x4e0811a1 #define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d) #define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca) #define T63 0x2ad7d2bb #define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e) static void md5_process(md5_state_t *pms, const md5_byte_t *data /*[64]*/) { md5_word_t a = pms->abcd[0], b = pms->abcd[1], c = pms->abcd[2], d = pms->abcd[3]; md5_word_t t; #if BYTE_ORDER > 0 /* Define storage only for big-endian CPUs. */ md5_word_t X[16]; #else /* Define storage for little-endian or both types of CPUs. */ md5_word_t xbuf[16]; const md5_word_t *X; #endif { #if BYTE_ORDER == 0 /* * Determine dynamically whether this is a big-endian or * little-endian machine, since we can use a more efficient * algorithm on the latter. 
*/ static const int w = 1; if (*((const md5_byte_t *)&w)) /* dynamic little-endian */ #endif #if BYTE_ORDER <= 0 /* little-endian */ { /* * On little-endian machines, we can process properly aligned * data without copying it. */ if (!((data - (const md5_byte_t *)0) & 3)) { /* data are properly aligned */ X = (const md5_word_t *)data; } else { /* not aligned */ memcpy(xbuf, data, 64); X = xbuf; } } #endif #if BYTE_ORDER == 0 else /* dynamic big-endian */ #endif #if BYTE_ORDER >= 0 /* big-endian */ { /* * On big-endian machines, we must arrange the bytes in the * right order. */ const md5_byte_t *xp = data; int i; # if BYTE_ORDER == 0 X = xbuf; /* (dynamic only) */ # else # define xbuf X /* (static only) */ # endif for (i = 0; i < 16; ++i, xp += 4) xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); } #endif } #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) /* Round 1. */ /* Let [abcd k s i] denote the operation a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ #define F(x, y, z) (((x) & (y)) | (~(x) & (z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + F(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 0, 7, T1); SET(d, a, b, c, 1, 12, T2); SET(c, d, a, b, 2, 17, T3); SET(b, c, d, a, 3, 22, T4); SET(a, b, c, d, 4, 7, T5); SET(d, a, b, c, 5, 12, T6); SET(c, d, a, b, 6, 17, T7); SET(b, c, d, a, 7, 22, T8); SET(a, b, c, d, 8, 7, T9); SET(d, a, b, c, 9, 12, T10); SET(c, d, a, b, 10, 17, T11); SET(b, c, d, a, 11, 22, T12); SET(a, b, c, d, 12, 7, T13); SET(d, a, b, c, 13, 12, T14); SET(c, d, a, b, 14, 17, T15); SET(b, c, d, a, 15, 22, T16); #undef SET /* Round 2. */ /* Let [abcd k s i] denote the operation a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ #define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + G(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. 
*/ SET(a, b, c, d, 1, 5, T17); SET(d, a, b, c, 6, 9, T18); SET(c, d, a, b, 11, 14, T19); SET(b, c, d, a, 0, 20, T20); SET(a, b, c, d, 5, 5, T21); SET(d, a, b, c, 10, 9, T22); SET(c, d, a, b, 15, 14, T23); SET(b, c, d, a, 4, 20, T24); SET(a, b, c, d, 9, 5, T25); SET(d, a, b, c, 14, 9, T26); SET(c, d, a, b, 3, 14, T27); SET(b, c, d, a, 8, 20, T28); SET(a, b, c, d, 13, 5, T29); SET(d, a, b, c, 2, 9, T30); SET(c, d, a, b, 7, 14, T31); SET(b, c, d, a, 12, 20, T32); #undef SET /* Round 3. */ /* Let [abcd k s t] denote the operation a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ #define H(x, y, z) ((x) ^ (y) ^ (z)) #define SET(a, b, c, d, k, s, Ti)\ t = a + H(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 5, 4, T33); SET(d, a, b, c, 8, 11, T34); SET(c, d, a, b, 11, 16, T35); SET(b, c, d, a, 14, 23, T36); SET(a, b, c, d, 1, 4, T37); SET(d, a, b, c, 4, 11, T38); SET(c, d, a, b, 7, 16, T39); SET(b, c, d, a, 10, 23, T40); SET(a, b, c, d, 13, 4, T41); SET(d, a, b, c, 0, 11, T42); SET(c, d, a, b, 3, 16, T43); SET(b, c, d, a, 6, 23, T44); SET(a, b, c, d, 9, 4, T45); SET(d, a, b, c, 12, 11, T46); SET(c, d, a, b, 15, 16, T47); SET(b, c, d, a, 2, 23, T48); #undef SET /* Round 4. */ /* Let [abcd k s t] denote the operation a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ #define I(x, y, z) ((y) ^ ((x) | ~(z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + I(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. 
*/ SET(a, b, c, d, 0, 6, T49); SET(d, a, b, c, 7, 10, T50); SET(c, d, a, b, 14, 15, T51); SET(b, c, d, a, 5, 21, T52); SET(a, b, c, d, 12, 6, T53); SET(d, a, b, c, 3, 10, T54); SET(c, d, a, b, 10, 15, T55); SET(b, c, d, a, 1, 21, T56); SET(a, b, c, d, 8, 6, T57); SET(d, a, b, c, 15, 10, T58); SET(c, d, a, b, 6, 15, T59); SET(b, c, d, a, 13, 21, T60); SET(a, b, c, d, 4, 6, T61); SET(d, a, b, c, 11, 10, T62); SET(c, d, a, b, 2, 15, T63); SET(b, c, d, a, 9, 21, T64); #undef SET /* Then perform the following additions. (That is increment each of the four registers by the value it had before this block was started.) */ pms->abcd[0] += a; pms->abcd[1] += b; pms->abcd[2] += c; pms->abcd[3] += d; } void md5_init(md5_state_t *pms) { pms->count[0] = pms->count[1] = 0; pms->abcd[0] = 0x67452301; pms->abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; pms->abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; pms->abcd[3] = 0x10325476; } void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes) { const md5_byte_t *p = data; int left = nbytes; int offset = (pms->count[0] >> 3) & 63; md5_word_t nbits = (md5_word_t)(nbytes << 3); if (nbytes <= 0) return; /* Update the message length. */ pms->count[1] += nbytes >> 29; pms->count[0] += nbits; if (pms->count[0] < nbits) pms->count[1]++; /* Process an initial partial block. */ if (offset) { int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); memcpy(pms->buf + offset, p, copy); if (offset + copy < 64) return; p += copy; left -= copy; md5_process(pms, pms->buf); } /* Process full blocks. */ for (; left >= 64; p += 64, left -= 64) md5_process(pms, p); /* Process a final partial block. 
*/ if (left) memcpy(pms->buf, p, left); } void md5_finish(md5_state_t *pms, md5_byte_t digest[16]) { static const md5_byte_t pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; md5_byte_t data[8]; int i; /* Save the length before padding. */ for (i = 0; i < 8; ++i) data[i] = (md5_byte_t)(pms->count[i >> 2] >> ((i & 3) << 3)); /* Pad to 56 bytes mod 64. */ md5_append(pms, pad, ((55 - (pms->count[0] >> 3)) & 63) + 1); /* Append the length. */ md5_append(pms, data, 8); for (i = 0; i < 16; ++i) digest[i] = (md5_byte_t)(pms->abcd[i >> 2] >> ((i & 3) << 3)); } LucenePlusPlus-rel_3.0.4/src/core/util/md5/md5.h000066400000000000000000000065001217574114600213330ustar00rootroot00000000000000/* Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. L. Peter Deutsch ghost@aladdin.com */ /* $Id: md5.h,v 1.4 2002/04/13 19:20:28 lpd Exp $ */ /* Independent implementation of MD5 (RFC 1321). 
This code implements the MD5 Algorithm defined in RFC 1321, whose text is available at http://www.ietf.org/rfc/rfc1321.txt The code is derived from the text of the RFC, including the test suite (section A.5) but excluding the rest of Appendix A. It does not include any code or documentation that is identified in the RFC as being copyrighted. The original and principal author of md5.h is L. Peter Deutsch . Other authors are noted in the change history that follows (in reverse chronological order): 2002-04-13 lpd Removed support for non-ANSI compilers; removed references to Ghostscript; clarified derivation from RFC 1321; now handles byte order either statically or dynamically. 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5); added conditionalization for C++ compilation from Martin Purschke . 1999-05-03 lpd Original version. */ #ifndef md5_INCLUDED # define md5_INCLUDED /* * This package supports both compile-time and run-time determination of CPU * byte order. If ARCH_IS_BIG_ENDIAN is defined as 0, the code will be * compiled to run only on little-endian CPUs; if ARCH_IS_BIG_ENDIAN is * defined as non-zero, the code will be compiled to run only on big-endian * CPUs; if ARCH_IS_BIG_ENDIAN is not defined, the code will be compiled to * run on either big- or little-endian CPUs, but will run slightly less * efficiently on either one than if ARCH_IS_BIG_ENDIAN is defined. */ typedef unsigned char md5_byte_t; /* 8-bit byte */ typedef unsigned int md5_word_t; /* 32-bit word */ /* Define the state of the MD5 Algorithm. */ typedef struct md5_state_s { md5_word_t count[2]; /* message length in bits, lsw first */ md5_word_t abcd[4]; /* digest buffer */ md5_byte_t buf[64]; /* accumulate block */ } md5_state_t; #ifdef __cplusplus extern "C" { #endif /* Initialize the algorithm. */ void md5_init(md5_state_t *pms); /* Append a string to the message. 
*/ void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes); /* Finish the message and return the digest. */ void md5_finish(md5_state_t *pms, md5_byte_t digest[16]); #ifdef __cplusplus } /* end extern "C" */ #endif #endif /* md5_INCLUDED */ LucenePlusPlus-rel_3.0.4/src/core/util/nedmalloc/000077500000000000000000000000001217574114600217455ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/util/nedmalloc/License.txt000066400000000000000000000024721217574114600240750ustar00rootroot00000000000000Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
LucenePlusPlus-rel_3.0.4/src/core/util/nedmalloc/malloc.c.h000066400000000000000000006061661217574114600236250ustar00rootroot00000000000000/* This is a version (aka dlmalloc) of malloc/free/realloc written by Doug Lea and released to the public domain, as explained at http://creativecommons.org/licenses/publicdomain. Send questions, comments, complaints, performance data, etc to dl@cs.oswego.edu * Version pre-2.8.4 Mon Nov 27 11:22:37 2006 (dl at gee) Note: There may be an updated version of this malloc obtainable at ftp://gee.cs.oswego.edu/pub/misc/malloc.c Check before installing! * Quickstart This library is all in one file to simplify the most common usage: ftp it, compile it (-O3), and link it into another program. All of the compile-time options default to reasonable values for use on most platforms. You might later want to step through various compile-time and dynamic tuning options. For convenience, an include file for code using this malloc is at: ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h You don't really need this .h file unless you call functions not defined in your system include files. The .h file contains only the excerpts from this file needed for using this malloc on ANSI C/C++ systems, so long as you haven't changed compile-time options about naming and tuning parameters. If you do, then you can create your own malloc.h that does include all settings by cutting at the point indicated below. Note that you may already by default be using a C library containing a malloc that is based on some version of this malloc (for example in linux). You might still want to use the one in this file to customize settings or to avoid overheads associated with library versions. * Vital statistics: Supported pointer/size_t representation: 4 or 8 bytes size_t MUST be an unsigned type of the same width as pointers. 
(If you are using an ancient system that declares size_t as a signed type, or need it to be a different width than pointers, you can use a previous release of this malloc (e.g. 2.7.2) supporting these.) Alignment: 8 bytes (default) This suffices for nearly all current machines and C compilers. However, you can define MALLOC_ALIGNMENT to be wider than this if necessary (up to 128bytes), at the expense of using more space. Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) 8 or 16 bytes (if 8byte sizes) Each malloced chunk has a hidden word of overhead holding size and status information, and additional cross-check word if FOOTERS is defined. Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) 8-byte ptrs: 32 bytes (including overhead) Even a request for zero bytes (i.e., malloc(0)) returns a pointer to something of the minimum allocatable size. The maximum overhead wastage (i.e., number of extra bytes allocated than were requested in malloc) is less than or equal to the minimum size, except for requests >= mmap_threshold that are serviced via mmap(), where the worst case wastage is about 32 bytes plus the remainder from a system page (the minimal mmap unit); typically 4096 or 8192 bytes. Security: static-safe; optionally more or less The "security" of malloc refers to the ability of malicious code to accentuate the effects of errors (for example, freeing space that is not currently malloc'ed or overwriting past the ends of chunks) in code that calls malloc. This malloc guarantees not to modify any memory locations below the base of heap, i.e., static variables, even in the presence of usage errors. The routines additionally detect most improper frees and reallocs. All this holds as long as the static bookkeeping for malloc itself is not corrupted by some other means. This is only one aspect of security -- these checks do not, and cannot, detect all possible programming errors. 
If FOOTERS is defined nonzero, then each allocated chunk carries an additional check word to verify that it was malloced from its space. These check words are the same within each execution of a program using malloc, but differ across executions, so externally crafted fake chunks cannot be freed. This improves security by rejecting frees/reallocs that could corrupt heap memory, in addition to the checks preventing writes to statics that are always on. This may further improve security at the expense of time and space overhead. (Note that FOOTERS may also be worth using with MSPACES.) By default detected errors cause the program to abort (calling "abort()"). You can override this to instead proceed past errors by defining PROCEED_ON_ERROR. In this case, a bad free has no effect, and a malloc that encounters a bad address caused by user overwrites will ignore the bad address by dropping pointers and indices to all known memory. This may be appropriate for programs that should continue if at all possible in the face of programming errors, although they may run out of memory because dropped memory is never reclaimed. If you don't like either of these options, you can define CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything else. And if if you are sure that your program using malloc has no errors or vulnerabilities, you can define INSECURE to 1, which might (or might not) provide a small performance improvement. Thread-safety: NOT thread-safe unless USE_LOCKS defined When USE_LOCKS is defined, each public call to malloc, free, etc is surrounded with either a pthread mutex or a win32 spinlock (depending on WIN32). This is not especially fast, and can be a major bottleneck. It is designed only to provide minimal protection in concurrent environments, and to provide a basis for extensions. 
If you are using malloc in a concurrent program, consider instead using nedmalloc (http://www.nedprod.com/programs/portable/nedmalloc/) or ptmalloc (See http://www.malloc.de), which are derived from versions of this malloc. System requirements: Any combination of MORECORE and/or MMAP/MUNMAP This malloc can use unix sbrk or any emulation (invoked using the CALL_MORECORE macro) and/or mmap/munmap or any emulation (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system memory. On most unix systems, it tends to work best if both MORECORE and MMAP are enabled. On Win32, it uses emulations based on VirtualAlloc. It also uses common C library functions like memset. Compliance: I believe it is compliant with the Single Unix Specification (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably others as well. * Overview of algorithms This is not the fastest, most space-conserving, most portable, or most tunable malloc ever written. However it is among the fastest while also being among the most space-conserving, portable and tunable. Consistent balance across these factors results in a good general-purpose allocator for malloc-intensive programs. In most ways, this malloc is a best-fit allocator. Generally, it chooses the best-fitting existing chunk for a request, with ties broken in approximately least-recently-used order. (This strategy normally maintains low fragmentation.) However, for requests less than 256bytes, it deviates from best-fit when there is not an exactly fitting available chunk by preferring to use space adjacent to that used for the previous small request, as well as by breaking ties in approximately most-recently-used order. (These enhance locality of series of small allocations.) And for very large requests (>= 256Kb by default), it relies on system memory mapping facilities, if supported. (This helps avoid carrying around and possibly fragmenting memory used only for large chunks.) 
All operations (except malloc_stats and mallinfo) have execution times that are bounded by a constant factor of the number of bits in a size_t, not counting any clearing in calloc or copying in realloc, or actions surrounding MORECORE and MMAP that have times proportional to the number of non-contiguous regions returned by system allocation routines, which is often just 1. In real-time applications, you can optionally suppress segment traversals using NO_SEGMENT_TRAVERSAL, which assures bounded execution even when system allocators return non-contiguous spaces, at the typical expense of carrying around more memory and increased fragmentation. The implementation is not very modular and seriously overuses macros. Perhaps someday all C compilers will do as good a job inlining modular code as can now be done by brute-force expansion, but now, enough of them seem not to. Some compilers issue a lot of warnings about code that is dead/unreachable only on some platforms, and also about intentional uses of negation on unsigned types. All known cases of each can be ignored. For a longer but out of date high-level description, see http://gee.cs.oswego.edu/dl/html/malloc.html * MSPACES If MSPACES is defined, then in addition to malloc, free, etc., this file also defines mspace_malloc, mspace_free, etc. These are versions of malloc routines that take an "mspace" argument obtained using create_mspace, to control all internal bookkeeping. If ONLY_MSPACES is defined, only these versions are compiled. So if you would like to use this allocator for only some allocations, and your system malloc for others, you can compile with ONLY_MSPACES and then do something like... static mspace mymspace = create_mspace(0,0); // for example #define mymalloc(bytes) mspace_malloc(mymspace, bytes) (Note: If you only need one instance of an mspace, you can instead use "USE_DL_PREFIX" to relabel the global malloc.) You can similarly create thread-local allocators by storing mspaces as thread-locals. 
For example: static __thread mspace tlms = 0; void* tlmalloc(size_t bytes) { if (tlms == 0) tlms = create_mspace(0, 0); return mspace_malloc(tlms, bytes); } void tlfree(void* mem) { mspace_free(tlms, mem); } Unless FOOTERS is defined, each mspace is completely independent. You cannot allocate from one and free to another (although conformance is only weakly checked, so usage errors are not always caught). If FOOTERS is defined, then each chunk carries around a tag indicating its originating mspace, and frees are directed to their originating spaces. ------------------------- Compile-time options --------------------------- Be careful in setting #define values for numerical constants of type size_t. On some systems, literal values are not automatically extended to size_t precision unless they are explicitly casted. You can also use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. WIN32 default: defined if _WIN32 defined Defining WIN32 sets up defaults for MS environment and compilers. Otherwise defaults are for unix. Beware that there seem to be some cases where this malloc might not be a pure drop-in replacement for Win32 malloc: Random-looking failures from Win32 GDI API's (eg; SetDIBits()) may be due to bugs in some video driver implementations when pixel buffers are malloc()ed, and the region spans more than one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) default granularity, pixel buffers may straddle virtual allocation regions more often than when using the Microsoft allocator. You can avoid this by using VirtualAlloc() and VirtualFree() for all pixel buffers rather than using malloc(). If this is not possible, recompile this malloc with a larger DEFAULT_GRANULARITY. MALLOC_ALIGNMENT default: (size_t)8 Controls the minimum alignment for malloc'ed chunks. It must be a power of two and at least 8, even on machines for which smaller alignments would suffice. It may be defined as larger than this though. 
Note however that code and data structures are optimized for the case of 8-byte alignment. MSPACES default: 0 (false) If true, compile in support for independent allocation spaces. This is only supported if HAVE_MMAP is true. ONLY_MSPACES default: 0 (false) If true, only compile in mspace versions, not regular versions. USE_LOCKS default: 0 (false) Causes each call to each public routine to be surrounded with pthread or WIN32 mutex lock/unlock. (If set true, this can be overridden on a per-mspace basis for mspace versions.) If set to a non-zero value other than 1, locks are used, but their implementation is left out, so lock functions must be supplied manually. USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC If true, uses custom spin locks for locking. This is currently supported only for x86 platforms using gcc or recent MS compilers. Otherwise, posix locks or win32 critical sections are used. FOOTERS default: 0 If true, provide extra checking and dispatching by placing information in the footers of allocated chunks. This adds space and time overhead. INSECURE default: 0 If true, omit checks for usage errors and heap space overwrites. USE_DL_PREFIX default: NOT defined Causes compiler to prefix all public routines with the string 'dl'. This can be useful when you only want to use this malloc in one part of a program, using your regular system malloc elsewhere. ABORT default: defined as abort() Defines how to abort on failed checks. On most systems, a failed check cannot die with an "assert" or even print an informative message, because the underlying print routines in turn call malloc, which will fail again. Generally, the best policy is to simply call abort(). It's not very useful to do more than this because many errors due to overwriting will show up as address faults (null, odd addresses etc) rather than malloc-triggered checks, so will also abort. 
Also, most compilers know that abort() does not return, so can better optimize code conditionally calling it. PROCEED_ON_ERROR default: defined as 0 (false) Controls whether detected bad addresses cause them to bypassed rather than aborting. If set, detected bad arguments to free and realloc are ignored. And all bookkeeping information is zeroed out upon a detected overwrite of freed heap space, thus losing the ability to ever return it from malloc again, but enabling the application to proceed. If PROCEED_ON_ERROR is defined, the static variable malloc_corruption_error_count is compiled in and can be examined to see if errors have occurred. This option generates slower code than the default abort policy. DEBUG default: NOT defined The DEBUG setting is mainly intended for people trying to modify this code or diagnose problems when porting to new platforms. However, it may also be able to better isolate user errors than just using runtime checks. The assertions in the check routines spell out in more detail the assumptions and invariants underlying the algorithms. The checking is fairly extensive, and will slow down execution noticeably. Calling malloc_stats or mallinfo with DEBUG set will attempt to check every non-mmapped allocated and free chunk in the course of computing the summaries. ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) Debugging assertion failures can be nearly impossible if your version of the assert macro causes malloc to be called, which will lead to a cascade of further failures, blowing the runtime stack. ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(), which will usually make debugging easier. MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 The action to take before "return 0" when malloc fails to be able to return memory because there is none available. HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES True if this system supports sbrk or an emulation of it. 
MORECORE default: sbrk The name of the sbrk-style system routine to call to obtain more memory. See below for guidance on writing custom MORECORE functions. The type of the argument to sbrk/MORECORE varies across systems. It cannot be size_t, because it supports negative arguments, so it is normally the signed type of the same width as size_t (sometimes declared as "intptr_t"). It doesn't much matter though. Internally, we only call it with arguments less than half the max value of a size_t, which should work across all reasonable possibilities, although sometimes generating compiler warnings. MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE If true, take advantage of fact that consecutive calls to MORECORE with positive arguments always return contiguous increasing addresses. This is true of unix sbrk. It does not hurt too much to set it true anyway, since malloc copes with non-contiguities. Setting it false when definitely non-contiguous saves time and possibly wasted space it would take to discover this though. MORECORE_CANNOT_TRIM default: NOT defined True if MORECORE cannot release space back to the system when given negative arguments. This is generally necessary only if you are using a hand-crafted MORECORE function that cannot handle negative arguments. NO_SEGMENT_TRAVERSAL default: 0 If non-zero, suppresses traversals of memory segments returned by either MORECORE or CALL_MMAP. This disables merging of segments that are contiguous, and selectively releasing them to the OS if unused, but bounds execution times. HAVE_MMAP default: 1 (true) True if this system supports mmap or an emulation of it. If so, and HAVE_MORECORE is not true, MMAP is used for all system allocation. If set and HAVE_MORECORE is true as well, MMAP is primarily used to directly allocate very large blocks. It is also used as a backup strategy in cases where MORECORE fails to provide space from system. 
Note: A single call to MUNMAP is assumed to be able to unmap memory that may have be allocated using multiple calls to MMAP, so long as they are adjacent. HAVE_MREMAP default: 1 on linux, else 0 If true realloc() uses mremap() to re-allocate large blocks and extend or shrink allocation spaces. MMAP_CLEARS default: 1 except on WINCE. True if mmap clears memory so calloc doesn't need to. This is true for standard unix mmap using /dev/zero and on WIN32 except for WINCE. USE_BUILTIN_FFS default: 0 (i.e., not used) Causes malloc to use the builtin ffs() function to compute indices. Some compilers may recognize and intrinsify ffs to be faster than the supplied C version. Also, the case of x86 using gcc is special-cased to an asm instruction, so is already as fast as it can be, and so this setting has no effect. Similarly for Win32 under recent MS compilers. (On most x86s, the asm version is only slightly faster than the C version.) malloc_getpagesize default: derive from system includes, or 4096. The system page size. To the extent possible, this malloc manages memory from the system in page-size units. This may be (and usually is) a function rather than a constant. This is ignored if WIN32, where page size is determined using getSystemInfo during initialization. USE_DEV_RANDOM default: 0 (i.e., not used) Causes malloc to use /dev/random to initialize secure magic seed for stamping footers. Otherwise, the current time is used. NO_MALLINFO default: 0 If defined, don't compile "mallinfo". This can be a simple way of dealing with mismatches between system declarations and those in this file. MALLINFO_FIELD_TYPE default: size_t The type of the fields in the mallinfo struct. This was originally defined as "int" in SVID etc, but is more usefully defined as size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set REALLOC_ZERO_BYTES_FREES default: not defined This should be set if a call to realloc with zero bytes should be the same as a call to free. 
Some people think it should. Otherwise, since this malloc returns a unique pointer for malloc(0), so does realloc(p, 0). LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H LACKS_STDLIB_H default: NOT defined unless on WIN32 Define these if your system does not have these header files. You might need to manually insert some of the declarations they provide. DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, system_info.dwAllocationGranularity in WIN32, otherwise 64K. Also settable using mallopt(M_GRANULARITY, x) The unit for allocating and deallocating memory from the system. On most systems with contiguous MORECORE, there is no reason to make this more than a page. However, systems with MMAP tend to either require or encourage larger granularities. You can increase this value to prevent system allocation functions to be called so often, especially if they are slow. The value must be at least one page and must be a power of two. Setting to 0 causes initialization to either page size or win32 region size. (Note: In previous versions of malloc, the equivalent of this option was called "TOP_PAD") DEFAULT_TRIM_THRESHOLD default: 2MB Also settable using mallopt(M_TRIM_THRESHOLD, x) The maximum amount of unused top-most memory to keep before releasing via malloc_trim in free(). Automatic trimming is mainly useful in long-lived programs using contiguous MORECORE. Because trimming via sbrk can be slow on some systems, and can sometimes be wasteful (in cases where programs immediately afterward allocate more large chunks) the value should be high enough so that your overall system performance would improve by releasing this much memory. As a rough guide, you might set to a value close to the average size of a process (program) running on your system. Releasing this much memory would allow such a process to run in memory. 
Generally, it is worth tuning trim thresholds when a program undergoes phases where several large chunks are allocated and released in ways that can reuse each other's storage, perhaps mixed with phases where there are no such chunks at all. The trim value must be greater than page size to have any useful effect. To disable trimming completely, you can set to MAX_SIZE_T. Note that the trick some people use of mallocing a huge space and then freeing it at program startup, in an attempt to reserve system memory, doesn't have the intended effect under automatic trimming, since that memory will immediately be returned to the system. DEFAULT_MMAP_THRESHOLD default: 256K Also settable using mallopt(M_MMAP_THRESHOLD, x) The request size threshold for using MMAP to directly service a request. Requests of at least this size that cannot be allocated using already-existing space will be serviced via mmap. (If enough normal freed space already exists it is used instead.) Using mmap segregates relatively large chunks of memory so that they can be individually obtained and released from the host system. A request serviced through mmap is never reused by any other request (at least not directly; the system may just so happen to remap successive requests to the same locations). Segregating space in this way has the benefits that: Mmapped space can always be individually released back to the system, which helps keep the system level memory demands of a long-lived program low. Also, mapped memory doesn't become `locked' between other chunks, as can happen with normally allocated chunks, which means that even trimming via malloc_trim would not release them. However, it has the disadvantage that the space cannot be reclaimed, consolidated, and then used to service later requests, as happens with normal chunks. The advantages of mmap nearly always outweigh disadvantages for "large" chunks, but the value of "large" may vary across systems. 
The default is an empirically derived value that works well in most systems. You can disable mmap by setting to MAX_SIZE_T. MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP The number of consolidated frees between checks to release unused segments when freeing. When using non-contiguous segments, especially with multiple mspaces, checking only for topmost space doesn't always suffice to trigger trimming. To compensate for this, free() will, with a period of MAX_RELEASE_CHECK_RATE (or the current number of segments, if greater) try to release unused segments to the OS when freeing chunks that result in consolidation. The best value for this parameter is a compromise between slowing down frees with relatively costly checks that rarely trigger versus holding on to unused memory. To effectively disable, set to MAX_SIZE_T. This may lead to a very slight speed improvement at the expense of carrying around more memory. */ /* Version identifier to allow people to support multiple versions */ #ifndef DLMALLOC_VERSION #define DLMALLOC_VERSION 20804 #endif /* DLMALLOC_VERSION */ #ifndef WIN32 #ifdef _WIN32 #define WIN32 1 #endif /* _WIN32 */ #ifdef _WIN32_WCE #define LACKS_FCNTL_H #define WIN32 1 #endif /* _WIN32_WCE */ #endif /* WIN32 */ #ifdef WIN32 #define WIN32_LEAN_AND_MEAN #define _WIN32_WINNT 0x403 #include #define HAVE_MMAP 1 #define HAVE_MORECORE 0 #define LACKS_UNISTD_H #define LACKS_SYS_PARAM_H #define LACKS_SYS_MMAN_H #define LACKS_STRING_H #define LACKS_STRINGS_H #define LACKS_SYS_TYPES_H #define LACKS_ERRNO_H #ifndef MALLOC_FAILURE_ACTION #define MALLOC_FAILURE_ACTION #endif /* MALLOC_FAILURE_ACTION */ #ifdef _WIN32_WCE /* WINCE reportedly does not clear */ #define MMAP_CLEARS 0 #else #define MMAP_CLEARS 1 #endif /* _WIN32_WCE */ #endif /* WIN32 */ #if defined(DARWIN) || defined(_DARWIN) /* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ #ifndef HAVE_MORECORE #define HAVE_MORECORE 0 #define HAVE_MMAP 1 /* OSX allocators provide 16 
byte alignment */ #ifndef MALLOC_ALIGNMENT #define MALLOC_ALIGNMENT ((size_t)16U) #endif #endif /* HAVE_MORECORE */ #endif /* DARWIN */ #ifndef LACKS_SYS_TYPES_H #include /* For size_t */ #endif /* LACKS_SYS_TYPES_H */ /* The maximum possible size_t value has all bits set */ #define MAX_SIZE_T (~(size_t)0) #ifndef ONLY_MSPACES #define ONLY_MSPACES 0 /* define to a value */ #else #define ONLY_MSPACES 1 #endif /* ONLY_MSPACES */ #ifndef MSPACES #if ONLY_MSPACES #define MSPACES 1 #else /* ONLY_MSPACES */ #define MSPACES 0 #endif /* ONLY_MSPACES */ #endif /* MSPACES */ #ifndef MALLOC_ALIGNMENT #define MALLOC_ALIGNMENT ((size_t)8U) #endif /* MALLOC_ALIGNMENT */ #ifndef FOOTERS #define FOOTERS 0 #endif /* FOOTERS */ #ifndef ABORT #define ABORT abort() #endif /* ABORT */ #ifndef ABORT_ON_ASSERT_FAILURE #define ABORT_ON_ASSERT_FAILURE 1 #endif /* ABORT_ON_ASSERT_FAILURE */ #ifndef PROCEED_ON_ERROR #define PROCEED_ON_ERROR 0 #endif /* PROCEED_ON_ERROR */ #ifndef USE_LOCKS #define USE_LOCKS 0 #endif /* USE_LOCKS */ #ifndef USE_SPIN_LOCKS #if USE_LOCKS && (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310) #define USE_SPIN_LOCKS 1 #else #define USE_SPIN_LOCKS 0 #endif /* USE_LOCKS && ... 
*/ #endif /* USE_SPIN_LOCKS */ #ifndef INSECURE #define INSECURE 0 #endif /* INSECURE */ #ifndef HAVE_MMAP #define HAVE_MMAP 1 #endif /* HAVE_MMAP */ #ifndef MMAP_CLEARS #define MMAP_CLEARS 1 #endif /* MMAP_CLEARS */ #ifndef HAVE_MREMAP #ifdef linux #define HAVE_MREMAP 1 #else /* linux */ #define HAVE_MREMAP 0 #endif /* linux */ #endif /* HAVE_MREMAP */ #ifndef MALLOC_FAILURE_ACTION #define MALLOC_FAILURE_ACTION errno = ENOMEM; #endif /* MALLOC_FAILURE_ACTION */ #ifndef HAVE_MORECORE #if ONLY_MSPACES #define HAVE_MORECORE 0 #else /* ONLY_MSPACES */ #define HAVE_MORECORE 1 #endif /* ONLY_MSPACES */ #endif /* HAVE_MORECORE */ #if !HAVE_MORECORE #define MORECORE_CONTIGUOUS 0 #else /* !HAVE_MORECORE */ #define MORECORE_DEFAULT sbrk #ifndef MORECORE_CONTIGUOUS #define MORECORE_CONTIGUOUS 1 #endif /* MORECORE_CONTIGUOUS */ #endif /* HAVE_MORECORE */ #ifndef DEFAULT_GRANULARITY #if (MORECORE_CONTIGUOUS || defined(WIN32)) #define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ #else /* MORECORE_CONTIGUOUS */ #define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) #endif /* MORECORE_CONTIGUOUS */ #endif /* DEFAULT_GRANULARITY */ #ifndef DEFAULT_TRIM_THRESHOLD #ifndef MORECORE_CANNOT_TRIM #define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) #else /* MORECORE_CANNOT_TRIM */ #define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T #endif /* MORECORE_CANNOT_TRIM */ #endif /* DEFAULT_TRIM_THRESHOLD */ #ifndef DEFAULT_MMAP_THRESHOLD #if HAVE_MMAP #define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) #else /* HAVE_MMAP */ #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T #endif /* HAVE_MMAP */ #endif /* DEFAULT_MMAP_THRESHOLD */ #ifndef MAX_RELEASE_CHECK_RATE #if HAVE_MMAP #define MAX_RELEASE_CHECK_RATE 4095 #else #define MAX_RELEASE_CHECK_RATE MAX_SIZE_T #endif /* HAVE_MMAP */ #endif /* MAX_RELEASE_CHECK_RATE */ #ifndef USE_BUILTIN_FFS #define USE_BUILTIN_FFS 0 #endif /* USE_BUILTIN_FFS */ #ifndef USE_DEV_RANDOM #define USE_DEV_RANDOM 0 #endif /* 
USE_DEV_RANDOM */ #ifndef NO_MALLINFO #define NO_MALLINFO 0 #endif /* NO_MALLINFO */ #ifndef MALLINFO_FIELD_TYPE #define MALLINFO_FIELD_TYPE size_t #endif /* MALLINFO_FIELD_TYPE */ #ifndef NO_SEGMENT_TRAVERSAL #define NO_SEGMENT_TRAVERSAL 0 #endif /* NO_SEGMENT_TRAVERSAL */ /* mallopt tuning options. SVID/XPG defines four standard parameter numbers for mallopt, normally defined in malloc.h. None of these are used in this malloc, so setting them has no effect. But this malloc does support the following options. */ #define M_TRIM_THRESHOLD (-1) #define M_GRANULARITY (-2) #define M_MMAP_THRESHOLD (-3) /* ------------------------ Mallinfo declarations ------------------------ */ #if !NO_MALLINFO /* This version of malloc supports the standard SVID/XPG mallinfo routine that returns a struct containing usage properties and statistics. It should work on any system that has a /usr/include/malloc.h defining struct mallinfo. The main declaration needed is the mallinfo struct that is returned (by-copy) by mallinfo(). The mallinfo struct contains a bunch of fields that are not even meaningful in this version of malloc. These fields are instead filled by mallinfo() with other numbers that might be of interest. HAVE_USR_INCLUDE_MALLOC_H should be set if you have a /usr/include/malloc.h file that includes a declaration of struct mallinfo. If so, it is included; else a compliant version is declared below. These must be precisely the same for mallinfo() to work. The original SVID version of this struct, defined on most systems with mallinfo, declares all fields as ints. But some others define as unsigned long. If your system defines the fields using a type of different width than listed here, you MUST #include your system version and #define HAVE_USR_INCLUDE_MALLOC_H.
*/ /* #define HAVE_USR_INCLUDE_MALLOC_H */ #ifdef HAVE_USR_INCLUDE_MALLOC_H #include "/usr/include/malloc.h" #else /* HAVE_USR_INCLUDE_MALLOC_H */ #ifndef STRUCT_MALLINFO_DECLARED #define STRUCT_MALLINFO_DECLARED 1 struct mallinfo { MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ MALLINFO_FIELD_TYPE smblks; /* always 0 */ MALLINFO_FIELD_TYPE hblks; /* always 0 */ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ MALLINFO_FIELD_TYPE fordblks; /* total free space */ MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ }; #endif /* STRUCT_MALLINFO_DECLARED */ #endif /* HAVE_USR_INCLUDE_MALLOC_H */ #endif /* NO_MALLINFO */ /* Try to persuade compilers to inline. The most critical functions for inlining are defined as macros, so these aren't used for them. 
*/ #ifndef FORCEINLINE #if defined(__GNUC__) #define FORCEINLINE __inline __attribute__ ((always_inline)) #elif defined(_MSC_VER) #define FORCEINLINE __forceinline #endif #endif #ifndef NOINLINE #if defined(__GNUC__) #define NOINLINE __attribute__ ((noinline)) #elif defined(_MSC_VER) #define NOINLINE __declspec(noinline) #else #define NOINLINE #endif #endif #ifdef __cplusplus extern "C" { #ifndef FORCEINLINE #define FORCEINLINE inline #endif #endif /* __cplusplus */ #ifndef FORCEINLINE #define FORCEINLINE #endif #if !ONLY_MSPACES /* ------------------- Declarations of public routines ------------------- */ #ifndef USE_DL_PREFIX #define dlcalloc calloc #define dlfree free #define dlmalloc malloc #define dlmemalign memalign #define dlrealloc realloc #define dlvalloc valloc #define dlpvalloc pvalloc #define dlmallinfo mallinfo #define dlmallopt mallopt #define dlmalloc_trim malloc_trim #define dlmalloc_stats malloc_stats #define dlmalloc_usable_size malloc_usable_size #define dlmalloc_footprint malloc_footprint #define dlmalloc_max_footprint malloc_max_footprint #define dlindependent_calloc independent_calloc #define dlindependent_comalloc independent_comalloc #endif /* USE_DL_PREFIX */ /* malloc(size_t n) Returns a pointer to a newly allocated chunk of at least n bytes, or null if no space is available, in which case errno is set to ENOMEM on ANSI C systems. If n is zero, malloc returns a minimum-sized chunk. (The minimum size is 16 bytes on most 32bit systems, and 32 bytes on 64bit systems.) Note that size_t is an unsigned type, so calls with arguments that would be negative if signed are interpreted as requests for huge amounts of space, which will often fail. The maximum supported value of n differs across systems, but is in all cases less than the maximum representable value of a size_t. */ void* dlmalloc(size_t); /* free(void* p) Releases the chunk of memory pointed to by p, that had been previously allocated using malloc or a related routine such as realloc. 
It has no effect if p is null. If p was not malloced or already freed, free(p) will by default cause the current program to abort. */ void dlfree(void*); /* calloc(size_t n_elements, size_t element_size); Returns a pointer to n_elements * element_size bytes, with all locations set to zero. */ void* dlcalloc(size_t, size_t); /* realloc(void* p, size_t n) Returns a pointer to a chunk of size n that contains the same data as does chunk p up to the minimum of (n, p's size) bytes, or null if no space is available. The returned pointer may or may not be the same as p. The algorithm prefers extending p in most cases when possible, otherwise it employs the equivalent of a malloc-copy-free sequence. If p is null, realloc is equivalent to malloc. If space is not available, realloc returns null, errno is set (if on ANSI) and p is NOT freed. if n is for fewer bytes than already held by p, the newly unused space is lopped off and freed if possible. realloc with a size argument of zero (re)allocates a minimum-sized chunk. The old unix realloc convention of allowing the last-free'd chunk to be used as an argument to realloc is not supported. */ void* dlrealloc(void*, size_t); /* memalign(size_t alignment, size_t n); Returns a pointer to a newly allocated chunk of n bytes, aligned in accord with the alignment argument. The alignment argument should be a power of two. If the argument is not a power of two, the nearest greater power is used. 8-byte alignment is guaranteed by normal malloc calls, so don't bother calling memalign with an argument of 8 or less. Overreliance on memalign is a sure way to fragment space. */ void* dlmemalign(size_t, size_t); /* valloc(size_t n); Equivalent to memalign(pagesize, n), where pagesize is the page size of the system. If the pagesize is unknown, 4096 is used. */ void* dlvalloc(size_t); /* mallopt(int parameter_number, int parameter_value) Sets tunable parameters The format is to provide a (parameter-number, parameter-value) pair. 
mallopt then sets the corresponding parameter to the argument value if it can (i.e., so long as the value is meaningful), and returns 1 if successful else 0. To workaround the fact that mallopt is specified to use int, not size_t parameters, the value -1 is specially treated as the maximum unsigned size_t value. SVID/XPG/ANSI defines four standard param numbers for mallopt, normally defined in malloc.h. None of these are use in this malloc, so setting them has no effect. But this malloc also supports other options in mallopt. See below for details. Briefly, supported parameters are as follows (listed defaults are for "typical" configurations). Symbol param # default allowed param values M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) M_GRANULARITY -2 page size any power of 2 >= page size M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) */ int dlmallopt(int, int); /* malloc_footprint(); Returns the number of bytes obtained from the system. The total number of bytes allocated by malloc, realloc etc., is less than this value. Unlike mallinfo, this function returns only a precomputed result, so can be called frequently to monitor memory consumption. Even if locks are otherwise defined, this function does not use them, so results might not be up to date. */ size_t dlmalloc_footprint(void); /* malloc_max_footprint(); Returns the maximum number of bytes obtained from the system. This value will be greater than current footprint if deallocated space has been reclaimed by the system. The peak number of bytes allocated by malloc, realloc etc., is less than this value. Unlike mallinfo, this function returns only a precomputed result, so can be called frequently to monitor memory consumption. Even if locks are otherwise defined, this function does not use them, so results might not be up to date. 
*/ size_t dlmalloc_max_footprint(void); #if !NO_MALLINFO /* mallinfo() Returns (by copy) a struct containing various summary statistics: arena: current total non-mmapped bytes allocated from system ordblks: the number of free chunks smblks: always zero. hblks: current number of mmapped regions hblkhd: total bytes held in mmapped regions usmblks: the maximum total allocated space. This will be greater than current total if trimming has occurred. fsmblks: always zero uordblks: current total allocated space (normal or mmapped) fordblks: total free space keepcost: the maximum number of bytes that could ideally be released back to system via malloc_trim. ("ideally" means that it ignores page restrictions etc.) Because these fields are ints, but internal bookkeeping may be kept as longs, the reported values may wrap around zero and thus be inaccurate. */ struct mallinfo dlmallinfo(void); #endif /* NO_MALLINFO */ /* independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); independent_calloc is similar to calloc, but instead of returning a single cleared space, it returns an array of pointers to n_elements independent elements that can hold contents of size elem_size, each of which starts out cleared, and can be independently freed, realloc'ed etc. The elements are guaranteed to be adjacently allocated (this is not guaranteed to occur with multiple callocs or mallocs), which may also improve cache locality in some applications. The "chunks" argument is optional (i.e., may be null, which is probably the most typical usage). If it is null, the returned array is itself dynamically allocated and should also be freed when it is no longer needed. Otherwise, the chunks array must be of at least n_elements in length. It is filled in with the pointers to the chunks. In either case, independent_calloc returns this pointer array, or null if the allocation failed. 
If n_elements is zero and "chunks" is null, it returns a chunk representing an array with zero elements (which should be freed if not wanted). Each element must be individually freed when it is no longer needed. If you'd like to instead be able to free all at once, you should instead use regular calloc and assign pointers into this space to represent elements. (In this case though, you cannot independently free elements.) independent_calloc simplifies and speeds up implementations of many kinds of pools. It may also be useful when constructing large data structures that initially have a fixed number of fixed-sized nodes, but the number is not known at compile time, and some of the nodes may later need to be freed. For example: struct Node { int item; struct Node* next; }; struct Node* build_list() { struct Node** pool; int n = read_number_of_nodes_needed(); if (n <= 0) return 0; pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); if (pool == 0) die(); // organize into a linked list... struct Node* first = pool[0]; for (i = 0; i < n-1; ++i) pool[i]->next = pool[i+1]; free(pool); // Can now free the array (or not, if it is needed later) return first; } */ void** dlindependent_calloc(size_t, size_t, void**); /* independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); independent_comalloc allocates, all at once, a set of n_elements chunks with sizes indicated in the "sizes" array. It returns an array of pointers to these elements, each of which can be independently freed, realloc'ed etc. The elements are guaranteed to be adjacently allocated (this is not guaranteed to occur with multiple callocs or mallocs), which may also improve cache locality in some applications. The "chunks" argument is optional (i.e., may be null). If it is null the returned array is itself dynamically allocated and should also be freed when it is no longer needed. Otherwise, the chunks array must be of at least n_elements in length. 
It is filled in with the pointers to the chunks. In either case, independent_comalloc returns this pointer array, or null if the allocation failed. If n_elements is zero and chunks is null, it returns a chunk representing an array with zero elements (which should be freed if not wanted). Each element must be individually freed when it is no longer needed. If you'd like to instead be able to free all at once, you should instead use a single regular malloc, and assign pointers at particular offsets in the aggregate space. (In this case though, you cannot independently free elements.) independent_comalloc differs from independent_calloc in that each element may have a different size, and also that it does not automatically clear elements. independent_comalloc can be used to speed up allocation in cases where several structs or objects must always be allocated at the same time. For example: struct Head { ... } struct Foot { ... } void send_message(char* msg) { int msglen = strlen(msg); size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; void* chunks[3]; if (independent_comalloc(3, sizes, chunks) == 0) die(); struct Head* head = (struct Head*)(chunks[0]); char* body = (char*)(chunks[1]); struct Foot* foot = (struct Foot*)(chunks[2]); // ... } In general though, independent_comalloc is worth using only for larger values of n_elements. For small values, you probably won't detect enough difference from series of malloc calls to bother. Overuse of independent_comalloc can increase overall memory usage, since it cannot reuse existing noncontiguous small chunks that might be available for some of the elements. */ void** dlindependent_comalloc(size_t, size_t*, void**); /* pvalloc(size_t n); Equivalent to valloc(minimum-page-that-holds(n)), that is, round up n to nearest pagesize.
*/ void* dlpvalloc(size_t); /* malloc_trim(size_t pad); If possible, gives memory back to the system (via negative arguments to sbrk) if there is unused memory at the `high' end of the malloc pool or in unused MMAP segments. You can call this after freeing large blocks of memory to potentially reduce the system-level memory requirements of a program. However, it cannot guarantee to reduce memory. Under some allocation patterns, some large free blocks of memory will be locked between two used chunks, so they cannot be given back to the system. The `pad' argument to malloc_trim represents the amount of free trailing space to leave untrimmed. If this argument is zero, only the minimum amount of memory to maintain internal data structures will be left. Non-zero arguments can be supplied to maintain enough trailing space to service future expected allocations without having to re-obtain memory from the system. Malloc_trim returns 1 if it actually released any memory, else 0. */ int dlmalloc_trim(size_t); /* malloc_stats(); Prints on stderr the amount of space obtained from the system (both via sbrk and mmap), the maximum amount (which may be more than current if malloc_trim and/or munmap got called), and the current number of bytes allocated via malloc (or realloc, etc) but not yet freed. Note that this is the number of bytes allocated, not the number requested. It will be larger than the number requested because of alignment and bookkeeping overhead. Because it includes alignment wastage as being in use, this figure may be greater than zero even when no user-level chunks are allocated. The reported current and maximum system memory can be inaccurate if a program makes other calls to system memory allocation functions (normally sbrk) outside of malloc. malloc_stats prints only the most commonly interesting statistics. More information can be obtained by calling mallinfo. 
*/ void dlmalloc_stats(void); #endif /* ONLY_MSPACES */ /* malloc_usable_size(void* p); Returns the number of bytes you can actually use in an allocated chunk, which may be more than you requested (although often not) due to alignment and minimum size constraints. You can use this many bytes without worrying about overwriting other allocated objects. This is not a particularly great programming practice. malloc_usable_size can be more useful in debugging and assertions, for example: p = malloc(n); assert(malloc_usable_size(p) >= 256); */ size_t dlmalloc_usable_size(void*); #if MSPACES /* mspace is an opaque type representing an independent region of space that supports mspace_malloc, etc. */ typedef void* mspace; /* create_mspace creates and returns a new independent space with the given initial capacity, or, if 0, the default granularity size. It returns null if there is no system memory available to create the space. If argument locked is non-zero, the space uses a separate lock to control access. The capacity of the space will grow dynamically as needed to service mspace_malloc requests. You can control the sizes of incremental increases of this space by compiling with a different DEFAULT_GRANULARITY or dynamically setting with mallopt(M_GRANULARITY, value). */ mspace create_mspace(size_t capacity, int locked); /* destroy_mspace destroys the given space, and attempts to return all of its memory back to the system, returning the total number of bytes freed. After destruction, the results of access to all memory used by the space become undefined. */ size_t destroy_mspace(mspace msp); /* create_mspace_with_base uses the memory supplied as the initial base of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping, so the capacity must be at least this large. (Otherwise 0 is returned.) When this initial space is exhausted, additional memory will be obtained from the system. 
Destroying this space will deallocate all additionally allocated space (if possible) but not the initial base. */ mspace create_mspace_with_base(void* base, size_t capacity, int locked); /* mspace_mmap_large_chunks controls whether requests for large chunks are allocated in their own mmapped regions, separate from others in this mspace. By default this is enabled, which reduces fragmentation. However, such chunks are not necessarily released to the system upon destroy_mspace. Disabling by setting to false may increase fragmentation, but avoids leakage when relying on destroy_mspace to release all memory allocated using this space. */ int mspace_mmap_large_chunks(mspace msp, int enable); /* mspace_malloc behaves as malloc, but operates within the given space. */ void* mspace_malloc(mspace msp, size_t bytes); /* mspace_free behaves as free, but operates within the given space. If compiled with FOOTERS==1, mspace_free is not actually needed. free may be called instead of mspace_free because freed chunks from any space are handled by their originating spaces. */ void mspace_free(mspace msp, void* mem); /* mspace_realloc behaves as realloc, but operates within the given space. If compiled with FOOTERS==1, mspace_realloc is not actually needed. realloc may be called instead of mspace_realloc because realloced chunks from any space are handled by their originating spaces. */ void* mspace_realloc(mspace msp, void* mem, size_t newsize); /* mspace_calloc behaves as calloc, but operates within the given space. */ void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); /* mspace_memalign behaves as memalign, but operates within the given space. */ void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); /* mspace_independent_calloc behaves as independent_calloc, but operates within the given space. 
*/ void** mspace_independent_calloc(mspace msp, size_t n_elements, size_t elem_size, void* chunks[]); /* mspace_independent_comalloc behaves as independent_comalloc, but operates within the given space. */ void** mspace_independent_comalloc(mspace msp, size_t n_elements, size_t sizes[], void* chunks[]); /* mspace_footprint() returns the number of bytes obtained from the system for this space. */ size_t mspace_footprint(mspace msp); /* mspace_max_footprint() returns the peak number of bytes obtained from the system for this space. */ size_t mspace_max_footprint(mspace msp); #if !NO_MALLINFO /* mspace_mallinfo behaves as mallinfo, but reports properties of the given space. */ struct mallinfo mspace_mallinfo(mspace msp); #endif /* NO_MALLINFO */ /* malloc_usable_size(void* p) behaves the same as malloc_usable_size; */ size_t mspace_usable_size(void* mem); /* mspace_malloc_stats behaves as malloc_stats, but reports properties of the given space. */ void mspace_malloc_stats(mspace msp); /* mspace_trim behaves as malloc_trim, but operates within the given space. */ int mspace_trim(mspace msp, size_t pad); /* An alias for mallopt. */ int mspace_mallopt(int, int); #endif /* MSPACES */ #ifdef __cplusplus }; /* end of extern "C" */ #endif /* __cplusplus */ /* ======================================================================== To make a fully customizable malloc.h header file, cut everything above this line, put into file malloc.h, edit to suit, and #include it on the next line, as well as in programs that use this malloc. 
======================================================================== */ /* #include "malloc.h" */ /*------------------------------ internal #includes ---------------------- */ #ifdef WIN32 #pragma warning( disable : 4146 ) /* no "unsigned" warnings */ #endif /* WIN32 */ #include /* for printing in malloc_stats */ #ifndef LACKS_ERRNO_H #include /* for MALLOC_FAILURE_ACTION */ #endif /* LACKS_ERRNO_H */ #if FOOTERS #include /* for magic initialization */ #endif /* FOOTERS */ #ifndef LACKS_STDLIB_H #include /* for abort() */ #endif /* LACKS_STDLIB_H */ #ifdef DEBUG #if ABORT_ON_ASSERT_FAILURE #define assert(x) if(!(x)) ABORT #else /* ABORT_ON_ASSERT_FAILURE */ #include #endif /* ABORT_ON_ASSERT_FAILURE */ #else /* DEBUG */ #ifndef assert #define assert(x) #endif #define DEBUG 0 #endif /* DEBUG */ #ifndef LACKS_STRING_H #include /* for memset etc */ #endif /* LACKS_STRING_H */ #if USE_BUILTIN_FFS #ifndef LACKS_STRINGS_H #include /* for ffs */ #endif /* LACKS_STRINGS_H */ #endif /* USE_BUILTIN_FFS */ #if HAVE_MMAP #ifndef LACKS_SYS_MMAN_H #include /* for mmap */ #endif /* LACKS_SYS_MMAN_H */ #ifndef LACKS_FCNTL_H #include #endif /* LACKS_FCNTL_H */ #endif /* HAVE_MMAP */ #ifndef LACKS_UNISTD_H #include /* for sbrk, sysconf */ #else /* LACKS_UNISTD_H */ #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) extern void* sbrk(ptrdiff_t); #endif /* FreeBSD etc */ #endif /* LACKS_UNISTD_H */ /* Declarations for locking */ #if USE_LOCKS #ifndef WIN32 #include #if defined (__SVR4) && defined (__sun) /* solaris */ #include #endif /* solaris */ #else #ifndef _M_AMD64 /* These are already defined on AMD64 builds */ #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* _M_AMD64 */ #pragma intrinsic (_InterlockedCompareExchange) #pragma 
intrinsic (_InterlockedExchange) #define interlockedcompareexchange _InterlockedCompareExchange #define interlockedexchange _InterlockedExchange #endif /* Win32 */ #endif /* USE_LOCKS */ /* Declarations for bit scanning on win32 */ #if defined(_MSC_VER) && _MSC_VER>=1300 #ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ unsigned char _BitScanForward(unsigned long *index, unsigned long mask); unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); #ifdef __cplusplus } #endif /* __cplusplus */ #define BitScanForward _BitScanForward #define BitScanReverse _BitScanReverse #pragma intrinsic(_BitScanForward) #pragma intrinsic(_BitScanReverse) #endif /* BitScanForward */ #endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ #ifndef WIN32 #ifndef malloc_getpagesize # ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ # ifndef _SC_PAGE_SIZE # define _SC_PAGE_SIZE _SC_PAGESIZE # endif # endif # ifdef _SC_PAGE_SIZE # define malloc_getpagesize sysconf(_SC_PAGE_SIZE) # else # if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) extern size_t getpagesize(); # define malloc_getpagesize getpagesize() # else # ifdef WIN32 /* use supplied emulation of getpagesize */ # define malloc_getpagesize getpagesize() # else # ifndef LACKS_SYS_PARAM_H # include # endif # ifdef EXEC_PAGESIZE # define malloc_getpagesize EXEC_PAGESIZE # else # ifdef NBPG # ifndef CLSIZE # define malloc_getpagesize NBPG # else # define malloc_getpagesize (NBPG * CLSIZE) # endif # else # ifdef NBPC # define malloc_getpagesize NBPC # else # ifdef PAGESIZE # define malloc_getpagesize PAGESIZE # else /* just guess */ # define malloc_getpagesize ((size_t)4096U) # endif # endif # endif # endif # endif # endif # endif #endif #endif /* ------------------- size_t and alignment properties -------------------- */ /* The byte and bit size of a size_t */ #define SIZE_T_SIZE (sizeof(size_t)) #define SIZE_T_BITSIZE (sizeof(size_t) 
<< 3) /* Some constants coerced to size_t */ /* Annoying but necessary to avoid errors on some platforms */ #define SIZE_T_ZERO ((size_t)0) #define SIZE_T_ONE ((size_t)1) #define SIZE_T_TWO ((size_t)2) #define SIZE_T_FOUR ((size_t)4) #define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) #define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) #define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) #define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) /* The bit mask value corresponding to MALLOC_ALIGNMENT */ #define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) /* True if address a has acceptable alignment */ #define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) /* the number of bytes to offset an address to align it */ #define align_offset(A)\ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) /* -------------------------- MMAP preliminaries ------------------------- */ /* If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and checks to fail so compiler optimizer can delete code rather than using so many "#if"s. */ /* MORECORE and MMAP must return MFAIL on failure */ #define MFAIL ((void*)(MAX_SIZE_T)) #define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ #if HAVE_MMAP #ifndef WIN32 #define MUNMAP_DEFAULT(a, s) munmap((a), (s)) #define MMAP_PROT (PROT_READ|PROT_WRITE) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) #define MAP_ANONYMOUS MAP_ANON #endif /* MAP_ANON */ #ifdef MAP_ANONYMOUS #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) #define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) #else /* MAP_ANONYMOUS */ /* Nearly all versions of mmap support MAP_ANONYMOUS, so the following is unlikely to be needed, but is supplied just in case. */ #define MMAP_FLAGS (MAP_PRIVATE) static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ #define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? 
\ (dev_zero_fd = open("/dev/zero", O_RDWR), \ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) #endif /* MAP_ANONYMOUS */ #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) #else /* WIN32 */ /* Win32 MMAP via VirtualAlloc */ static FORCEINLINE void* win32mmap(size_t size) { void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); return (ptr != 0)? ptr: MFAIL; } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ static FORCEINLINE void* win32direct_mmap(size_t size) { void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE); return (ptr != 0)? ptr: MFAIL; } /* This function supports releasing coalesed segments */ static FORCEINLINE int win32munmap(void* ptr, size_t size) { MEMORY_BASIC_INFORMATION minfo; char* cptr = (char*)ptr; while (size) { if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) return -1; if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || minfo.State != MEM_COMMIT || minfo.RegionSize > size) return -1; if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) return -1; cptr += minfo.RegionSize; size -= minfo.RegionSize; } return 0; } #define MMAP_DEFAULT(s) win32mmap(s) #define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) #define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) #endif /* WIN32 */ #endif /* HAVE_MMAP */ #if HAVE_MREMAP #ifndef WIN32 #define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) #endif /* WIN32 */ #endif /* HAVE_MREMAP */ /** * Define CALL_MORECORE */ #if HAVE_MORECORE #ifdef MORECORE #define CALL_MORECORE(S) MORECORE(S) #else /* MORECORE */ #define CALL_MORECORE(S) MORECORE_DEFAULT(S) #endif /* MORECORE */ #else /* HAVE_MORECORE */ #define CALL_MORECORE(S) MFAIL #endif /* HAVE_MORECORE */ /** * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP */ #if HAVE_MMAP #define IS_MMAPPED_BIT (SIZE_T_ONE) #define USE_MMAP_BIT (SIZE_T_ONE) #ifdef MMAP #define CALL_MMAP(s) MMAP(s) #else /* MMAP */ #define 
CALL_MMAP(s) MMAP_DEFAULT(s) #endif /* MMAP */ #ifdef MUNMAP #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) #else /* MUNMAP */ #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) #endif /* MUNMAP */ #ifdef DIRECT_MMAP #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) #else /* DIRECT_MMAP */ #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) #endif /* DIRECT_MMAP */ #else /* HAVE_MMAP */ #define IS_MMAPPED_BIT (SIZE_T_ZERO) #define USE_MMAP_BIT (SIZE_T_ZERO) #define MMAP(s) MFAIL #define MUNMAP(a, s) (-1) #define DIRECT_MMAP(s) MFAIL #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) #define CALL_MMAP(s) MMAP(s) #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) #endif /* HAVE_MMAP */ /** * Define CALL_MREMAP */ #if HAVE_MMAP && HAVE_MREMAP #ifdef MREMAP #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) #else /* MREMAP */ #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) #endif /* MREMAP */ #else /* HAVE_MMAP && HAVE_MREMAP */ #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL #endif /* HAVE_MMAP && HAVE_MREMAP */ /* mstate bit set if continguous morecore disabled or failed */ #define USE_NONCONTIGUOUS_BIT (4U) /* segment bit set in create_mspace_with_base */ #define EXTERN_BIT (8U) /* --------------------------- Lock preliminaries ------------------------ */ /* When locks are defined, there is one global lock, plus one per-mspace lock. The global lock_ensures that mparams.magic and other unique mparams values are initialized only once. It also protects sequences of calls to MORECORE. In many cases sys_alloc requires two calls, that should not be interleaved with calls by other threads. This does not protect against direct calls to MORECORE by other threads not using this lock, so there is still code to cope the best we can on interference. Per-mspace locks surround calls to malloc, free, etc. To enable use in layered extensions, per-mspace locks are reentrant. 
Because lock-protected regions generally have bounded times, it is OK to use the supplied simple spinlocks in the custom versions for x86. If USE_LOCKS is > 1, the definitions of lock routines here are bypassed, in which case you will need to define at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly TRY_LOCK (which is not used in this malloc, but commonly needed in extensions.) */ #if USE_LOCKS == 1 #if USE_SPIN_LOCKS #ifndef WIN32 /* Custom pthread-style spin locks on x86 and x64 for gcc */ struct pthread_mlock_t { volatile unsigned int l; volatile unsigned int c; volatile pthread_t threadid; }; #define MLOCK_T struct pthread_mlock_t #define CURRENT_THREAD pthread_self() #define INITIAL_LOCK(sl) (memset(sl, 0, sizeof(MLOCK_T)), 0) #define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl) #define RELEASE_LOCK(sl) pthread_release_lock(sl) #define TRY_LOCK(sl) pthread_try_lock(sl) #define SPINS_PER_YIELD 63 static MLOCK_T malloc_global_mutex = { 0, 0, 0}; static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) { int spins = 0; volatile unsigned int* lp = &sl->l; for (;;) { if (*lp != 0) { if (sl->threadid == CURRENT_THREAD) { ++sl->c; return 0; } } else { /* place args to cmpxchgl in locals to evade oddities in some gccs */ int cmp = 0; int val = 1; int ret; __asm__ __volatile__ ("lock; cmpxchgl %1, %2" : "=a" (ret) : "r" (val), "m" (*(lp)), "0"(cmp) : "memory", "cc"); if (!ret) { assert(!sl->threadid); sl->c = 1; sl->threadid = CURRENT_THREAD; return 0; } if ((++spins & SPINS_PER_YIELD) == 0) { #if defined (__SVR4) && defined (__sun) /* solaris */ thr_yield(); #else #if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) sched_yield(); #else /* no-op yield on unknown systems */ ; #endif /* __linux__ || __FreeBSD__ || __APPLE__ */ #endif /* solaris */ } } } } static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { assert(sl->l != 0); assert(sl->threadid == CURRENT_THREAD); if (--sl->c == 0) { sl->threadid = 0; volatile unsigned int* lp = 
&sl->l; int prev = 0; int ret; __asm__ __volatile__ ("lock; xchgl %0, %1" : "=r" (ret) : "m" (*(lp)), "0"(prev) : "memory"); } } static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) { volatile unsigned int* lp = &sl->l; if (*lp != 0) { if (sl->threadid == CURRENT_THREAD) { ++sl->c; return 1; } } else { int cmp = 0; int val = 1; int ret; __asm__ __volatile__ ("lock; cmpxchgl %1, %2" : "=a" (ret) : "r" (val), "m" (*(lp)), "0"(cmp) : "memory", "cc"); if (!ret) { assert(!sl->threadid); sl->c = 1; sl->threadid = CURRENT_THREAD; return 1; } } return 0; } #else /* WIN32 */ /* Custom win32-style spin locks on x86 and x64 for MSC */ struct win32_mlock_t { volatile long l; volatile unsigned int c; volatile long threadid; }; #define MLOCK_T struct win32_mlock_t #define CURRENT_THREAD win32_getcurrentthreadid() #define INITIAL_LOCK(sl) (memset(sl, 0, sizeof(MLOCK_T)), 0) #define ACQUIRE_LOCK(sl) win32_acquire_lock(sl) #define RELEASE_LOCK(sl) win32_release_lock(sl) #define TRY_LOCK(sl) win32_try_lock(sl) #define SPINS_PER_YIELD 63 static MLOCK_T malloc_global_mutex = { 0, 0, 0}; static FORCEINLINE long win32_getcurrentthreadid() { #ifdef _MSC_VER #if defined(_M_IX86) long *threadstruct=(long *)__readfsdword(0x18); long threadid=threadstruct[0x24/sizeof(long)]; return threadid; #elif defined(_M_X64) /* todo */ return GetCurrentThreadId(); #else return GetCurrentThreadId(); #endif #else return GetCurrentThreadId(); #endif } static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) { int spins = 0; for (;;) { if (sl->l != 0) { if (sl->threadid == CURRENT_THREAD) { ++sl->c; return 0; } } else { if (!interlockedexchange(&sl->l, 1)) { assert(!sl->threadid); sl->c=CURRENT_THREAD; sl->threadid = CURRENT_THREAD; sl->c = 1; return 0; } } if ((++spins & SPINS_PER_YIELD) == 0) SleepEx(0, FALSE); } } static FORCEINLINE void win32_release_lock (MLOCK_T *sl) { assert(sl->threadid == CURRENT_THREAD); assert(sl->l != 0); if (--sl->c == 0) { sl->threadid = 0; interlockedexchange (&sl->l, 0); } } 
/* Try to acquire the win32 recursive spin lock without blocking.
   Returns 1 on success (including recursive re-entry), 0 otherwise. */
static FORCEINLINE int win32_try_lock (MLOCK_T *sl) {
  if(sl->l != 0) {
    if (sl->threadid == CURRENT_THREAD) {
      ++sl->c;                  /* recursive acquisition by owner */
      return 1;
    }
  }
  else {
    if (!interlockedexchange(&sl->l, 1)){
      assert(!sl->threadid);
      sl->threadid = CURRENT_THREAD;
      sl->c = 1;
      return 1;
    }
  }
  return 0;
}

#endif /* WIN32 */
#else /* USE_SPIN_LOCKS */

#ifndef WIN32
/* pthreads-based locks */

#define MLOCK_T               pthread_mutex_t
#define CURRENT_THREAD        pthread_self()
#define INITIAL_LOCK(sl)      pthread_init_lock(sl)
#define ACQUIRE_LOCK(sl)      pthread_mutex_lock(sl)
#define RELEASE_LOCK(sl)      pthread_mutex_unlock(sl)
#define TRY_LOCK(sl)          (!pthread_mutex_trylock(sl))

static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Cope with old-style linux recursive lock initialization by adding */
/* skipped internal declaration from pthread.h */
#ifdef linux
#ifndef PTHREAD_MUTEX_RECURSIVE
extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr,
                                              int __kind));
#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y)
#endif
#endif

/* Initialize *sl as a recursive mutex.
   Returns 0 on success, 1 on any attr/init failure. */
static int pthread_init_lock (MLOCK_T *sl) {
  pthread_mutexattr_t attr;
  if (pthread_mutexattr_init(&attr)) return 1;
  /* recursive type lets the owner re-acquire, matching spin-lock behavior */
  if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1;
  if (pthread_mutex_init(sl, &attr)) return 1;
  if (pthread_mutexattr_destroy(&attr)) return 1;
  return 0;
}

#else /* WIN32 */
/* Win32 critical sections */
#define MLOCK_T               CRITICAL_SECTION
#define CURRENT_THREAD        GetCurrentThreadId()
#define INITIAL_LOCK(s)       (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000))
#define ACQUIRE_LOCK(s)       (EnterCriticalSection(s), 0)
#define RELEASE_LOCK(s)       LeaveCriticalSection(s)
#define TRY_LOCK(s)           TryEnterCriticalSection(s)
#define NEED_GLOBAL_LOCK_INIT

static MLOCK_T malloc_global_mutex;
/* 0 = uninitialized, < 0 = initialization in progress, > 0 = ready */
static volatile long malloc_global_mutex_status;

/* Use spin loop to initialize global lock */
static void init_malloc_global_mutex() {
  for (;;) {
    long stat = malloc_global_mutex_status;
    if (stat > 0)
      return;
    /* transition to < 0 while initializing, then to > 0) */
    if (stat == 0 &&
        interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) {
      InitializeCriticalSection(&malloc_global_mutex);
      interlockedexchange(&malloc_global_mutex_status,1);
      return;
    }
    /* another thread won the race to initialize; yield and retry */
    SleepEx(0, FALSE);
  }
}

#endif /* WIN32 */
#endif /* USE_SPIN_LOCKS */
#endif /* USE_LOCKS == 1 */

/* -----------------------  User-defined locks ------------------------ */

#if USE_LOCKS > 1
/* Define your own lock implementation here */
/* #define INITIAL_LOCK(sl)  ... */
/* #define ACQUIRE_LOCK(sl)  ... */
/* #define RELEASE_LOCK(sl)  ... */
/* #define TRY_LOCK(sl) ... */
/* static MLOCK_T malloc_global_mutex = ... */
#endif /* USE_LOCKS > 1 */

/* -----------------------  Lock-based state ------------------------ */

#if USE_LOCKS
#define USE_LOCK_BIT               (2U)
#else  /* USE_LOCKS */
#define USE_LOCK_BIT               (0U)
#define INITIAL_LOCK(l)
#endif /* USE_LOCKS */

#if USE_LOCKS
#define ACQUIRE_MALLOC_GLOBAL_LOCK()  ACQUIRE_LOCK(&malloc_global_mutex);
#define RELEASE_MALLOC_GLOBAL_LOCK()  RELEASE_LOCK(&malloc_global_mutex);
#else  /* USE_LOCKS */
#define ACQUIRE_MALLOC_GLOBAL_LOCK()
#define RELEASE_MALLOC_GLOBAL_LOCK()
#endif /* USE_LOCKS */

/* -----------------------  Chunk representations ------------------------ */

/*
  (The following includes lightly edited explanations by Colin Plumb.)

  The malloc_chunk declaration below is misleading (but accurate and
  necessary).  It declares a "view" into memory allowing access to
  necessary fields at known offsets from a given base.

  Chunks of memory are maintained using a `boundary tag' method as
  originally described by Knuth.  (See the paper by Paul Wilson
  ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
  techniques.)  Sizes of free chunks are stored both in the front of
  each chunk and at the end.  This makes consolidating fragmented
  chunks into bigger chunks fast.  The head fields also hold bits
  representing whether chunks are free or in use.
Here are some pictures to make it clearer. They are "exploded" to show that the state of a chunk can be thought of as extending from the high 31 bits of the head field of its header through the prev_foot and PINUSE_BIT bit of the following chunk header. A chunk that's in use looks like: chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Size of previous chunk (if P = 0) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| | Size of this chunk 1| +-+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | +- -+ | | +- -+ | : +- size - sizeof(size_t) available payload bytes -+ : | chunk-> +- -+ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| | Size of next chunk (may or may not be in use) | +-+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ And if it's free, it looks like this: chunk-> +- -+ | User payload (must be in use, or we would have merged!) 
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| | Size of this chunk 0| +-+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Next pointer | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Prev pointer | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | : +- size - sizeof(struct chunk) unused bytes -+ : | chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Size of this chunk | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| | Size of next chunk (must be in use, or we would have merged)| +-+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | : +- User payload -+ : | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| +-+ Note that since we always merge adjacent free chunks, the chunks adjacent to a free chunk must be in use. Given a pointer to a chunk (which can be derived trivially from the payload pointer) we can, in O(1) time, find out whether the adjacent chunks are free, and if so, unlink them from the lists that they are on and merge them with the current chunk. Chunks always begin on even word boundaries, so the mem portion (which is returned to the user) is also on an even word boundary, and thus at least double-word aligned. The P (PINUSE_BIT) bit, stored in the unused low-order bit of the chunk size (which is always a multiple of two words), is an in-use bit for the *previous* chunk. If that bit is *clear*, then the word before the current chunk size contains the previous chunk size, and can be used to find the front of the previous chunk. The very first chunk allocated always has this bit set, preventing access to non-existent (or non-owned) memory. 
  If pinuse is set for any given chunk, then you CANNOT determine the
  size of the previous chunk, and might even get a memory addressing
  fault when trying to do so.

  The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
  the chunk size redundantly records whether the current chunk is
  inuse. This redundancy enables usage checks within free and realloc,
  and reduces indirection when freeing and consolidating chunks.

  Each freshly allocated chunk must have both cinuse and pinuse set.
  That is, each allocated chunk borders either a previously allocated
  and still in-use chunk, or the base of its memory arena. This is
  ensured by making all allocations from the `lowest' part of any
  found chunk.  Further, no free chunk physically borders another one,
  so each free chunk is known to be preceded and followed by either
  inuse chunks or the ends of memory.

  Note that the `foot' of the current chunk is actually represented
  as the prev_foot of the NEXT chunk. This makes it easier to
  deal with alignments etc but can be very confusing when trying
  to extend or adapt this code.

  The exceptions to all this are

     1. The special chunk `top' is the top-most available chunk (i.e.,
        the one bordering the end of available memory). It is treated
        specially.  Top is never included in any bin, is used only if
        no other chunk is available, and is released back to the
        system if it is very large (see M_TRIM_THRESHOLD).  In effect,
        the top chunk is treated as larger (and thus less well
        fitting) than any other available chunk.  The top chunk
        doesn't update its trailing size field since there is no next
        contiguous chunk that would have to index off it. However,
        space is still allocated for it (TOP_FOOT_SIZE) to enable
        separation or merging when space is extended.

     2. Chunks allocated via mmap, which have the lowest-order bit
        (IS_MMAPPED_BIT) set in their prev_foot fields, and do not set
        PINUSE_BIT in their head fields.
  Because they are allocated one-by-one, each must carry its own
  prev_foot field, which is also used to hold the offset this chunk
  has within its mmapped region, which is needed to preserve
  alignment. Each mmapped chunk is trailed by the first two fields of
  a fake next-chunk for sake of usage checks.
*/

/* Overlay view of a chunk; see the explanation comment above. */
struct malloc_chunk {
  size_t               prev_foot;  /* Size of previous chunk (if free).  */
  size_t               head;       /* Size and inuse bits. */
  struct malloc_chunk* fd;         /* double links -- used only if free. */
  struct malloc_chunk* bk;
};

typedef struct malloc_chunk  mchunk;
typedef struct malloc_chunk* mchunkptr;
typedef struct malloc_chunk* sbinptr;  /* The type of bins of chunks */
typedef unsigned int bindex_t;         /* Described below */
typedef unsigned int binmap_t;         /* Described below */
typedef unsigned int flag_t;           /* The type of various bit flag sets */

/* ------------------- Chunks sizes and alignments ----------------------- */

#define MCHUNK_SIZE         (sizeof(mchunk))

#if FOOTERS
#define CHUNK_OVERHEAD      (TWO_SIZE_T_SIZES)
#else /* FOOTERS */
#define CHUNK_OVERHEAD      (SIZE_T_SIZE)
#endif /* FOOTERS */

/* MMapped chunks need a second word of overhead ... */
#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
/* ... and additional padding for fake next-chunk at foot */
#define MMAP_FOOT_PAD       (FOUR_SIZE_T_SIZES)

/* The smallest size we can malloc is an aligned minimal chunk */
#define MIN_CHUNK_SIZE\
  ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)

/* conversion from malloc headers to user pointers, and back */
#define chunk2mem(p)        ((void*)((char*)(p)       + TWO_SIZE_T_SIZES))
#define mem2chunk(mem)      ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
/* chunk associated with aligned address A */
#define align_as_chunk(A)   (mchunkptr)((A) + align_offset(chunk2mem(A)))

/* Bounds on request (not chunk) sizes.
*/
#define MAX_REQUEST         ((-MIN_CHUNK_SIZE) << 2)
#define MIN_REQUEST         (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)

/* pad request bytes into a usable size */
#define pad_request(req) \
   (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)

/* pad request, checking for minimum (but not maximum) */
#define request2size(req) \
  (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))

/* ------------------ Operations on head and foot fields ----------------- */

/*
  The head field of a chunk is or'ed with PINUSE_BIT when previous
  adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
  use. If the chunk was obtained with mmap, the prev_foot field has
  IS_MMAPPED_BIT set, otherwise holding the offset of the base of the
  mmapped region to the base of the chunk.

  FLAG4_BIT is not used by this malloc, but might be useful in extensions.
*/

#define PINUSE_BIT          (SIZE_T_ONE)
#define CINUSE_BIT          (SIZE_T_TWO)
#define FLAG4_BIT           (SIZE_T_FOUR)
#define INUSE_BITS          (PINUSE_BIT|CINUSE_BIT)
#define FLAG_BITS           (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT)

/* Head value for fenceposts */
#define FENCEPOST_HEAD      (INUSE_BITS|SIZE_T_SIZE)

/* extraction of fields from head words */
#define cinuse(p)           ((p)->head & CINUSE_BIT)
#define pinuse(p)           ((p)->head & PINUSE_BIT)
#define chunksize(p)        ((p)->head & ~(FLAG_BITS))

#define clear_pinuse(p)     ((p)->head &= ~PINUSE_BIT)
#define clear_cinuse(p)     ((p)->head &= ~CINUSE_BIT)

/* Treat space at ptr +/- offset as a chunk */
#define chunk_plus_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))
#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))

/* Ptr to next or previous physical malloc_chunk.
*/
#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS)))
#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))

/* extract next chunk's pinuse bit */
#define next_pinuse(p)  ((next_chunk(p)->head) & PINUSE_BIT)

/* Get/set size at footer */
#define get_foot(p, s)  (((mchunkptr)((char*)(p) + (s)))->prev_foot)
#define set_foot(p, s)  (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))

/* Set size, pinuse bit, and foot */
#define set_size_and_pinuse_of_free_chunk(p, s)\
  ((p)->head = (s|PINUSE_BIT), set_foot(p, s))

/* Set size, pinuse bit, foot, and clear next pinuse */
#define set_free_with_pinuse(p, s, n)\
  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))

#define is_mmapped(p)\
  (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_MMAPPED_BIT))

/* Get the internal overhead associated with chunk p */
#define overhead_for(p)\
 (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)

/* Return true if malloced space is not necessarily cleared */
#if MMAP_CLEARS
#define calloc_must_clear(p) (!is_mmapped(p))
#else /* MMAP_CLEARS */
#define calloc_must_clear(p) (1)
#endif /* MMAP_CLEARS */

/* ---------------------- Overlaid data structures ----------------------- */

/*
  When chunks are not in use, they are treated as nodes of either
  lists or trees.

  "Small"  chunks are stored in circular doubly-linked lists, and look
  like this:

    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk                            |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `head:' |             Size of chunk, in bytes                         |P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Forward pointer to next chunk in list             |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Back pointer to previous chunk in list            |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Unused space (may be 0 bytes long)                .
            .                                                               .
            .
| nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ `foot:' | Size of chunk, in bytes | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Larger chunks are kept in a form of bitwise digital trees (aka tries) keyed on chunksizes. Because malloc_tree_chunks are only for free chunks greater than 256 bytes, their size doesn't impose any constraints on user chunk sizes. Each node looks like: chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Size of previous chunk | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ `head:' | Size of chunk, in bytes |P| mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Forward pointer to next chunk of same size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Back pointer to previous chunk of same size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Pointer to left child (child[0]) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Pointer to right child (child[1]) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Pointer to parent | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | bin index of this chunk | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Unused space . . | nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ `foot:' | Size of chunk, in bytes | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Each tree holding treenodes is a tree of unique chunk sizes. Chunks of the same size are arranged in a circularly-linked list, with only the oldest chunk (the next to be used, in our FIFO ordering) actually in the tree. (Tree members are distinguished by a non-null parent pointer.) If a chunk with the same size an an existing node is inserted, it is linked off the existing node using pointers that work in the same way as fd/bk pointers of small chunks. 
  Each tree contains a power of 2 sized range of chunk sizes (the
  smallest is 0x100 <= x < 0x180), which is divided in half at each
  tree level, with the chunks in the smaller half of the range (0x100
  <= x < 0x140 for the top node) in the left subtree and the larger
  half (0x140 <= x < 0x180) in the right subtree.  This is, of course,
  done by inspecting individual bits.

  Using these rules, each node's left subtree contains all smaller
  sizes than its right subtree.  However, the node at the root of each
  subtree has no particular ordering relationship to either.  (The
  dividing line between the subtree sizes is based on trie relation.)
  If we remove the last chunk of a given size from the interior of the
  tree, we need to replace it with a leaf node.  The tree ordering
  rules permit a node to be replaced by any leaf below it.

  The smallest chunk in a tree (a common operation in a best-fit
  allocator) can be found by walking a path to the leftmost leaf in
  the tree.  Unlike a usual binary tree, where we follow left child
  pointers until we reach a null, here we follow the right child
  pointer any time the left one is null, until we reach a leaf with
  both child pointers null. The smallest chunk in the tree will be
  somewhere along that path.

  The worst case number of steps to add, find, or remove a node is
  bounded by the number of bits differentiating chunks within
  bins. Under current bin calculations, this ranges from 6 up to 21
  (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
  is of course much better.
*/

/* Free-chunk overlay used as a node of the size-keyed trees. */
struct malloc_tree_chunk {
  /* The first four fields must be compatible with malloc_chunk */
  size_t                    prev_foot;
  size_t                    head;
  struct malloc_tree_chunk* fd;
  struct malloc_tree_chunk* bk;

  struct malloc_tree_chunk* child[2];
  struct malloc_tree_chunk* parent;
  bindex_t                  index;
};

typedef struct malloc_tree_chunk  tchunk;
typedef struct malloc_tree_chunk* tchunkptr;
typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */

/* A little helper macro for trees */
#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])

/* ----------------------------- Segments -------------------------------- */

/*
  Each malloc space may include non-contiguous segments, held in a
  list headed by an embedded malloc_segment record representing the
  top-most space. Segments also include flags holding properties of
  the space. Large chunks that are directly allocated by mmap are not
  included in this list. They are instead independently created and
  destroyed without otherwise keeping track of them.

  Segment management mainly comes into play for spaces allocated by
  MMAP.  Any call to MMAP might or might not return memory that is
  adjacent to an existing segment.  MORECORE normally contiguously
  extends the current space, so this space is almost always adjacent,
  which is simpler and faster to deal with. (This is why MORECORE is
  used preferentially to MMAP when both are available -- see
  sys_alloc.)  When allocating using MMAP, we don't use any of the
  hinting mechanisms (inconsistently) supported in various
  implementations of unix mmap, or distinguish reserving from
  committing memory. Instead, we just ask for space, and exploit
  contiguity when we get it.  It is probably possible to do better
  than this on some systems, but no general scheme seems to be
  significantly better.

  Management entails a simpler variant of the consolidation scheme
  used for chunks to reduce fragmentation -- new adjacent memory is
  normally prepended or appended to an existing segment.
  However, there are limitations compared to chunk consolidation that
  mostly reflect the fact that segment processing is relatively
  infrequent (occurring only when getting memory from system) and that
  we don't expect to have huge numbers of segments:

  * Segments are not indexed, so traversal requires linear scans.  (It
    would be possible to index these, but is not worth the extra
    overhead and complexity for most programs on most platforms.)
  * New segments are only appended to old ones when holding top-most
    memory; if they cannot be prepended to others, they are held in
    different segments.

  Except for the top-most segment of an mstate, each segment record
  is kept at the tail of its segment. Segments are added by pushing
  segment records onto the list headed by &mstate.seg for the
  containing mstate.

  Segment flags control allocation/merge/deallocation policies:
  * If EXTERN_BIT set, then we did not allocate this segment,
    and so should not try to deallocate or merge with others.
    (This currently holds only for the initial segment passed
    into create_mspace_with_base.)
  * If IS_MMAPPED_BIT set, the segment may be merged with
    other surrounding mmapped segments and trimmed/de-allocated
    using munmap.
  * If neither bit is set, then the segment was obtained using
    MORECORE so can be merged with surrounding MORECORE'd segments
    and deallocated/trimmed using MORECORE with negative arguments.
*/

struct malloc_segment {
  char*        base;             /* base address */
  size_t       size;             /* allocated size */
  struct malloc_segment* next;   /* ptr to next segment */
  flag_t       sflags;           /* mmap and extern flag */
};

#define is_mmapped_segment(S)  ((S)->sflags & IS_MMAPPED_BIT)
#define is_extern_segment(S)   ((S)->sflags & EXTERN_BIT)

typedef struct malloc_segment  msegment;
typedef struct malloc_segment* msegmentptr;

/* ---------------------------- malloc_state ----------------------------- */

/*
   A malloc_state holds all of the bookkeeping for a space.
   The main fields are:

  Top
    The topmost chunk of the currently active segment.
    Its size is cached in topsize.  The actual size of topmost space
    is topsize+TOP_FOOT_SIZE, which includes space reserved for adding
    fenceposts and segment records if necessary when getting more
    space from the system.  The size at which to autotrim top is
    cached from mparams in trim_check, except that it is disabled if
    an autotrim fails.

  Designated victim (dv)
    This is the preferred chunk for servicing small requests that
    don't have exact fits.  It is normally the chunk split off most
    recently to service another small request.  Its size is cached in
    dvsize. The link fields of this chunk are not maintained since it
    is not kept in a bin.

  SmallBins
    An array of bin headers for free chunks.  These bins hold chunks
    with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
    chunks of all the same size, spaced 8 bytes apart.  To simplify
    use in double-linked lists, each bin header acts as a malloc_chunk
    pointing to the real first node, if it exists (else pointing to
    itself).  This avoids special-casing for headers.  But to avoid
    waste, we allocate only the fd/bk pointers of bins, and then use
    repositioning tricks to treat these as the fields of a chunk.

  TreeBins
    Treebins are pointers to the roots of trees holding a range of
    sizes. There are 2 equally spaced treebins for each power of two
    from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
    larger.

  Bin maps
    There is one bit map for small bins ("smallmap") and one for
    treebins ("treemap").  Each bin sets its bit when non-empty, and
    clears the bit when empty.  Bit operations are then used to avoid
    bin-by-bin searching -- nearly all "search" is done without ever
    looking at bins that won't be selected.  The bit maps
    conservatively use 32 bits per map word, even if on 64bit system.
    For a good description of some of the bit-based techniques used
    here, see Henry S. Warren Jr's book "Hacker's Delight" (and
    supplement at http://hackersdelight.org/).
    Many of these are intended to reduce the branchiness of paths
    through malloc etc, as well as to reduce the number of memory
    locations read or written.

  Segments
    A list of segments headed by an embedded malloc_segment record
    representing the initial space.

  Address check support
    The least_addr field is the least address ever obtained from
    MORECORE or MMAP. Attempted frees and reallocs of any address less
    than this are trapped (unless INSECURE is defined).

  Magic tag
    A cross-check field that should always hold the same value as
    mparams.magic.

  Flags
    Bits recording whether to use MMAP, locks, or contiguous MORECORE

  Statistics
    Each space keeps track of current and maximum system memory
    obtained via MORECORE or MMAP.

  Trim support
    Fields holding the amount of unused topmost memory that should
    trigger trimming, and a counter to force periodic scanning to
    release unused non-topmost segments.

  Locking
    If USE_LOCKS is defined, the "mutex" lock is acquired and released
    around every public call using this mspace.

  Extension support
    A void* pointer and a size_t field that can be used to help implement
    extensions to this malloc.
*/

/* Bin types, widths and sizes */
#define NSMALLBINS        (32U)
#define NTREEBINS         (32U)
#define SMALLBIN_SHIFT    (3U)
#define SMALLBIN_WIDTH    (SIZE_T_ONE << SMALLBIN_SHIFT)
#define TREEBIN_SHIFT     (8U)
#define MIN_LARGE_SIZE    (SIZE_T_ONE << TREEBIN_SHIFT)
#define MAX_SMALL_SIZE    (MIN_LARGE_SIZE - SIZE_T_ONE)
#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)

/* Bookkeeping for one malloc space; see the big comment above. */
struct malloc_state {
  binmap_t   smallmap;        /* bit set per non-empty smallbin */
  binmap_t   treemap;         /* bit set per non-empty treebin */
  size_t     dvsize;          /* size of the designated victim chunk */
  size_t     topsize;         /* size of the top chunk */
  char*      least_addr;      /* least address obtained from the system */
  mchunkptr  dv;              /* designated victim chunk */
  mchunkptr  top;             /* topmost chunk */
  size_t     trim_check;      /* threshold for auto-trimming top */
  size_t     release_checks;  /* countdown to scanning unused segments */
  size_t     magic;           /* cross-check against mparams.magic */
  mchunkptr  smallbins[(NSMALLBINS+1)*2];  /* smallbin list headers */
  tbinptr    treebins[NTREEBINS];          /* treebin roots */
  size_t     footprint;       /* current system memory obtained */
  size_t     max_footprint;   /* high-water mark of footprint */
  flag_t     mflags;          /* MMAP/lock/contiguity flags */
#if USE_LOCKS
  MLOCK_T    mutex;     /* locate lock among fields that rarely change */
#endif /* USE_LOCKS */
  msegment   seg;             /* embedded record for the initial segment */
  void*      extp;            /* Unused but available for extensions */
  size_t     exts;
};

typedef struct malloc_state* mstate;

/* ------------- Global malloc_state and malloc_params ------------------- */

/*
  malloc_params holds global properties, including those that can be
  dynamically set using mallopt. There is a single instance, mparams,
  initialized in init_mparams. Note that the non-zeroness of "magic"
  also serves as an initialization flag.
*/

struct malloc_params {
  volatile size_t magic;      /* sanity word; nonzero => initialized */
  size_t page_size;
  size_t granularity;
  size_t mmap_threshold;
  size_t trim_threshold;
  flag_t default_mflags;
};

static struct malloc_params mparams;

/* Ensure mparams initialized */
#define ensure_initialization() (mparams.magic != 0 || init_mparams())

#if !ONLY_MSPACES

/* The global malloc_state used for all non-"mspace" calls */
static struct malloc_state _gm_;
#define gm                 (&_gm_)
#define is_global(M)       ((M) == &_gm_)

#endif /* !ONLY_MSPACES */

#define is_initialized(M)  ((M)->top != 0)

/* -------------------------- system alloc setup ------------------------- */

/* Operations on mflags */

#define use_lock(M)           ((M)->mflags &   USE_LOCK_BIT)
#define enable_lock(M)        ((M)->mflags |=  USE_LOCK_BIT)
#define disable_lock(M)       ((M)->mflags &= ~USE_LOCK_BIT)

#define use_mmap(M)           ((M)->mflags &   USE_MMAP_BIT)
#define enable_mmap(M)        ((M)->mflags |=  USE_MMAP_BIT)
#define disable_mmap(M)       ((M)->mflags &= ~USE_MMAP_BIT)

#define use_noncontiguous(M)  ((M)->mflags &   USE_NONCONTIGUOUS_BIT)
#define disable_contiguous(M) ((M)->mflags |=  USE_NONCONTIGUOUS_BIT)

#define set_lock(M,L)\
 ((M)->mflags = (L)?\
  ((M)->mflags | USE_LOCK_BIT) :\
  ((M)->mflags & ~USE_LOCK_BIT))

/* page-align a size */
#define page_align(S)\
 (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))

/* granularity-align a size */
#define granularity_align(S)\
  (((S) + (mparams.granularity - SIZE_T_ONE))\
   & ~(mparams.granularity - SIZE_T_ONE))

/* For mmap, use granularity alignment on windows, else page-align */
#ifdef WIN32
#define mmap_align(S) granularity_align(S)
#else
#define mmap_align(S) page_align(S)
#endif

/* For sys_alloc, enough padding to ensure can malloc request on success */
#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)

#define is_page_aligned(S)\
   (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
#define is_granularity_aligned(S)\
   (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)

/* True if segment S holds address A */
#define segment_holds(S, A)\ ((char*)(A) >= S->base && (char*)(A) < S->base + S->size) /* Return segment holding given address */ static msegmentptr segment_holding(mstate m, char* addr) { msegmentptr sp = &m->seg; for (;;) { if (addr >= sp->base && addr < sp->base + sp->size) return sp; if ((sp = sp->next) == 0) return 0; } } /* Return true if segment contains a segment link */ static int has_segment_link(mstate m, msegmentptr ss) { msegmentptr sp = &m->seg; for (;;) { if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) return 1; if ((sp = sp->next) == 0) return 0; } } #ifndef MORECORE_CANNOT_TRIM #define should_trim(M,s) ((s) > (M)->trim_check) #else /* MORECORE_CANNOT_TRIM */ #define should_trim(M,s) (0) #endif /* MORECORE_CANNOT_TRIM */ /* TOP_FOOT_SIZE is padding at the end of a segment, including space that may be needed to place segment records and fenceposts when new noncontiguous segments are added. */ #define TOP_FOOT_SIZE\ (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) /* ------------------------------- Hooks -------------------------------- */ /* PREACTION should be defined to return 0 on success, and nonzero on failure. If you are not using locking, you can redefine these to do anything you like. */ #if USE_LOCKS #define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) #define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } #else /* USE_LOCKS */ #ifndef PREACTION #define PREACTION(M) (0) #endif /* PREACTION */ #ifndef POSTACTION #define POSTACTION(M) #endif /* POSTACTION */ #endif /* USE_LOCKS */ /* CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. USAGE_ERROR_ACTION is triggered on detected bad frees and reallocs. The argument p is an address that might have triggered the fault. It is ignored by the two predefined actions, but might be useful in custom actions that try to help diagnose errors. 
*/ #if PROCEED_ON_ERROR /* A count of the number of corruption errors causing resets */ int malloc_corruption_error_count; /* default corruption action */ static void reset_on_error(mstate m); #define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) #define USAGE_ERROR_ACTION(m, p) #else /* PROCEED_ON_ERROR */ #ifndef CORRUPTION_ERROR_ACTION #define CORRUPTION_ERROR_ACTION(m) ABORT #endif /* CORRUPTION_ERROR_ACTION */ #ifndef USAGE_ERROR_ACTION #define USAGE_ERROR_ACTION(m,p) ABORT #endif /* USAGE_ERROR_ACTION */ #endif /* PROCEED_ON_ERROR */ /* -------------------------- Debugging setup ---------------------------- */ #if ! DEBUG #define check_free_chunk(M,P) #define check_inuse_chunk(M,P) #define check_malloced_chunk(M,P,N) #define check_mmapped_chunk(M,P) #define check_malloc_state(M) #define check_top_chunk(M,P) #else /* DEBUG */ #define check_free_chunk(M,P) do_check_free_chunk(M,P) #define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) #define check_top_chunk(M,P) do_check_top_chunk(M,P) #define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) #define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) #define check_malloc_state(M) do_check_malloc_state(M) static void do_check_any_chunk(mstate m, mchunkptr p); static void do_check_top_chunk(mstate m, mchunkptr p); static void do_check_mmapped_chunk(mstate m, mchunkptr p); static void do_check_inuse_chunk(mstate m, mchunkptr p); static void do_check_free_chunk(mstate m, mchunkptr p); static void do_check_malloced_chunk(mstate m, void* mem, size_t s); static void do_check_tree(mstate m, tchunkptr t); static void do_check_treebin(mstate m, bindex_t i); static void do_check_smallbin(mstate m, bindex_t i); static void do_check_malloc_state(mstate m); static int bin_find(mstate m, mchunkptr x); static size_t traverse_and_check(mstate m); #endif /* DEBUG */ /* ---------------------------- Indexing Bins ---------------------------- */ #define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) #define 
small_index(s) ((s) >> SMALLBIN_SHIFT) #define small_index2size(i) ((i) << SMALLBIN_SHIFT) #define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) /* addressing by index. See above about smallbin repositioning */ #define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) #define treebin_at(M,i) (&((M)->treebins[i])) /* assign tree index for size S to variable I. Use x86 asm if possible */ #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) #define compute_tree_index(S, I)\ {\ unsigned int X = S >> TREEBIN_SHIFT;\ if (X == 0)\ I = 0;\ else if (X > 0xFFFF)\ I = NTREEBINS-1;\ else {\ unsigned int K;\ __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "rm" (X));\ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } #elif defined (__INTEL_COMPILER) #define compute_tree_index(S, I)\ {\ size_t X = S >> TREEBIN_SHIFT;\ if (X == 0)\ I = 0;\ else if (X > 0xFFFF)\ I = NTREEBINS-1;\ else {\ unsigned int K = _bit_scan_reverse (X); \ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } #elif defined(_MSC_VER) && _MSC_VER>=1300 #define compute_tree_index(S, I)\ {\ size_t X = S >> TREEBIN_SHIFT;\ if (X == 0)\ I = 0;\ else if (X > 0xFFFF)\ I = NTREEBINS-1;\ else {\ unsigned int K;\ _BitScanReverse((DWORD *) &K, X);\ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } #else /* GNUC */ #define compute_tree_index(S, I)\ {\ size_t X = S >> TREEBIN_SHIFT;\ if (X == 0)\ I = 0;\ else if (X > 0xFFFF)\ I = NTREEBINS-1;\ else {\ unsigned int Y = (unsigned int)X;\ unsigned int N = ((Y - 0x100) >> 16) & 8;\ unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ N += K;\ N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ K = 14 - N + ((Y <<= K) >> 15);\ I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ }\ } #endif /* GNUC */ /* Bit representing maximum resolved size in a treebin at i */ #define bit_for_tree_index(i) \ (i == NTREEBINS-1)? 
(SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) /* Shift placing maximum resolved bit in a treebin at i as sign bit */ #define leftshift_for_tree_index(i) \ ((i == NTREEBINS-1)? 0 : \ ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) /* The size of the smallest chunk held in bin with index i */ #define minsize_for_tree_index(i) \ ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) /* ------------------------ Operations on bin maps ----------------------- */ /* bit corresponding to given index */ #define idx2bit(i) ((binmap_t)(1) << (i)) /* Mark/Clear bits with given index */ #define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) #define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) #define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) #define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) #define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) #define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) /* isolate the least set bit of a bitmap */ #define least_bit(x) ((x) & -(x)) /* mask with all bits to left of least bit of x on */ #define left_bits(x) ((x<<1) | -(x<<1)) /* mask with all bits to left of or equal to least bit of x on */ #define same_or_left_bits(x) ((x) | -(x)) /* index corresponding to given bit. 
Use x86 asm if possible */ #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) #define compute_bit2idx(X, I)\ {\ unsigned int J;\ __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "rm" (X));\ I = (bindex_t)J;\ } #elif defined (__INTEL_COMPILER) #define compute_bit2idx(X, I)\ {\ unsigned int J;\ J = _bit_scan_forward (X); \ I = (bindex_t)J;\ } #elif defined(_MSC_VER) && _MSC_VER>=1300 #define compute_bit2idx(X, I)\ {\ unsigned int J;\ _BitScanForward((DWORD *) &J, X);\ I = (bindex_t)J;\ } #elif USE_BUILTIN_FFS #define compute_bit2idx(X, I) I = ffs(X)-1 #else #define compute_bit2idx(X, I)\ {\ unsigned int Y = X - 1;\ unsigned int K = Y >> (16-4) & 16;\ unsigned int N = K; Y >>= K;\ N += K = Y >> (8-3) & 8; Y >>= K;\ N += K = Y >> (4-2) & 4; Y >>= K;\ N += K = Y >> (2-1) & 2; Y >>= K;\ N += K = Y >> (1-0) & 1; Y >>= K;\ I = (bindex_t)(N + Y);\ } #endif /* GNUC */ /* ----------------------- Runtime Check Support ------------------------- */ /* For security, the main invariant is that malloc/free/etc never writes to a static address other than malloc_state, unless static malloc_state itself has been corrupted, which cannot occur via malloc (because of these checks). In essence this means that we believe all pointers, sizes, maps etc held in malloc_state, but check all of those linked or offsetted from other embedded data structures. These checks are interspersed with main code in a way that tends to minimize their run-time cost. When FOOTERS is defined, in addition to range checking, we also verify footer fields of inuse chunks, which can be used guarantee that the mstate controlling malloc/free is intact. This is a streamlined version of the approach described by William Robertson et al in "Run-time Detection of Heap-based Overflows" LISA'03 http://www.usenix.org/events/lisa03/tech/robertson.html The footer of an inuse chunk holds the xor of its mstate and a random seed, that is checked upon calls to free() and realloc(). 
This is (probablistically) unguessable from outside the program, but can be computed by any code successfully malloc'ing any chunk, so does not itself provide protection against code that has already broken security through some other means. Unlike Robertson et al, we always dynamically check addresses of all offset chunks (previous, next, etc). This turns out to be cheaper than relying on hashes. */ #if !INSECURE /* Check if address a is at least as high as any from MORECORE or MMAP */ #define ok_address(M, a) ((char*)(a) >= (M)->least_addr) /* Check if address of next chunk n is higher than base chunk p */ #define ok_next(p, n) ((char*)(p) < (char*)(n)) /* Check if p has its cinuse bit on */ #define ok_cinuse(p) cinuse(p) /* Check if p has its pinuse bit on */ #define ok_pinuse(p) pinuse(p) #else /* !INSECURE */ #define ok_address(M, a) (1) #define ok_next(b, n) (1) #define ok_cinuse(p) (1) #define ok_pinuse(p) (1) #endif /* !INSECURE */ #if (FOOTERS && !INSECURE) /* Check if (alleged) mstate m has expected magic field */ #define ok_magic(M) ((M)->magic == mparams.magic) #else /* (FOOTERS && !INSECURE) */ #define ok_magic(M) (1) #endif /* (FOOTERS && !INSECURE) */ /* In gcc, use __builtin_expect to minimize impact of checks */ #if !INSECURE #if defined(__GNUC__) && __GNUC__ >= 3 #define RTCHECK(e) __builtin_expect(e, 1) #else /* GNUC */ #define RTCHECK(e) (e) #endif /* GNUC */ #else /* !INSECURE */ #define RTCHECK(e) (1) #endif /* !INSECURE */ /* macros to set up inuse chunks with or without footers */ #if !FOOTERS #define mark_inuse_foot(M,p,s) /* Set cinuse bit and pinuse bit of next chunk */ #define set_inuse(M,p,s)\ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) /* Set cinuse and pinuse of this chunk and pinuse of next chunk */ #define set_inuse_and_pinuse(M,p,s)\ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) /* Set size, cinuse and pinuse bit of 
this chunk */ #define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) #else /* FOOTERS */ /* Set foot of inuse chunk to be xor of mstate and seed */ #define mark_inuse_foot(M,p,s)\ (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) #define get_mstate_for(p)\ ((mstate)(((mchunkptr)((char*)(p) +\ (chunksize(p))))->prev_foot ^ mparams.magic)) #define set_inuse(M,p,s)\ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ mark_inuse_foot(M,p,s)) #define set_inuse_and_pinuse(M,p,s)\ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ mark_inuse_foot(M,p,s)) #define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ mark_inuse_foot(M, p, s)) #endif /* !FOOTERS */ /* ---------------------------- setting mparams -------------------------- */ /* Initialize mparams */ static int init_mparams(void) { #ifdef NEED_GLOBAL_LOCK_INIT if (malloc_global_mutex_status <= 0) init_malloc_global_mutex(); #endif ACQUIRE_MALLOC_GLOBAL_LOCK(); if (mparams.magic == 0) { size_t magic; size_t psize; size_t gsize; #ifndef WIN32 psize = malloc_getpagesize; gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); #else /* WIN32 */ { SYSTEM_INFO system_info; GetSystemInfo(&system_info); psize = system_info.dwPageSize; gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); } #endif /* WIN32 */ /* Sanity-check configuration: size_t must be unsigned and as wide as pointer type. ints must be at least 4 bytes. alignment must be at least 8. Alignment, min chunk size, and page size must all be powers of 2. 
*/ if ((sizeof(size_t) != sizeof(char*)) || (MAX_SIZE_T < MIN_CHUNK_SIZE) || (sizeof(int) < 4) || (MALLOC_ALIGNMENT < (size_t)8U) || ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || ((gsize & (gsize-SIZE_T_ONE)) != 0) || ((psize & (psize-SIZE_T_ONE)) != 0)) ABORT; mparams.granularity = gsize; mparams.page_size = psize; mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; #if MORECORE_CONTIGUOUS mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; #else /* MORECORE_CONTIGUOUS */ mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; #endif /* MORECORE_CONTIGUOUS */ #if !ONLY_MSPACES /* Set up lock for main malloc area */ gm->mflags = mparams.default_mflags; INITIAL_LOCK(&gm->mutex); #endif #if (FOOTERS && !INSECURE) { #if USE_DEV_RANDOM int fd; unsigned char buf[sizeof(size_t)]; /* Try to use /dev/urandom, else fall back on using time */ if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && read(fd, buf, sizeof(buf)) == sizeof(buf)) { magic = *((size_t *) buf); close(fd); } else #endif /* USE_DEV_RANDOM */ #ifdef WIN32 magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); #else magic = (size_t)(time(0) ^ (size_t)0x55555555U); #endif magic |= (size_t)8U; /* ensure nonzero */ magic &= ~(size_t)7U; /* improve chances of fault for bad values */ } #else /* (FOOTERS && !INSECURE) */ magic = (size_t)0x58585858U; #endif /* (FOOTERS && !INSECURE) */ mparams.magic = magic; } RELEASE_MALLOC_GLOBAL_LOCK(); return 1; } /* support for mallopt */ static int change_mparam(int param_number, int value) { size_t val = (value == -1)? 
MAX_SIZE_T : (size_t)value; ensure_initialization(); switch(param_number) { case M_TRIM_THRESHOLD: mparams.trim_threshold = val; return 1; case M_GRANULARITY: if (val >= mparams.page_size && ((val & (val-1)) == 0)) { mparams.granularity = val; return 1; } else return 0; case M_MMAP_THRESHOLD: mparams.mmap_threshold = val; return 1; default: return 0; } } #if DEBUG /* ------------------------- Debugging Support --------------------------- */ /* Check properties of any chunk, whether free, inuse, mmapped etc */ static void do_check_any_chunk(mstate m, mchunkptr p) { assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); assert(ok_address(m, p)); } /* Check properties of top chunk */ static void do_check_top_chunk(mstate m, mchunkptr p) { msegmentptr sp = segment_holding(m, (char*)p); size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */ assert(sp != 0); assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); assert(ok_address(m, p)); assert(sz == m->topsize); assert(sz > 0); assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); assert(pinuse(p)); assert(!pinuse(chunk_plus_offset(p, sz))); } /* Check properties of (inuse) mmapped chunks */ static void do_check_mmapped_chunk(mstate m, mchunkptr p) { size_t sz = chunksize(p); size_t len = (sz + (p->prev_foot & ~IS_MMAPPED_BIT) + MMAP_FOOT_PAD); assert(is_mmapped(p)); assert(use_mmap(m)); assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); assert(ok_address(m, p)); assert(!is_small(sz)); assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); } /* Check properties of inuse chunks */ static void do_check_inuse_chunk(mstate m, mchunkptr p) { do_check_any_chunk(m, p); assert(cinuse(p)); assert(next_pinuse(p)); /* If not pinuse and not mmapped, previous chunk has OK offset */ assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); if 
(is_mmapped(p)) do_check_mmapped_chunk(m, p); } /* Check properties of free chunks */ static void do_check_free_chunk(mstate m, mchunkptr p) { size_t sz = chunksize(p); mchunkptr next = chunk_plus_offset(p, sz); do_check_any_chunk(m, p); assert(!cinuse(p)); assert(!next_pinuse(p)); assert (!is_mmapped(p)); if (p != m->dv && p != m->top) { if (sz >= MIN_CHUNK_SIZE) { assert((sz & CHUNK_ALIGN_MASK) == 0); assert(is_aligned(chunk2mem(p))); assert(next->prev_foot == sz); assert(pinuse(p)); assert (next == m->top || cinuse(next)); assert(p->fd->bk == p); assert(p->bk->fd == p); } else /* markers are always of size SIZE_T_SIZE */ assert(sz == SIZE_T_SIZE); } } /* Check properties of malloced chunks at the point they are malloced */ static void do_check_malloced_chunk(mstate m, void* mem, size_t s) { if (mem != 0) { mchunkptr p = mem2chunk(mem); size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT); do_check_inuse_chunk(m, p); assert((sz & CHUNK_ALIGN_MASK) == 0); assert(sz >= MIN_CHUNK_SIZE); assert(sz >= s); /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); } } /* Check a tree and its subtrees. 
*/ static void do_check_tree(mstate m, tchunkptr t) { tchunkptr head = 0; tchunkptr u = t; bindex_t tindex = t->index; size_t tsize = chunksize(t); bindex_t idx; compute_tree_index(tsize, idx); assert(tindex == idx); assert(tsize >= MIN_LARGE_SIZE); assert(tsize >= minsize_for_tree_index(idx)); assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); do { /* traverse through chain of same-sized nodes */ do_check_any_chunk(m, ((mchunkptr)u)); assert(u->index == tindex); assert(chunksize(u) == tsize); assert(!cinuse(u)); assert(!next_pinuse(u)); assert(u->fd->bk == u); assert(u->bk->fd == u); if (u->parent == 0) { assert(u->child[0] == 0); assert(u->child[1] == 0); } else { assert(head == 0); /* only one node on chain has parent */ head = u; assert(u->parent != u); assert (u->parent->child[0] == u || u->parent->child[1] == u || *((tbinptr*)(u->parent)) == u); if (u->child[0] != 0) { assert(u->child[0]->parent == u); assert(u->child[0] != u); do_check_tree(m, u->child[0]); } if (u->child[1] != 0) { assert(u->child[1]->parent == u); assert(u->child[1] != u); do_check_tree(m, u->child[1]); } if (u->child[0] != 0 && u->child[1] != 0) { assert(chunksize(u->child[0]) < chunksize(u->child[1])); } } u = u->fd; } while (u != t); assert(head != 0); } /* Check all the chunks in a treebin. */ static void do_check_treebin(mstate m, bindex_t i) { tbinptr* tb = treebin_at(m, i); tchunkptr t = *tb; int empty = (m->treemap & (1U << i)) == 0; if (t == 0) assert(empty); if (!empty) do_check_tree(m, t); } /* Check all the chunks in a smallbin. 
*/ static void do_check_smallbin(mstate m, bindex_t i) { sbinptr b = smallbin_at(m, i); mchunkptr p = b->bk; unsigned int empty = (m->smallmap & (1U << i)) == 0; if (p == b) assert(empty); if (!empty) { for (; p != b; p = p->bk) { size_t size = chunksize(p); mchunkptr q; /* each chunk claims to be free */ do_check_free_chunk(m, p); /* chunk belongs in bin */ assert(small_index(size) == i); assert(p->bk == b || chunksize(p->bk) == chunksize(p)); /* chunk is followed by an inuse chunk */ q = next_chunk(p); if (q->head != FENCEPOST_HEAD) do_check_inuse_chunk(m, q); } } } /* Find x in a bin. Used in other check functions. */ static int bin_find(mstate m, mchunkptr x) { size_t size = chunksize(x); if (is_small(size)) { bindex_t sidx = small_index(size); sbinptr b = smallbin_at(m, sidx); if (smallmap_is_marked(m, sidx)) { mchunkptr p = b; do { if (p == x) return 1; } while ((p = p->fd) != b); } } else { bindex_t tidx; compute_tree_index(size, tidx); if (treemap_is_marked(m, tidx)) { tchunkptr t = *treebin_at(m, tidx); size_t sizebits = size << leftshift_for_tree_index(tidx); while (t != 0 && chunksize(t) != size) { t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; sizebits <<= 1; } if (t != 0) { tchunkptr u = t; do { if (u == (tchunkptr)x) return 1; } while ((u = u->fd) != t); } } } return 0; } /* Traverse each chunk and check it; return total */ static size_t traverse_and_check(mstate m) { size_t sum = 0; if (is_initialized(m)) { msegmentptr s = &m->seg; sum += m->topsize + TOP_FOOT_SIZE; while (s != 0) { mchunkptr q = align_as_chunk(s->base); mchunkptr lastq = 0; assert(pinuse(q)); while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { sum += chunksize(q); if (cinuse(q)) { assert(!bin_find(m, q)); do_check_inuse_chunk(m, q); } else { assert(q == m->dv || bin_find(m, q)); assert(lastq == 0 || cinuse(lastq)); /* Not 2 consecutive free */ do_check_free_chunk(m, q); } lastq = q; q = next_chunk(q); } s = s->next; } } return sum; } /* Check all 
properties of malloc_state. */ static void do_check_malloc_state(mstate m) { bindex_t i; size_t total; /* check bins */ for (i = 0; i < NSMALLBINS; ++i) do_check_smallbin(m, i); for (i = 0; i < NTREEBINS; ++i) do_check_treebin(m, i); if (m->dvsize != 0) { /* check dv chunk */ do_check_any_chunk(m, m->dv); assert(m->dvsize == chunksize(m->dv)); assert(m->dvsize >= MIN_CHUNK_SIZE); assert(bin_find(m, m->dv) == 0); } if (m->top != 0) { /* check top chunk */ do_check_top_chunk(m, m->top); /*assert(m->topsize == chunksize(m->top)); redundant */ assert(m->topsize > 0); assert(bin_find(m, m->top) == 0); } total = traverse_and_check(m); assert(total <= m->footprint); assert(m->footprint <= m->max_footprint); } #endif /* DEBUG */ /* ----------------------------- statistics ------------------------------ */ #if !NO_MALLINFO static struct mallinfo internal_mallinfo(mstate m) { struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; ensure_initialization(); if (!PREACTION(m)) { check_malloc_state(m); if (is_initialized(m)) { size_t nfree = SIZE_T_ONE; /* top always free */ size_t mfree = m->topsize + TOP_FOOT_SIZE; size_t sum = mfree; msegmentptr s = &m->seg; while (s != 0) { mchunkptr q = align_as_chunk(s->base); while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { size_t sz = chunksize(q); sum += sz; if (!cinuse(q)) { mfree += sz; ++nfree; } q = next_chunk(q); } s = s->next; } nm.arena = sum; nm.ordblks = nfree; nm.hblkhd = m->footprint - sum; nm.usmblks = m->max_footprint; nm.uordblks = m->footprint - mfree; nm.fordblks = mfree; nm.keepcost = m->topsize; } POSTACTION(m); } return nm; } #endif /* !NO_MALLINFO */ static void internal_malloc_stats(mstate m) { ensure_initialization(); if (!PREACTION(m)) { size_t maxfp = 0; size_t fp = 0; size_t used = 0; check_malloc_state(m); if (is_initialized(m)) { msegmentptr s = &m->seg; maxfp = m->max_footprint; fp = m->footprint; used = fp - (m->topsize + TOP_FOOT_SIZE); while (s != 0) { mchunkptr q = 
align_as_chunk(s->base); while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { if (!cinuse(q)) used -= chunksize(q); q = next_chunk(q); } s = s->next; } } fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); POSTACTION(m); } } /* ----------------------- Operations on smallbins ----------------------- */ /* Various forms of linking and unlinking are defined as macros. Even the ones for trees, which are very long but have very short typical paths. This is ugly but reduces reliance on inlining support of compilers. */ /* Link a free chunk into a smallbin */ #define insert_small_chunk(M, P, S) {\ bindex_t I = small_index(S);\ mchunkptr B = smallbin_at(M, I);\ mchunkptr F = B;\ assert(S >= MIN_CHUNK_SIZE);\ if (!smallmap_is_marked(M, I))\ mark_smallmap(M, I);\ else if (RTCHECK(ok_address(M, B->fd)))\ F = B->fd;\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ B->fd = P;\ F->bk = P;\ P->fd = F;\ P->bk = B;\ } /* Unlink a chunk from a smallbin */ #define unlink_small_chunk(M, P, S) {\ mchunkptr F = P->fd;\ mchunkptr B = P->bk;\ bindex_t I = small_index(S);\ assert(P != B);\ assert(P != F);\ assert(chunksize(P) == small_index2size(I));\ if (F == B)\ clear_smallmap(M, I);\ else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\ (B == smallbin_at(M,I) || ok_address(M, B)))) {\ F->bk = B;\ B->fd = F;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ } /* Unlink the first chunk from a smallbin */ #define unlink_first_small_chunk(M, B, P, I) {\ mchunkptr F = P->fd;\ assert(P != B);\ assert(P != F);\ assert(chunksize(P) == small_index2size(I));\ if (B == F)\ clear_smallmap(M, I);\ else if (RTCHECK(ok_address(M, F))) {\ B->fd = F;\ F->bk = B;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ } /* Replace dv node, binning the old one */ /* Used only when dvsize known to be small */ #define replace_dv(M, P, S) {\ size_t DVS = 
M->dvsize;\ if (DVS != 0) {\ mchunkptr DV = M->dv;\ assert(is_small(DVS));\ insert_small_chunk(M, DV, DVS);\ }\ M->dvsize = S;\ M->dv = P;\ } /* ------------------------- Operations on trees ------------------------- */ /* Insert chunk into tree */ #define insert_large_chunk(M, X, S) {\ tbinptr* H;\ bindex_t I;\ compute_tree_index(S, I);\ H = treebin_at(M, I);\ X->index = I;\ X->child[0] = X->child[1] = 0;\ if (!treemap_is_marked(M, I)) {\ mark_treemap(M, I);\ *H = X;\ X->parent = (tchunkptr)H;\ X->fd = X->bk = X;\ }\ else {\ tchunkptr T = *H;\ size_t K = S << leftshift_for_tree_index(I);\ for (;;) {\ if (chunksize(T) != S) {\ tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ K <<= 1;\ if (*C != 0)\ T = *C;\ else if (RTCHECK(ok_address(M, C))) {\ *C = X;\ X->parent = T;\ X->fd = X->bk = X;\ break;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ break;\ }\ }\ else {\ tchunkptr F = T->fd;\ if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ T->fd = F->bk = X;\ X->fd = F;\ X->bk = T;\ X->parent = 0;\ break;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ break;\ }\ }\ }\ }\ } /* Unlink steps: 1. If x is a chained node, unlink it from its same-sized fd/bk links and choose its bk node as its replacement. 2. If x was the last node of its size, but not a leaf node, it must be replaced with a leaf node (not merely one with an open left or right), to make sure that lefts and rights of descendents correspond properly to bit masks. We use the rightmost descendent of x. We could use any other leaf, but this is easy to locate and tends to counteract removal of leftmosts elsewhere, and so keeps paths shorter than minimally guaranteed. This doesn't loop much because on average a node in a tree is near the bottom. 3. If x is the base of a chain (i.e., has parent links) relink x's parent and children to x's replacement (or null if none). 
*/ #define unlink_large_chunk(M, X) {\ tchunkptr XP = X->parent;\ tchunkptr R;\ if (X->bk != X) {\ tchunkptr F = X->fd;\ R = X->bk;\ if (RTCHECK(ok_address(M, F))) {\ F->bk = R;\ R->fd = F;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ else {\ tchunkptr* RP;\ if (((R = *(RP = &(X->child[1]))) != 0) ||\ ((R = *(RP = &(X->child[0]))) != 0)) {\ tchunkptr* CP;\ while ((*(CP = &(R->child[1])) != 0) ||\ (*(CP = &(R->child[0])) != 0)) {\ R = *(RP = CP);\ }\ if (RTCHECK(ok_address(M, RP)))\ *RP = 0;\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ }\ if (XP != 0) {\ tbinptr* H = treebin_at(M, X->index);\ if (X == *H) {\ if ((*H = R) == 0) \ clear_treemap(M, X->index);\ }\ else if (RTCHECK(ok_address(M, XP))) {\ if (XP->child[0] == X) \ XP->child[0] = R;\ else \ XP->child[1] = R;\ }\ else\ CORRUPTION_ERROR_ACTION(M);\ if (R != 0) {\ if (RTCHECK(ok_address(M, R))) {\ tchunkptr C0, C1;\ R->parent = XP;\ if ((C0 = X->child[0]) != 0) {\ if (RTCHECK(ok_address(M, C0))) {\ R->child[0] = C0;\ C0->parent = R;\ }\ else\ CORRUPTION_ERROR_ACTION(M);\ }\ if ((C1 = X->child[1]) != 0) {\ if (RTCHECK(ok_address(M, C1))) {\ R->child[1] = C1;\ C1->parent = R;\ }\ else\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ else\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ } /* Relays to large vs small bin operations */ #define insert_chunk(M, P, S)\ if (is_small(S)) insert_small_chunk(M, P, S)\ else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } #define unlink_chunk(M, P, S)\ if (is_small(S)) unlink_small_chunk(M, P, S)\ else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } /* Relays to internal calls to malloc/free from realloc, memalign etc */ #if ONLY_MSPACES #define internal_malloc(m, b) mspace_malloc(m, b) #define internal_free(m, mem) mspace_free(m,mem); #else /* ONLY_MSPACES */ #if MSPACES #define internal_malloc(m, b)\ (m == gm)? 
dlmalloc(b) : mspace_malloc(m, b) #define internal_free(m, mem)\ if (m == gm) dlfree(mem); else mspace_free(m,mem); #else /* MSPACES */ #define internal_malloc(m, b) dlmalloc(b) #define internal_free(m, mem) dlfree(mem) #endif /* MSPACES */ #endif /* ONLY_MSPACES */ /* ----------------------- Direct-mmapping chunks ----------------------- */ /* Directly mmapped chunks are set up with an offset to the start of the mmapped region stored in the prev_foot field of the chunk. This allows reconstruction of the required argument to MUNMAP when freed, and also allows adjustment of the returned chunk to meet alignment requirements (especially in memalign). There is also enough space allocated to hold a fake next chunk of size SIZE_T_SIZE to maintain the PINUSE bit so frees can be checked. */ /* Malloc using mmap */ static void* mmap_alloc(mstate m, size_t nb) { size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); if (mmsize > nb) { /* Check for wrap around 0 */ char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); if (mm != CMFAIL) { size_t offset = align_offset(chunk2mem(mm)); size_t psize = mmsize - offset - MMAP_FOOT_PAD; mchunkptr p = (mchunkptr)(mm + offset); p->prev_foot = offset | IS_MMAPPED_BIT; (p)->head = (psize|CINUSE_BIT); mark_inuse_foot(m, p, psize); chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; if (mm < m->least_addr) m->least_addr = mm; if ((m->footprint += mmsize) > m->max_footprint) m->max_footprint = m->footprint; assert(is_aligned(chunk2mem(p))); check_mmapped_chunk(m, p); return chunk2mem(p); } } return 0; } /* Realloc using mmap */ static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) { size_t oldsize = chunksize(oldp); if (is_small(nb)) /* Can't shrink mmap regions below small size */ return 0; /* Keep old chunk if big enough but not too big */ if (oldsize >= nb + SIZE_T_SIZE && (oldsize - nb) <= (mparams.granularity << 1)) return oldp; else { size_t offset = oldp->prev_foot 
& ~IS_MMAPPED_BIT; size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); char* cp = (char*)(intptr_t)CALL_MREMAP((char*)oldp - offset, oldmmsize, newmmsize, 1); if (cp != CMFAIL) { mchunkptr newp = (mchunkptr)(cp + offset); size_t psize = newmmsize - offset - MMAP_FOOT_PAD; newp->head = (psize|CINUSE_BIT); mark_inuse_foot(m, newp, psize); chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; if (cp < m->least_addr) m->least_addr = cp; if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) m->max_footprint = m->footprint; check_mmapped_chunk(m, newp); return newp; } } return 0; } /* -------------------------- mspace management -------------------------- */ /* Initialize top chunk and its size */ static void init_top(mstate m, mchunkptr p, size_t psize) { /* Ensure alignment */ size_t offset = align_offset(chunk2mem(p)); p = (mchunkptr)((char*)p + offset); psize -= offset; m->top = p; m->topsize = psize; p->head = psize | PINUSE_BIT; /* set size of fake trailing chunk holding overhead space only once */ chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; m->trim_check = mparams.trim_threshold; /* reset on each update */ } /* Initialize bins for a new mstate that is otherwise zeroed out */ static void init_bins(mstate m) { /* Establish circular links for smallbins */ bindex_t i; for (i = 0; i < NSMALLBINS; ++i) { sbinptr bin = smallbin_at(m,i); bin->fd = bin->bk = bin; } } #if PROCEED_ON_ERROR /* default corruption action */ static void reset_on_error(mstate m) { int i; ++malloc_corruption_error_count; /* Reinitialize fields to forget about all memory */ m->smallbins = m->treebins = 0; m->dvsize = m->topsize = 0; m->seg.base = 0; m->seg.size = 0; m->seg.next = 0; m->top = m->dv = 0; for (i = 0; i < NTREEBINS; ++i) *treebin_at(m, i) = 0; init_bins(m); } #endif /* PROCEED_ON_ERROR */ /* Allocate chunk and prepend remainder with chunk in 
successor base. */ static void* prepend_alloc(mstate m, char* newbase, char* oldbase, size_t nb) { mchunkptr p = align_as_chunk(newbase); mchunkptr oldfirst = align_as_chunk(oldbase); size_t psize = (char*)oldfirst - (char*)p; mchunkptr q = chunk_plus_offset(p, nb); size_t qsize = psize - nb; set_size_and_pinuse_of_inuse_chunk(m, p, nb); assert((char*)oldfirst > (char*)q); assert(pinuse(oldfirst)); assert(qsize >= MIN_CHUNK_SIZE); /* consolidate remainder with first chunk of old base */ if (oldfirst == m->top) { size_t tsize = m->topsize += qsize; m->top = q; q->head = tsize | PINUSE_BIT; check_top_chunk(m, q); } else if (oldfirst == m->dv) { size_t dsize = m->dvsize += qsize; m->dv = q; set_size_and_pinuse_of_free_chunk(q, dsize); } else { if (!cinuse(oldfirst)) { size_t nsize = chunksize(oldfirst); unlink_chunk(m, oldfirst, nsize); oldfirst = chunk_plus_offset(oldfirst, nsize); qsize += nsize; } set_free_with_pinuse(q, qsize, oldfirst); insert_chunk(m, q, qsize); check_free_chunk(m, q); } check_malloced_chunk(m, chunk2mem(p), nb); return chunk2mem(p); } /* Add a segment to hold a new noncontiguous region */ static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { /* Determine locations and sizes of segment, fenceposts, old top */ char* old_top = (char*)m->top; msegmentptr oldsp = segment_holding(m, old_top); char* old_end = oldsp->base + oldsp->size; size_t ssize = pad_request(sizeof(struct malloc_segment)); char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); size_t offset = align_offset(chunk2mem(rawsp)); char* asp = rawsp + offset; char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? 
old_top : asp; mchunkptr sp = (mchunkptr)csp; msegmentptr ss = (msegmentptr)(chunk2mem(sp)); mchunkptr tnext = chunk_plus_offset(sp, ssize); mchunkptr p = tnext; int nfences = 0; /* reset top to new space */ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); /* Set up segment record */ assert(is_aligned(ss)); set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); *ss = m->seg; /* Push current record */ m->seg.base = tbase; m->seg.size = tsize; m->seg.sflags = mmapped; m->seg.next = ss; /* Insert trailing fenceposts */ for (;;) { mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); p->head = FENCEPOST_HEAD; ++nfences; if ((char*)(&(nextp->head)) < old_end) p = nextp; else break; } assert(nfences >= 2); /* Insert the rest of old top into a bin as an ordinary free chunk */ if (csp != old_top) { mchunkptr q = (mchunkptr)old_top; size_t psize = csp - old_top; mchunkptr tn = chunk_plus_offset(q, psize); set_free_with_pinuse(q, psize, tn); insert_chunk(m, q, psize); } check_top_chunk(m, m->top); } /* -------------------------- System allocation -------------------------- */ /* Get memory from system using MORECORE or MMAP */ static void* sys_alloc(mstate m, size_t nb) { char* tbase = CMFAIL; size_t tsize = 0; flag_t mmap_flag = 0; ensure_initialization(); /* Directly map large chunks */ if (use_mmap(m) && nb >= mparams.mmap_threshold) { void* mem = mmap_alloc(m, nb); if (mem != 0) return mem; } /* Try getting memory in any of three ways (in most-preferred to least-preferred order): 1. A call to MORECORE that can normally contiguously extend memory. (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or or main space is mmapped or a previous contiguous call failed) 2. A call to MMAP new space (disabled if not HAVE_MMAP). Note that under the default settings, if MORECORE is unable to fulfill a request, and HAVE_MMAP is true, then mmap is used as a noncontiguous system allocator. 
This is a useful backup strategy for systems with holes in address spaces -- in this case sbrk cannot contiguously expand the heap, but mmap may be able to find space. 3. A call to MORECORE that cannot usually contiguously extend memory. (disabled if not HAVE_MORECORE) In all cases, we need to request enough bytes from system to ensure we can malloc nb bytes upon success, so pad with enough space for top_foot, plus alignment-pad to make sure we don't lose bytes if not on boundary, and round this up to a granularity unit. */ if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { char* br = CMFAIL; msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top); size_t asize = 0; ACQUIRE_MALLOC_GLOBAL_LOCK(); if (ss == 0) { /* First time through or recovery */ char* base = (char*)CALL_MORECORE(0); if (base != CMFAIL) { asize = granularity_align(nb + SYS_ALLOC_PADDING); /* Adjust to end on a page boundary */ if (!is_page_aligned(base)) asize += (page_align((size_t)base) - (size_t)base); /* Can't call MORECORE if size is negative when treated as signed */ if (asize < HALF_MAX_SIZE_T && (br = (char*)(CALL_MORECORE(asize))) == base) { tbase = base; tsize = asize; } } } else { /* Subtract out existing available top space from MORECORE request. 
*/ asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); /* Use mem here only if it did continuously extend old space */ if (asize < HALF_MAX_SIZE_T && (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) { tbase = br; tsize = asize; } } if (tbase == CMFAIL) { /* Cope with partial failure */ if (br != CMFAIL) { /* Try to use/extend the space we did get */ if (asize < HALF_MAX_SIZE_T && asize < nb + SYS_ALLOC_PADDING) { size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize); if (esize < HALF_MAX_SIZE_T) { char* end = (char*)CALL_MORECORE(esize); if (end != CMFAIL) asize += esize; else { /* Can't use; try to release */ (void) CALL_MORECORE(-asize); br = CMFAIL; } } } } if (br != CMFAIL) { /* Use the space we did get */ tbase = br; tsize = asize; } else disable_contiguous(m); /* Don't try contiguous path in the future */ } RELEASE_MALLOC_GLOBAL_LOCK(); } if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING); if (rsize > nb) { /* Fail if wraps around zero */ char* mp = (char*)(CALL_MMAP(rsize)); if (mp != CMFAIL) { tbase = mp; tsize = rsize; mmap_flag = IS_MMAPPED_BIT; } } } if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ size_t asize = granularity_align(nb + SYS_ALLOC_PADDING); if (asize < HALF_MAX_SIZE_T) { char* br = CMFAIL; char* end = CMFAIL; ACQUIRE_MALLOC_GLOBAL_LOCK(); br = (char*)(CALL_MORECORE(asize)); end = (char*)(CALL_MORECORE(0)); RELEASE_MALLOC_GLOBAL_LOCK(); if (br != CMFAIL && end != CMFAIL && br < end) { size_t ssize = end - br; if (ssize > nb + TOP_FOOT_SIZE) { tbase = br; tsize = ssize; } } } } if (tbase != CMFAIL) { if ((m->footprint += tsize) > m->max_footprint) m->max_footprint = m->footprint; if (!is_initialized(m)) { /* first-time initialization */ m->seg.base = m->least_addr = tbase; m->seg.size = tsize; m->seg.sflags = mmap_flag; m->magic = mparams.magic; m->release_checks = MAX_RELEASE_CHECK_RATE; init_bins(m); #if !ONLY_MSPACES if 
(is_global(m)) init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); else #endif { /* Offset top by embedded malloc_state */ mchunkptr mn = next_chunk(mem2chunk(m)); init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); } } else { /* Try to merge with an existing segment */ msegmentptr sp = &m->seg; /* Only consider most recent segment if traversal suppressed */ while (sp != 0 && tbase != sp->base + sp->size) sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; if (sp != 0 && !is_extern_segment(sp) && (sp->sflags & IS_MMAPPED_BIT) == mmap_flag && segment_holds(sp, m->top)) { /* append */ sp->size += tsize; init_top(m, m->top, m->topsize + tsize); } else { if (tbase < m->least_addr) m->least_addr = tbase; sp = &m->seg; while (sp != 0 && sp->base != tbase + tsize) sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; if (sp != 0 && !is_extern_segment(sp) && (sp->sflags & IS_MMAPPED_BIT) == mmap_flag) { char* oldbase = sp->base; sp->base = tbase; sp->size += tsize; return prepend_alloc(m, tbase, oldbase, nb); } else add_segment(m, tbase, tsize, mmap_flag); } } if (nb < m->topsize) { /* Allocate from new or extended top space */ size_t rsize = m->topsize -= nb; mchunkptr p = m->top; mchunkptr r = m->top = chunk_plus_offset(p, nb); r->head = rsize | PINUSE_BIT; set_size_and_pinuse_of_inuse_chunk(m, p, nb); check_top_chunk(m, m->top); check_malloced_chunk(m, chunk2mem(p), nb); return chunk2mem(p); } } MALLOC_FAILURE_ACTION; return 0; } /* ----------------------- system deallocation -------------------------- */ /* Unmap and unlink any mmapped segments that don't contain used chunks */ static size_t release_unused_segments(mstate m) { size_t released = 0; int nsegs = 0; msegmentptr pred = &m->seg; msegmentptr sp = pred->next; while (sp != 0) { char* base = sp->base; size_t size = sp->size; msegmentptr next = sp->next; ++nsegs; if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { mchunkptr p = align_as_chunk(base); size_t psize = chunksize(p); /* Can unmap if first chunk 
holds entire segment and not pinned */ if (!cinuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { tchunkptr tp = (tchunkptr)p; assert(segment_holds(sp, (char*)sp)); if (p == m->dv) { m->dv = 0; m->dvsize = 0; } else { unlink_large_chunk(m, tp); } if (CALL_MUNMAP(base, size) == 0) { released += size; m->footprint -= size; /* unlink obsoleted record */ sp = pred; sp->next = next; } else { /* back out if cannot unmap */ insert_large_chunk(m, tp, psize); } } } if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ break; pred = sp; sp = next; } /* Reset check counter */ m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)? nsegs : MAX_RELEASE_CHECK_RATE); return released; } static int sys_trim(mstate m, size_t pad) { size_t released = 0; ensure_initialization(); if (pad < MAX_REQUEST && is_initialized(m)) { pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ if (m->topsize > pad) { /* Shrink top space in granularity-size units, keeping at least one */ size_t unit = mparams.granularity; size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - SIZE_T_ONE) * unit; msegmentptr sp = segment_holding(m, (char*)m->top); if (!is_extern_segment(sp)) { if (is_mmapped_segment(sp)) { if (HAVE_MMAP && sp->size >= extra && !has_segment_link(m, sp)) { /* can't shrink if pinned */ size_t newsize = sp->size - extra; /* Prefer mremap, fall back to munmap */ if (((void*)(intptr_t)CALL_MREMAP(sp->base, sp->size, newsize, 0) != (void*)MFAIL) || (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { released = extra; } } } else if (HAVE_MORECORE) { if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; ACQUIRE_MALLOC_GLOBAL_LOCK(); { /* Make sure end of memory is where we last set it. 
*/ char* old_br = (char*)(CALL_MORECORE(0)); if (old_br == sp->base + sp->size) { char* rel_br = (char*)(CALL_MORECORE(-extra)); char* new_br = (char*)(CALL_MORECORE(0)); if (rel_br != CMFAIL && new_br < old_br) released = old_br - new_br; } } RELEASE_MALLOC_GLOBAL_LOCK(); } } if (released != 0) { sp->size -= released; m->footprint -= released; init_top(m, m->top, m->topsize - released); check_top_chunk(m, m->top); } } /* Unmap any unused mmapped segments */ if (HAVE_MMAP) released += release_unused_segments(m); /* On failure, disable autotrim to avoid repeated failed future calls */ if (released == 0 && m->topsize > m->trim_check) m->trim_check = MAX_SIZE_T; } return (released != 0)? 1 : 0; } /* ---------------------------- malloc support --------------------------- */ /* allocate a large request from the best fitting chunk in a treebin */ static void* tmalloc_large(mstate m, size_t nb) { tchunkptr v = 0; size_t rsize = -nb; /* Unsigned negation */ tchunkptr t; bindex_t idx; compute_tree_index(nb, idx); if ((t = *treebin_at(m, idx)) != 0) { /* Traverse tree for this bin looking for node with size == nb */ size_t sizebits = nb << leftshift_for_tree_index(idx); tchunkptr rst = 0; /* The deepest untaken right subtree */ for (;;) { tchunkptr rt; size_t trem = chunksize(t) - nb; if (trem < rsize) { v = t; if ((rsize = trem) == 0) break; } rt = t->child[1]; t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; if (rt != 0 && rt != t) rst = rt; if (t == 0) { t = rst; /* set t to least subtree holding sizes > nb */ break; } sizebits <<= 1; } } if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; if (leftbits != 0) { bindex_t i; binmap_t leastbit = least_bit(leftbits); compute_bit2idx(leastbit, i); t = *treebin_at(m, i); } } while (t != 0) { /* find smallest of tree or subtree */ size_t trem = chunksize(t) - nb; if (trem < rsize) { rsize = trem; v = t; } t = leftmost_child(t); } /* If dv is a 
better fit, return 0 so malloc will use it */ if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { if (RTCHECK(ok_address(m, v))) { /* split */ mchunkptr r = chunk_plus_offset(v, nb); assert(chunksize(v) == rsize + nb); if (RTCHECK(ok_next(v, r))) { unlink_large_chunk(m, v); if (rsize < MIN_CHUNK_SIZE) set_inuse_and_pinuse(m, v, (rsize + nb)); else { set_size_and_pinuse_of_inuse_chunk(m, v, nb); set_size_and_pinuse_of_free_chunk(r, rsize); insert_chunk(m, r, rsize); } return chunk2mem(v); } } CORRUPTION_ERROR_ACTION(m); } return 0; } /* allocate a small request from the best fitting chunk in a treebin */ static void* tmalloc_small(mstate m, size_t nb) { tchunkptr t, v; size_t rsize; bindex_t i; binmap_t leastbit = least_bit(m->treemap); compute_bit2idx(leastbit, i); v = t = *treebin_at(m, i); rsize = chunksize(t) - nb; while ((t = leftmost_child(t)) != 0) { size_t trem = chunksize(t) - nb; if (trem < rsize) { rsize = trem; v = t; } } if (RTCHECK(ok_address(m, v))) { mchunkptr r = chunk_plus_offset(v, nb); assert(chunksize(v) == rsize + nb); if (RTCHECK(ok_next(v, r))) { unlink_large_chunk(m, v); if (rsize < MIN_CHUNK_SIZE) set_inuse_and_pinuse(m, v, (rsize + nb)); else { set_size_and_pinuse_of_inuse_chunk(m, v, nb); set_size_and_pinuse_of_free_chunk(r, rsize); replace_dv(m, r, rsize); } return chunk2mem(v); } } CORRUPTION_ERROR_ACTION(m); return 0; } /* --------------------------- realloc support --------------------------- */ static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { if (bytes >= MAX_REQUEST) { MALLOC_FAILURE_ACTION; return 0; } if (!PREACTION(m)) { mchunkptr oldp = mem2chunk(oldmem); size_t oldsize = chunksize(oldp); mchunkptr next = chunk_plus_offset(oldp, oldsize); mchunkptr newp = 0; void* extra = 0; /* Try to either shrink or extend into top. 
Else malloc-copy-free */ if (RTCHECK(ok_address(m, oldp) && ok_cinuse(oldp) && ok_next(oldp, next) && ok_pinuse(next))) { size_t nb = request2size(bytes); if (is_mmapped(oldp)) newp = mmap_resize(m, oldp, nb); else if (oldsize >= nb) { /* already big enough */ size_t rsize = oldsize - nb; newp = oldp; if (rsize >= MIN_CHUNK_SIZE) { mchunkptr remainder = chunk_plus_offset(newp, nb); set_inuse(m, newp, nb); set_inuse(m, remainder, rsize); extra = chunk2mem(remainder); } } else if (next == m->top && oldsize + m->topsize > nb) { /* Expand into top */ size_t newsize = oldsize + m->topsize; size_t newtopsize = newsize - nb; mchunkptr newtop = chunk_plus_offset(oldp, nb); set_inuse(m, oldp, nb); newtop->head = newtopsize |PINUSE_BIT; m->top = newtop; m->topsize = newtopsize; newp = oldp; } } else { USAGE_ERROR_ACTION(m, oldmem); POSTACTION(m); return 0; } POSTACTION(m); if (newp != 0) { if (extra != 0) { internal_free(m, extra); } check_inuse_chunk(m, newp); return chunk2mem(newp); } else { void* newmem = internal_malloc(m, bytes); if (newmem != 0) { size_t oc = oldsize - overhead_for(oldp); memcpy(newmem, oldmem, (oc < bytes)? 
oc : bytes); internal_free(m, oldmem); } return newmem; } } return 0; } /* --------------------------- memalign support -------------------------- */ static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */ return internal_malloc(m, bytes); if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ alignment = MIN_CHUNK_SIZE; if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ size_t a = MALLOC_ALIGNMENT << 1; while (a < alignment) a <<= 1; alignment = a; } if (bytes >= MAX_REQUEST - alignment) { if (m != 0) { /* Test isn't needed but avoids compiler warning */ MALLOC_FAILURE_ACTION; } } else { size_t nb = request2size(bytes); size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; char* mem = (char*)internal_malloc(m, req); if (mem != 0) { void* leader = 0; void* trailer = 0; mchunkptr p = mem2chunk(mem); if (PREACTION(m)) return 0; if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */ /* Find an aligned spot inside chunk. Since we need to give back leading space in a chunk of at least MIN_CHUNK_SIZE, if the first calculation places us at a spot with less than MIN_CHUNK_SIZE leader, we can move to the next aligned spot. We've allocated enough total room so that this is always possible. */ char* br = (char*)mem2chunk((size_t)(((size_t)(mem + alignment - SIZE_T_ONE)) & -alignment)); char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? 
br : br+alignment; mchunkptr newp = (mchunkptr)pos; size_t leadsize = pos - (char*)(p); size_t newsize = chunksize(p) - leadsize; if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ newp->prev_foot = p->prev_foot + leadsize; newp->head = (newsize|CINUSE_BIT); } else { /* Otherwise, give back leader, use the rest */ set_inuse(m, newp, newsize); set_inuse(m, p, leadsize); leader = chunk2mem(p); } p = newp; } /* Give back spare room at the end */ if (!is_mmapped(p)) { size_t size = chunksize(p); if (size > nb + MIN_CHUNK_SIZE) { size_t remainder_size = size - nb; mchunkptr remainder = chunk_plus_offset(p, nb); set_inuse(m, p, nb); set_inuse(m, remainder, remainder_size); trailer = chunk2mem(remainder); } } assert (chunksize(p) >= nb); assert((((size_t)(chunk2mem(p))) % alignment) == 0); check_inuse_chunk(m, p); POSTACTION(m); if (leader != 0) { internal_free(m, leader); } if (trailer != 0) { internal_free(m, trailer); } return chunk2mem(p); } } return 0; } /* ------------------------ comalloc/coalloc support --------------------- */ static void** ialloc(mstate m, size_t n_elements, size_t* sizes, int opts, void* chunks[]) { /* This provides common support for independent_X routines, handling all of the combinations that can result. 
The opts arg has: bit 0 set if all elements are same size (using sizes[0]) bit 1 set if elements should be zeroed */ size_t element_size; /* chunksize of each element, if all same */ size_t contents_size; /* total size of elements */ size_t array_size; /* request size of pointer array */ void* mem; /* malloced aggregate space */ mchunkptr p; /* corresponding chunk */ size_t remainder_size; /* remaining bytes while splitting */ void** marray; /* either "chunks" or malloced ptr array */ mchunkptr array_chunk; /* chunk for malloced ptr array */ flag_t was_enabled; /* to disable mmap */ size_t size; size_t i; ensure_initialization(); /* compute array length, if needed */ if (chunks != 0) { if (n_elements == 0) return chunks; /* nothing to do */ marray = chunks; array_size = 0; } else { /* if empty req, must still return chunk representing empty array */ if (n_elements == 0) return (void**)internal_malloc(m, 0); marray = 0; array_size = request2size(n_elements * (sizeof(void*))); } /* compute total element size */ if (opts & 0x1) { /* all-same-size */ element_size = request2size(*sizes); contents_size = n_elements * element_size; } else { /* add up all the sizes */ element_size = 0; contents_size = 0; for (i = 0; i != n_elements; ++i) contents_size += request2size(sizes[i]); } size = contents_size + array_size; /* Allocate the aggregate chunk. First disable direct-mmapping so malloc won't use it, since we would not be able to later free/realloc space internal to a segregated mmap region. 
*/ was_enabled = use_mmap(m); disable_mmap(m); mem = internal_malloc(m, size - CHUNK_OVERHEAD); if (was_enabled) enable_mmap(m); if (mem == 0) return 0; if (PREACTION(m)) return 0; p = mem2chunk(mem); remainder_size = chunksize(p); assert(!is_mmapped(p)); if (opts & 0x2) { /* optionally clear the elements */ memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); } /* If not provided, allocate the pointer array as final part of chunk */ if (marray == 0) { size_t array_chunk_size; array_chunk = chunk_plus_offset(p, contents_size); array_chunk_size = remainder_size - contents_size; marray = (void**) (chunk2mem(array_chunk)); set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); remainder_size = contents_size; } /* split out elements */ for (i = 0; ; ++i) { marray[i] = chunk2mem(p); if (i != n_elements-1) { if (element_size != 0) size = element_size; else size = request2size(sizes[i]); remainder_size -= size; set_size_and_pinuse_of_inuse_chunk(m, p, size); p = chunk_plus_offset(p, size); } else { /* the final element absorbs any overallocation slop */ set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); break; } } #if DEBUG if (marray != chunks) { /* final element must have exactly exhausted chunk */ if (element_size != 0) { assert(remainder_size == element_size); } else { assert(remainder_size == request2size(sizes[i])); } check_inuse_chunk(m, mem2chunk(marray)); } for (i = 0; i != n_elements; ++i) check_inuse_chunk(m, mem2chunk(marray[i])); #endif /* DEBUG */ POSTACTION(m); return marray; } /* -------------------------- public routines ---------------------------- */ #if !ONLY_MSPACES void* dlmalloc(size_t bytes) { /* Basic algorithm: If a small request (< 256 bytes minus per-chunk overhead): 1. If one exists, use a remainderless chunk in associated smallbin. (Remainderless means that there are too few excess bytes to represent as a chunk.) 2. 
If it is big enough, use the dv chunk, which is normally the chunk adjacent to the one used for the most recent small request. 3. If one exists, split the smallest available chunk in a bin, saving remainder in dv. 4. If it is big enough, use the top chunk. 5. If available, get memory from system and use it Otherwise, for a large request: 1. Find the smallest available binned chunk that fits, and use it if it is better fitting than dv chunk, splitting if necessary. 2. If better fitting than any binned chunk, use the dv chunk. 3. If it is big enough, use the top chunk. 4. If request size >= mmap threshold, try to directly mmap this chunk. 5. If available, get memory from system and use it The ugly goto's here ensure that postaction occurs along all paths. */ #if USE_LOCKS ensure_initialization(); /* initialize in sys_alloc if not using locks */ #endif if (!PREACTION(gm)) { void* mem; size_t nb; if (bytes <= MAX_SMALL_REQUEST) { bindex_t idx; binmap_t smallbits; nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); idx = small_index(nb); smallbits = gm->smallmap >> idx; if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ mchunkptr b, p; idx += ~smallbits & 1; /* Uses next bin if idx empty */ b = smallbin_at(gm, idx); p = b->fd; assert(chunksize(p) == small_index2size(idx)); unlink_first_small_chunk(gm, b, p, idx); set_inuse_and_pinuse(gm, p, small_index2size(idx)); mem = chunk2mem(p); check_malloced_chunk(gm, mem, nb); goto postaction; } else if (nb > gm->dvsize) { if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ mchunkptr b, p, r; size_t rsize; bindex_t i; binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); binmap_t leastbit = least_bit(leftbits); compute_bit2idx(leastbit, i); b = smallbin_at(gm, i); p = b->fd; assert(chunksize(p) == small_index2size(i)); unlink_first_small_chunk(gm, b, p, i); rsize = small_index2size(i) - nb; /* Fit here cannot be remainderless if 4byte sizes */ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) set_inuse_and_pinuse(gm, p, small_index2size(i)); else { set_size_and_pinuse_of_inuse_chunk(gm, p, nb); r = chunk_plus_offset(p, nb); set_size_and_pinuse_of_free_chunk(r, rsize); replace_dv(gm, r, rsize); } mem = chunk2mem(p); check_malloced_chunk(gm, mem, nb); goto postaction; } else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { check_malloced_chunk(gm, mem, nb); goto postaction; } } } else if (bytes >= MAX_REQUEST) nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ else { nb = pad_request(bytes); if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { check_malloced_chunk(gm, mem, nb); goto postaction; } } if (nb <= gm->dvsize) { size_t rsize = gm->dvsize - nb; mchunkptr p = gm->dv; if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ mchunkptr r = gm->dv = chunk_plus_offset(p, nb); gm->dvsize = rsize; set_size_and_pinuse_of_free_chunk(r, rsize); set_size_and_pinuse_of_inuse_chunk(gm, p, nb); } else { /* exhaust dv */ size_t dvs = gm->dvsize; gm->dvsize = 0; gm->dv = 0; set_inuse_and_pinuse(gm, p, dvs); } mem = chunk2mem(p); check_malloced_chunk(gm, mem, nb); goto postaction; } else if (nb < gm->topsize) { /* Split top */ size_t rsize = gm->topsize -= nb; mchunkptr p = gm->top; mchunkptr r = gm->top = chunk_plus_offset(p, nb); r->head = rsize | PINUSE_BIT; set_size_and_pinuse_of_inuse_chunk(gm, p, nb); mem = chunk2mem(p); check_top_chunk(gm, gm->top); check_malloced_chunk(gm, mem, nb); goto postaction; } mem = sys_alloc(gm, nb); postaction: POSTACTION(gm); return mem; } return 0; } void dlfree(void* mem) { /* Consolidate freed chunks with preceeding or succeeding bordering free chunks, if they exist, and then place in a bin. Intermixed with special cases for top, dv, mmapped chunks, and usage errors. 
*/ if (mem != 0) { mchunkptr p = mem2chunk(mem); #if FOOTERS mstate fm = get_mstate_for(p); if (!ok_magic(fm)) { USAGE_ERROR_ACTION(fm, p); return; } #else /* FOOTERS */ #define fm gm #endif /* FOOTERS */ if (!PREACTION(fm)) { check_inuse_chunk(fm, p); if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) { size_t psize = chunksize(p); mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { size_t prevsize = p->prev_foot; if ((prevsize & IS_MMAPPED_BIT) != 0) { prevsize &= ~IS_MMAPPED_BIT; psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) fm->footprint -= psize; goto postaction; } else { mchunkptr prev = chunk_minus_offset(p, prevsize); psize += prevsize; p = prev; if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ if (p != fm->dv) { unlink_chunk(fm, p, prevsize); } else if ((next->head & INUSE_BITS) == INUSE_BITS) { fm->dvsize = psize; set_free_with_pinuse(p, psize, next); goto postaction; } } else goto erroraction; } } if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { if (!cinuse(next)) { /* consolidate forward */ if (next == fm->top) { size_t tsize = fm->topsize += psize; fm->top = p; p->head = tsize | PINUSE_BIT; if (p == fm->dv) { fm->dv = 0; fm->dvsize = 0; } if (should_trim(fm, tsize)) sys_trim(fm, 0); goto postaction; } else if (next == fm->dv) { size_t dsize = fm->dvsize += psize; fm->dv = p; set_size_and_pinuse_of_free_chunk(p, dsize); goto postaction; } else { size_t nsize = chunksize(next); psize += nsize; unlink_chunk(fm, next, nsize); set_size_and_pinuse_of_free_chunk(p, psize); if (p == fm->dv) { fm->dvsize = psize; goto postaction; } } } else set_free_with_pinuse(p, psize, next); if (is_small(psize)) { insert_small_chunk(fm, p, psize); check_free_chunk(fm, p); } else { tchunkptr tp = (tchunkptr)p; insert_large_chunk(fm, tp, psize); check_free_chunk(fm, p); if (--fm->release_checks == 0) release_unused_segments(fm); } goto postaction; } } erroraction: USAGE_ERROR_ACTION(fm, p); postaction: 
POSTACTION(fm); } } #if !FOOTERS #undef fm #endif /* FOOTERS */ } void* dlcalloc(size_t n_elements, size_t elem_size) { void* mem; size_t req = 0; if (n_elements != 0) { req = n_elements * elem_size; if (((n_elements | elem_size) & ~(size_t)0xffff) && (req / n_elements != elem_size)) req = MAX_SIZE_T; /* force downstream failure on overflow */ } mem = dlmalloc(req); if (mem != 0 && calloc_must_clear(mem2chunk(mem))) memset(mem, 0, req); return mem; } void* dlrealloc(void* oldmem, size_t bytes) { if (oldmem == 0) return dlmalloc(bytes); #ifdef REALLOC_ZERO_BYTES_FREES if (bytes == 0) { dlfree(oldmem); return 0; } #endif /* REALLOC_ZERO_BYTES_FREES */ else { #if ! FOOTERS mstate m = gm; #else /* FOOTERS */ mstate m = get_mstate_for(mem2chunk(oldmem)); if (!ok_magic(m)) { USAGE_ERROR_ACTION(m, oldmem); return 0; } #endif /* FOOTERS */ return internal_realloc(m, oldmem, bytes); } } void* dlmemalign(size_t alignment, size_t bytes) { return internal_memalign(gm, alignment, bytes); } void** dlindependent_calloc(size_t n_elements, size_t elem_size, void* chunks[]) { size_t sz = elem_size; /* serves as 1-element array */ return ialloc(gm, n_elements, &sz, 3, chunks); } void** dlindependent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]) { return ialloc(gm, n_elements, sizes, 0, chunks); } void* dlvalloc(size_t bytes) { size_t pagesz; ensure_initialization(); pagesz = mparams.page_size; return dlmemalign(pagesz, bytes); } void* dlpvalloc(size_t bytes) { size_t pagesz; ensure_initialization(); pagesz = mparams.page_size; return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); } int dlmalloc_trim(size_t pad) { ensure_initialization(); int result = 0; if (!PREACTION(gm)) { result = sys_trim(gm, pad); POSTACTION(gm); } return result; } size_t dlmalloc_footprint(void) { return gm->footprint; } size_t dlmalloc_max_footprint(void) { return gm->max_footprint; } #if !NO_MALLINFO struct mallinfo dlmallinfo(void) { return internal_mallinfo(gm); } 
#endif /* NO_MALLINFO */

/* Print allocation statistics for the global mspace. */
void dlmalloc_stats() {
  internal_malloc_stats(gm);
}

/* Tune a malloc parameter (trim threshold, granularity, mmap threshold). */
int dlmallopt(int param_number, int value) {
  return change_mparam(param_number, value);
}

#endif /* !ONLY_MSPACES */

/* Usable payload size of an allocated block (0 for NULL). */
size_t dlmalloc_usable_size(void* mem) {
  if (mem != 0) {
    mchunkptr p = mem2chunk(mem);
    if (cinuse(p))
      return chunksize(p) - overhead_for(p);
  }
  return 0;
}

/* ----------------------------- user mspaces ---------------------------- */

#if MSPACES

/* Build a malloc_state at the start of [tbase, tbase+tsize) and turn the
   remainder of the region into its initial top chunk. */
static mstate init_user_mstate(char* tbase, size_t tsize) {
  size_t msize = pad_request(sizeof(struct malloc_state));
  mchunkptr mn;
  mchunkptr msp = align_as_chunk(tbase);
  mstate m = (mstate)(chunk2mem(msp));
  memset(m, 0, msize);
  INITIAL_LOCK(&m->mutex);
  /* The state itself lives inside a chunk marked as in use. */
  msp->head = (msize|PINUSE_BIT|CINUSE_BIT);
  m->seg.base = m->least_addr = tbase;
  m->seg.size = m->footprint = m->max_footprint = tsize;
  m->magic = mparams.magic;
  m->release_checks = MAX_RELEASE_CHECK_RATE;
  m->mflags = mparams.default_mflags;
  m->extp = 0;
  m->exts = 0;
  disable_contiguous(m);
  init_bins(m);
  mn = next_chunk(mem2chunk(m));
  init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE);
  check_top_chunk(m, m->top);
  return m;
}

/* Create an independent mspace of at least `capacity` bytes, backed by a
   fresh mmap region.  `locked` selects per-mspace locking. */
mspace create_mspace(size_t capacity, int locked) {
  mstate m = 0;
  size_t msize;
  ensure_initialization();
  msize = pad_request(sizeof(struct malloc_state));
  /* Reject capacities so large that adding bookkeeping overhead wraps. */
  if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
    size_t rs = ((capacity == 0)? mparams.granularity :
                 (capacity + TOP_FOOT_SIZE + msize));
    size_t tsize = granularity_align(rs);
    char* tbase = (char*)(CALL_MMAP(tsize));
    if (tbase != CMFAIL) {
      m = init_user_mstate(tbase, tsize);
      m->seg.sflags = IS_MMAPPED_BIT;
      set_lock(m, locked);
    }
  }
  return (mspace)m;
}

/* Create an mspace inside caller-provided memory.  The base segment is
   flagged EXTERN so destroy_mspace will not attempt to unmap it. */
mspace create_mspace_with_base(void* base, size_t capacity, int locked) {
  mstate m = 0;
  size_t msize;
  ensure_initialization();
  msize = pad_request(sizeof(struct malloc_state));
  if (capacity > msize + TOP_FOOT_SIZE &&
      capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
    m = init_user_mstate((char*)base, capacity);
    m->seg.sflags = EXTERN_BIT;
    set_lock(m, locked);
  }
  return (mspace)m;
}

/* Enable or disable direct mmap of large requests for this mspace.
   Returns the previous setting (1 if mmap was in use). */
int mspace_mmap_large_chunks(mspace msp, int enable) {
  int ret = 0;
  mstate ms = (mstate)msp;
  if (!PREACTION(ms)) {
    if (use_mmap(ms))
      ret = 1;
    if (enable)
      enable_mmap(ms);
    else
      disable_mmap(ms);
    POSTACTION(ms);
  }
  return ret;
}

/* Tear down an mspace, unmapping every segment it mmapped itself
   (EXTERN segments are left alone).  Returns bytes actually released. */
size_t destroy_mspace(mspace msp) {
  size_t freed = 0;
  mstate ms = (mstate)msp;
  if (ok_magic(ms)) {
    msegmentptr sp = &ms->seg;
    while (sp != 0) {
      char* base = sp->base;
      size_t size = sp->size;
      flag_t flag = sp->sflags;
      sp = sp->next;
      if ((flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) &&
          CALL_MUNMAP(base, size) == 0)
        freed += size;
    }
  }
  else {
    USAGE_ERROR_ACTION(ms,ms);
  }
  return freed;
}

/*
  mspace versions of routines are near-clones of the global
  versions. This is not so nice but better than the alternatives.
*/

/* mspace analogue of dlmalloc(): smallbin / dv / treebin / top / sys_alloc
   in that order.  See the global version for the full algorithm. */
void* mspace_malloc(mspace msp, size_t bytes) {
  mstate ms = (mstate)msp;
  if (!ok_magic(ms)) {
    USAGE_ERROR_ACTION(ms,ms);
    return 0;
  }
  if (!PREACTION(ms)) {
    void* mem;
    size_t nb;
    if (bytes <= MAX_SMALL_REQUEST) {
      bindex_t idx;
      binmap_t smallbits;
      nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
      idx = small_index(nb);
      smallbits = ms->smallmap >> idx;
      if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin.
*/ mchunkptr b, p; idx += ~smallbits & 1; /* Uses next bin if idx empty */ b = smallbin_at(ms, idx); p = b->fd; assert(chunksize(p) == small_index2size(idx)); unlink_first_small_chunk(ms, b, p, idx); set_inuse_and_pinuse(ms, p, small_index2size(idx)); mem = chunk2mem(p); check_malloced_chunk(ms, mem, nb); goto postaction; } else if (nb > ms->dvsize) { if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ mchunkptr b, p, r; size_t rsize; bindex_t i; binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); binmap_t leastbit = least_bit(leftbits); compute_bit2idx(leastbit, i); b = smallbin_at(ms, i); p = b->fd; assert(chunksize(p) == small_index2size(i)); unlink_first_small_chunk(ms, b, p, i); rsize = small_index2size(i) - nb; /* Fit here cannot be remainderless if 4byte sizes */ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) set_inuse_and_pinuse(ms, p, small_index2size(i)); else { set_size_and_pinuse_of_inuse_chunk(ms, p, nb); r = chunk_plus_offset(p, nb); set_size_and_pinuse_of_free_chunk(r, rsize); replace_dv(ms, r, rsize); } mem = chunk2mem(p); check_malloced_chunk(ms, mem, nb); goto postaction; } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { check_malloced_chunk(ms, mem, nb); goto postaction; } } } else if (bytes >= MAX_REQUEST) nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ else { nb = pad_request(bytes); if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { check_malloced_chunk(ms, mem, nb); goto postaction; } } if (nb <= ms->dvsize) { size_t rsize = ms->dvsize - nb; mchunkptr p = ms->dv; if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ mchunkptr r = ms->dv = chunk_plus_offset(p, nb); ms->dvsize = rsize; set_size_and_pinuse_of_free_chunk(r, rsize); set_size_and_pinuse_of_inuse_chunk(ms, p, nb); } else { /* exhaust dv */ size_t dvs = ms->dvsize; ms->dvsize = 0; ms->dv = 0; set_inuse_and_pinuse(ms, p, dvs); } mem = chunk2mem(p); check_malloced_chunk(ms, mem, nb); goto postaction; } else if (nb < ms->topsize) { /* Split top */ size_t rsize = ms->topsize -= nb; mchunkptr p = ms->top; mchunkptr r = ms->top = chunk_plus_offset(p, nb); r->head = rsize | PINUSE_BIT; set_size_and_pinuse_of_inuse_chunk(ms, p, nb); mem = chunk2mem(p); check_top_chunk(ms, ms->top); check_malloced_chunk(ms, mem, nb); goto postaction; } mem = sys_alloc(ms, nb); postaction: POSTACTION(ms); return mem; } return 0; } void mspace_free(mspace msp, void* mem) { if (mem != 0) { mchunkptr p = mem2chunk(mem); #if FOOTERS mstate fm = get_mstate_for(p); #else /* FOOTERS */ mstate fm = (mstate)msp; #endif /* FOOTERS */ if (!ok_magic(fm)) { USAGE_ERROR_ACTION(fm, p); return; } if (!PREACTION(fm)) { check_inuse_chunk(fm, p); if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) { size_t psize = chunksize(p); mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { size_t prevsize = p->prev_foot; if ((prevsize & IS_MMAPPED_BIT) != 0) { prevsize &= ~IS_MMAPPED_BIT; psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) fm->footprint -= psize; goto postaction; } else { mchunkptr prev = chunk_minus_offset(p, prevsize); psize += prevsize; p = prev; if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ if (p != fm->dv) { unlink_chunk(fm, p, prevsize); } else if ((next->head & INUSE_BITS) == INUSE_BITS) { 
fm->dvsize = psize; set_free_with_pinuse(p, psize, next); goto postaction; } } else goto erroraction; } } if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { if (!cinuse(next)) { /* consolidate forward */ if (next == fm->top) { size_t tsize = fm->topsize += psize; fm->top = p; p->head = tsize | PINUSE_BIT; if (p == fm->dv) { fm->dv = 0; fm->dvsize = 0; } if (should_trim(fm, tsize)) sys_trim(fm, 0); goto postaction; } else if (next == fm->dv) { size_t dsize = fm->dvsize += psize; fm->dv = p; set_size_and_pinuse_of_free_chunk(p, dsize); goto postaction; } else { size_t nsize = chunksize(next); psize += nsize; unlink_chunk(fm, next, nsize); set_size_and_pinuse_of_free_chunk(p, psize); if (p == fm->dv) { fm->dvsize = psize; goto postaction; } } } else set_free_with_pinuse(p, psize, next); if (is_small(psize)) { insert_small_chunk(fm, p, psize); check_free_chunk(fm, p); } else { tchunkptr tp = (tchunkptr)p; insert_large_chunk(fm, tp, psize); check_free_chunk(fm, p); if (--fm->release_checks == 0) release_unused_segments(fm); } goto postaction; } } erroraction: USAGE_ERROR_ACTION(fm, p); postaction: POSTACTION(fm); } } } void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { void* mem; size_t req = 0; mstate ms = (mstate)msp; if (!ok_magic(ms)) { USAGE_ERROR_ACTION(ms,ms); return 0; } if (n_elements != 0) { req = n_elements * elem_size; if (((n_elements | elem_size) & ~(size_t)0xffff) && (req / n_elements != elem_size)) req = MAX_SIZE_T; /* force downstream failure on overflow */ } mem = internal_malloc(ms, req); if (mem != 0 && calloc_must_clear(mem2chunk(mem))) memset(mem, 0, req); return mem; } void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) { if (oldmem == 0) return mspace_malloc(msp, bytes); #ifdef REALLOC_ZERO_BYTES_FREES if (bytes == 0) { mspace_free(msp, oldmem); return 0; } #endif /* REALLOC_ZERO_BYTES_FREES */ else { #if FOOTERS mchunkptr p = mem2chunk(oldmem); mstate ms = get_mstate_for(p); #else /* FOOTERS */ mstate ms = 
/* Tail of mspace_realloc(): without FOOTERS, trust msp as given. */
      (mstate)msp;
#endif /* FOOTERS */
    if (!ok_magic(ms)) {
      USAGE_ERROR_ACTION(ms,ms);
      return 0;
    }
    return internal_realloc(ms, oldmem, bytes);
  }
}

/* mspace analogue of dlmemalign(). */
void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
  mstate ms = (mstate)msp;
  if (!ok_magic(ms)) {
    USAGE_ERROR_ACTION(ms,ms);
    return 0;
  }
  return internal_memalign(ms, alignment, bytes);
}

/* mspace analogue of dlindependent_calloc(). */
void** mspace_independent_calloc(mspace msp, size_t n_elements,
                                 size_t elem_size, void* chunks[]) {
  size_t sz = elem_size; /* serves as 1-element array */
  mstate ms = (mstate)msp;
  if (!ok_magic(ms)) {
    USAGE_ERROR_ACTION(ms,ms);
    return 0;
  }
  return ialloc(ms, n_elements, &sz, 3, chunks);
}

/* mspace analogue of dlindependent_comalloc(). */
void** mspace_independent_comalloc(mspace msp, size_t n_elements,
                                   size_t sizes[], void* chunks[]) {
  mstate ms = (mstate)msp;
  if (!ok_magic(ms)) {
    USAGE_ERROR_ACTION(ms,ms);
    return 0;
  }
  return ialloc(ms, n_elements, sizes, 0, chunks);
}

/* mspace analogue of dlmalloc_trim(). */
int mspace_trim(mspace msp, size_t pad) {
  int result = 0;
  mstate ms = (mstate)msp;
  if (ok_magic(ms)) {
    if (!PREACTION(ms)) {
      result = sys_trim(ms, pad);
      POSTACTION(ms);
    }
  }
  else {
    USAGE_ERROR_ACTION(ms,ms);
  }
  return result;
}

/* mspace analogue of dlmalloc_stats(). */
void mspace_malloc_stats(mspace msp) {
  mstate ms = (mstate)msp;
  if (ok_magic(ms)) {
    internal_malloc_stats(ms);
  }
  else {
    USAGE_ERROR_ACTION(ms,ms);
  }
}

/* Current total bytes this mspace has obtained from the system. */
size_t mspace_footprint(mspace msp) {
  size_t result = 0;
  mstate ms = (mstate)msp;
  if (ok_magic(ms)) {
    result = ms->footprint;
  }
  else {
    USAGE_ERROR_ACTION(ms,ms);
  }
  return result;
}

/* High-water mark of bytes this mspace has obtained from the system. */
size_t mspace_max_footprint(mspace msp) {
  size_t result = 0;
  mstate ms = (mstate)msp;
  if (ok_magic(ms)) {
    result = ms->max_footprint;
  }
  else {
    USAGE_ERROR_ACTION(ms,ms);
  }
  return result;
}

#if !NO_MALLINFO
/* mallinfo snapshot for a single mspace. */
struct mallinfo mspace_mallinfo(mspace msp) {
  mstate ms = (mstate)msp;
  if (!ok_magic(ms)) {
    USAGE_ERROR_ACTION(ms,ms);
  }
  return internal_mallinfo(ms);
}
#endif /* NO_MALLINFO */

/* Usable payload size of a block; does not need the owning mspace. */
size_t mspace_usable_size(void* mem) {
  if (mem != 0) {
    mchunkptr p = mem2chunk(mem);
    if (cinuse(p))
      return chunksize(p) - overhead_for(p);
  }
  return 0;
}

/* Tune a malloc parameter for all mspaces (continues on next line). */
int mspace_mallopt(int
param_number, int value) { return change_mparam(param_number, value); } #endif /* MSPACES */ /* -------------------- Alternative MORECORE functions ------------------- */ /* Guidelines for creating a custom version of MORECORE: * For best performance, MORECORE should allocate in multiples of pagesize. * MORECORE may allocate more memory than requested. (Or even less, but this will usually result in a malloc failure.) * MORECORE must not allocate memory when given argument zero, but instead return one past the end address of memory from previous nonzero call. * For best performance, consecutive calls to MORECORE with positive arguments should return increasing addresses, indicating that space has been contiguously extended. * Even though consecutive calls to MORECORE need not return contiguous addresses, it must be OK for malloc'ed chunks to span multiple regions in those cases where they do happen to be contiguous. * MORECORE need not handle negative arguments -- it may instead just return MFAIL when given negative arguments. Negative arguments are always multiples of pagesize. MORECORE must not misinterpret negative args as large positive unsigned args. You can suppress all such calls from even occurring by defining MORECORE_CANNOT_TRIM, As an example alternative MORECORE, here is a custom allocator kindly contributed for pre-OSX macOS. It uses virtually but not necessarily physically contiguous non-paged memory (locked in, present and won't get swapped out). You can use it by uncommenting this section, adding some #includes, and setting up the appropriate defines above: #define MORECORE osMoreCore There is also a shutdown routine that should somehow be called for cleanup upon program exit. 
#define MAX_POOL_ENTRIES 100 #define MINIMUM_MORECORE_SIZE (64 * 1024U) static int next_os_pool; void *our_os_pools[MAX_POOL_ENTRIES]; void *osMoreCore(int size) { void *ptr = 0; static void *sbrk_top = 0; if (size > 0) { if (size < MINIMUM_MORECORE_SIZE) size = MINIMUM_MORECORE_SIZE; if (CurrentExecutionLevel() == kTaskLevel) ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); if (ptr == 0) { return (void *) MFAIL; } // save ptrs so they can be freed during cleanup our_os_pools[next_os_pool] = ptr; next_os_pool++; ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); sbrk_top = (char *) ptr + size; return ptr; } else if (size < 0) { // we don't currently support shrink behavior return (void *) MFAIL; } else { return sbrk_top; } } // cleanup any allocated memory pools // called as last thing before shutting down driver void osCleanupMem(void) { void **ptr; for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) if (*ptr) { PoolDeallocate(*ptr); *ptr = 0; } } */ /* ----------------------------------------------------------------------- History: V2.8.4 (not yet released) * Add mspace_mmap_large_chunks; thanks to Jean Brouwers * Fix insufficient sys_alloc padding when using 16byte alignment * Fix bad error check in mspace_footprint * Adaptations for ptmalloc, courtesy of Wolfram Gloger. * Reentrant spin locks, courtesy of Earl Chew and others * Win32 improvements, courtesy of Niall Douglas and Earl Chew * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options * Extension hook in malloc_state * Various small adjustments to reduce warnings on some compilers * Various configuration extensions/changes for more platforms. Thanks to all who contributed these. 
V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) * Add max_footprint functions * Ensure all appropriate literals are size_t * Fix conditional compilation problem for some #define settings * Avoid concatenating segments with the one provided in create_mspace_with_base * Rename some variables to avoid compiler shadowing warnings * Use explicit lock initialization. * Better handling of sbrk interference. * Simplify and fix segment insertion, trimming and mspace_destroy * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x * Thanks especially to Dennis Flanagan for help on these. V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) * Fix memalign brace error. V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) * Fix improper #endif nesting in C++ * Add explicit casts needed for C++ V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) * Use trees for large bins * Support mspaces * Use segments to unify sbrk-based and mmap-based system allocation, removing need for emulation on most platforms without sbrk. * Default safety checks * Optional footer checks. Thanks to William Robertson for the idea. * Internal code refactoring * Incorporate suggestions and platform-specific changes. Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, Aaron Bachmann, Emery Berger, and others. * Speed up non-fastbin processing enough to remove fastbins. * Remove useless cfree() to avoid conflicts with other apps. * Remove internal memcpy, memset. Compilers handle builtins better. * Remove some options that no one ever used and rename others. V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) * Fix malloc_state bitmap array misdeclaration V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) * Allow tuning of FIRST_SORTED_BIN_SIZE * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. * Better detection and support for non-contiguousness of MORECORE. Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger * Bypass most of malloc if no frees. Thanks To Emery Berger. 
* Fix freeing of old top non-contiguous chunk in sysmalloc. * Raised default trim and map thresholds to 256K. * Fix mmap-related #defines. Thanks to Lubos Lunak. * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. * Branch-free bin calculation * Default trim and mmap thresholds now 256K. V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) * Introduce independent_comalloc and independent_calloc. Thanks to Michael Pachos for motivation and help. * Make optional .h file available * Allow > 2GB requests on 32bit systems. * new WIN32 sbrk, mmap, munmap, lock code from . Thanks also to Andreas Mueller , and Anonymous. * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for helping test this.) * memalign: check alignment arg * realloc: don't try to shift chunks backwards, since this leads to more fragmentation in some programs and doesn't seem to help in any others. * Collect all cases in malloc requiring system memory into sysmalloc * Use mmap as backup to sbrk * Place all internal state in malloc_state * Introduce fastbins (although similar to 2.5.1) * Many minor tunings and cosmetic improvements * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS Thanks to Tony E. Bennett and others. * Include errno.h to support default failure action. V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) * return null for negative arguments * Added Several WIN32 cleanups from Martin C. Fong * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' (e.g. 
WIN32 platforms) * Cleanup header file inclusion for WIN32 platforms * Cleanup code to avoid Microsoft Visual C++ compiler complaints * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing memory allocation routines * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to usage of 'assert' in non-WIN32 code * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to avoid infinite loop * Always call 'fREe()' rather than 'free()' V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) * Fixed ordering problem with boundary-stamping V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) * Added pvalloc, as recommended by H.J. Liu * Added 64bit pointer support mainly from Wolfram Gloger * Added anonymously donated WIN32 sbrk emulation * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen * malloc_extend_top: fix mask error that caused wastage after foreign sbrks * Add linux mremap support code from HJ Liu V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) * Integrated most documentation with the code. * Add support for mmap, with help from Wolfram Gloger (Gloger@lrz.uni-muenchen.de). * Use last_remainder in more cases. * Pack bins using idea from colin@nyx10.cs.du.edu * Use ordered bins instead of best-fit threshold * Eliminate block-local decls to simplify tracing and debugging. * Support another case of realloc via move into top * Fix error occurring when initial sbrk_base not word-aligned. * Rely on page size for units instead of SBRK_UNIT to avoid surprises about sbrk alignment conventions. * Add mallinfo, mallopt. Thanks to Raymond Nijssen (raymond@es.ele.tue.nl) for the suggestion. * Add `pad' argument to malloc_trim and top_pad mallopt parameter. * More precautions for cases where other routines call sbrk, courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). * Added macros etc., allowing use in linux libc from H.J. 
Lu (hjl@gnu.ai.mit.edu) * Inverted this history list V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) * Re-tuned and fixed to behave more nicely with V2.6.0 changes. * Removed all preallocation code since under current scheme the work required to undo bad preallocations exceeds the work saved in good cases for most test programs. * No longer use return list or unconsolidated bins since no scheme using them consistently outperforms those that don't given above changes. * Use best fit for very large chunks to prevent some worst-cases. * Added some support for debugging V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) * Removed footers when chunks are in use. Thanks to Paul Wilson (wilson@cs.texas.edu) for the suggestion. V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) * Added malloc_trim, with help from Wolfram Gloger (wmglo@Dent.MED.Uni-Muenchen.DE). V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) * realloc: try to expand in both directions * malloc: swap order of clean-bin strategy; * realloc: only conditionally expand backwards * Try not to scavenge used bins * Use bin counts as a guide to preallocation * Occasionally bin return list chunks in first scan * Add a few optimizations from colin@nyx10.cs.du.edu V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) * faster bin computation & slightly different binning * merged all consolidations to one part of malloc proper (eliminating old malloc_find_space & malloc_clean_bin) * Scan 2 returns chunks (not just 1) * Propagate failure in realloc if malloc returns 0 * Add stuff to allow compilation on non-ANSI compilers from kpv@research.att.com V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) * removed potential for odd address access in prev_chunk * removed dependency on getpagesize.h * misc cosmetics and a bit more internal documentation * anticosmetics: mangled names in macros to evade debugger strangeness * tested on sparc, hp-700, dec-mips, rs6000 
with gcc & native cc (hp, dec only) allowing Detlefs & Zorn comparison study (in SIGPLAN Notices.) Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) * Based loosely on libg++-1.2X malloc. (It retains some of the overall structure of old version, but most details differ.) */ LucenePlusPlus-rel_3.0.4/src/core/util/nedmalloc/nedmalloc.c000066400000000000000000000611751217574114600240610ustar00rootroot00000000000000/* Alternative malloc implementation for multiple threads without lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifdef _MSC_VER /* Enable full aliasing on MSVC */ /*#pragma optimize("a", on)*/ #endif /*#define FULLSANITYCHECKS*/ #include "nedmalloc.h" #ifdef WIN32 #include #endif #define MSPACES 1 #define ONLY_MSPACES 1 #ifndef USE_LOCKS #define USE_LOCKS 1 #endif #define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */ #undef DEBUG /* dlmalloc wants DEBUG either 0 or 1 */ #ifdef _DEBUG #define DEBUG 1 #else #define DEBUG 0 #endif #ifdef NDEBUG /* Disable assert checking on release builds */ #undef DEBUG #endif /* The default of 64Kb means we spend too much time kernel-side */ #ifndef DEFAULT_GRANULARITY #define DEFAULT_GRANULARITY (1*1024*1024) #endif /*#define USE_SPIN_LOCKS 0*/ /*#define FORCEINLINE*/ #include "malloc.c.h" #ifdef NDEBUG /* Disable assert checking on release builds */ #undef DEBUG #endif /* The maximum concurrent threads in a pool possible */ #ifndef MAXTHREADSINPOOL #define MAXTHREADSINPOOL 16 #endif /* The maximum number of threadcaches which can be allocated */ #ifndef THREADCACHEMAXCACHES #define THREADCACHEMAXCACHES 256 #endif /* The maximum size to be allocated from the thread cache */ #ifndef THREADCACHEMAX #define THREADCACHEMAX 8192 #endif #if 0 /* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */ #define THREADCACHEMAXBINS ((13-4)*2) #else /* The number of cache entries. 
This is (topbitpos(THREADCACHEMAX)-4) */ #define THREADCACHEMAXBINS (13-4) #endif /* Point at which the free space in a thread cache is garbage collected */ #ifndef THREADCACHEMAXFREESPACE #define THREADCACHEMAXFREESPACE (512*1024) #endif #ifdef WIN32 #define TLSVAR DWORD #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k)) #define TLSFREE(k) (!TlsFree(k)) #define TLSGET(k) TlsGetValue(k) #define TLSSET(k, a) (!TlsSetValue(k, a)) #ifdef DEBUG static LPVOID ChkedTlsGetValue(DWORD idx) { LPVOID ret=TlsGetValue(idx); assert(S_OK==GetLastError()); return ret; } #undef TLSGET #define TLSGET(k) ChkedTlsGetValue(k) #endif #else #define TLSVAR pthread_key_t #define TLSALLOC(k) pthread_key_create(k, 0) #define TLSFREE(k) pthread_key_delete(k) #define TLSGET(k) pthread_getspecific(k) #define TLSSET(k, a) pthread_setspecific(k, a) #endif #if 0 /* Only enable if testing with valgrind. Causes misoperation */ #define mspace_malloc(p, s) malloc(s) #define mspace_realloc(p, m, s) realloc(m, s) #define mspace_calloc(p, n, s) calloc(n, s) #define mspace_free(p, m) free(m) #endif #if defined(__cplusplus) #if !defined(NO_NED_NAMESPACE) namespace nedalloc { #else extern "C" { #endif #endif size_t nedblksize(void *mem) THROWSPEC { #if 0 /* Only enable if testing with valgrind. 
Causes misoperation */ return THREADCACHEMAX; #else if(mem) { mchunkptr p=mem2chunk(mem); assert(cinuse(p)); /* If this fails, someone tried to free a block twice */ if(cinuse(p)) return chunksize(p)-overhead_for(p); } return 0; #endif } void nedsetvalue(void *v) THROWSPEC { nedpsetvalue(0, v); } void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc(0, size); } void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc(0, no, size); } void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc(0, mem, size); } void nedfree(void *mem) THROWSPEC { nedpfree(0, mem); } void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); } #if !NO_MALLINFO struct mallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo(0); } #endif int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt(0, parno, value); } int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim(0, pad); } void nedmalloc_stats() THROWSPEC { nedpmalloc_stats(0); } size_t nedmalloc_footprint() THROWSPEC { return nedpmalloc_footprint(0); } void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc(0, elemsno, elemsize, chunks); } void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc(0, elems, sizes, chunks); } struct threadcacheblk_t; typedef struct threadcacheblk_t threadcacheblk; struct threadcacheblk_t { /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */ #ifdef FULLSANITYCHECKS unsigned int magic; #endif unsigned int lastUsed, size; threadcacheblk *next, *prev; }; typedef struct threadcache_t { #ifdef FULLSANITYCHECKS unsigned int magic1; #endif int mymspace; /* Last mspace entry this thread used */ long threadid; unsigned int mallocs, frees, successes; size_t freeInCache; /* How much free space is stored in this cache */ threadcacheblk 
*bins[(THREADCACHEMAXBINS+1)*2]; #ifdef FULLSANITYCHECKS unsigned int magic2; #endif } threadcache; struct nedpool_t { MLOCK_T mutex; void *uservalue; int threads; /* Max entries in m to use */ threadcache *caches[THREADCACHEMAXCACHES]; TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */ mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */ }; static nedpool syspool; static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC { /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */ unsigned int topbit, size=(unsigned int)(_size>>4); /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */ #if defined(__GNUC__) topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size); #elif defined(_MSC_VER) && _MSC_VER>=1300 { unsigned long bsrTopBit; _BitScanReverse(&bsrTopBit, size); topbit = bsrTopBit; } #else #if 0 union { unsigned asInt[2]; double asDouble; }; int n; asDouble = (double)size + 0.5; topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023; #else { unsigned int x=size; x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >>16); x = ~x; x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; x = x + (x << 8); x = x + (x << 16); topbit=31 - (x >> 24); } #endif #endif return topbit; } #ifdef FULLSANITYCHECKS static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC { assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1])); if(ptr[0] && ptr[1]) { assert(nedblksize(ptr[0])>=sizeof(threadcacheblk)); assert(nedblksize(ptr[1])>=sizeof(threadcacheblk)); assert(*(unsigned int *) "NEDN"==ptr[0]->magic); assert(*(unsigned int *) "NEDN"==ptr[1]->magic); assert(!ptr[0]->prev); assert(!ptr[1]->next); if(ptr[0]==ptr[1]) { assert(!ptr[0]->next); assert(!ptr[1]->prev); } } } static void tcfullsanitycheck(threadcache *tc) THROWSPEC { threadcacheblk **tcbptr=tc->bins; int n; for(n=0; 
n<=THREADCACHEMAXBINS; n++, tcbptr+=2) { threadcacheblk *b, *ob=0; tcsanitycheck(tcbptr); for(b=tcbptr[0]; b; ob=b, b=b->next) { assert(*(unsigned int *) "NEDN"==b->magic); assert(!ob || ob->next==b); assert(!ob || b->prev==ob); } } } #endif static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC { #ifdef FULLSANITYCHECKS tcfullsanitycheck(tc); #endif if(tc->freeInCache) { threadcacheblk **tcbptr=tc->bins; int n; for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) { threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */ /*tcsanitycheck(tcbptr);*/ for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; ) { threadcacheblk *f=*tcb; size_t blksize=f->size; /*nedblksize(f);*/ assert(blksize<=nedblksize(f)); assert(blksize); #ifdef FULLSANITYCHECKS assert(*(unsigned int *) "NEDN"==(*tcb)->magic); #endif *tcb=(*tcb)->prev; if(*tcb) (*tcb)->next=0; else *tcbptr=0; tc->freeInCache-=blksize; assert((long) tc->freeInCache>=0); mspace_free(0, f); /*tcsanitycheck(tcbptr);*/ } } } #ifdef FULLSANITYCHECKS tcfullsanitycheck(tc); #endif } static void DestroyCaches(nedpool *p) THROWSPEC { if(p->caches) { threadcache *tc; int n; for(n=0; ncaches[n])) { tc->frees++; RemoveCacheEntries(p, tc, 0); assert(!tc->freeInCache); tc->mymspace=-1; tc->threadid=0; mspace_free(0, tc); p->caches[n]=0; } } } } static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC { threadcache *tc=0; int n, end; ACQUIRE_LOCK(&p->mutex); for(n=0; ncaches[n]; n++); if(THREADCACHEMAXCACHES==n) { /* List exhausted, so disable for this thread */ RELEASE_LOCK(&p->mutex); return 0; } tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache)); if(!tc) { RELEASE_LOCK(&p->mutex); return 0; } #ifdef FULLSANITYCHECKS tc->magic1=*(unsigned int *)"NEDMALC1"; tc->magic2=*(unsigned int *)"NEDMALC2"; #endif tc->threadid=(long)(size_t)CURRENT_THREAD; for(end=0; p->m[end]; end++); tc->mymspace=tc->threadid % end; RELEASE_LOCK(&p->mutex); if(TLSSET(p->mycache, (void 
*)(size_t)(n+1))) abort(); return tc; } static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC { void *ret=0; unsigned int bestsize; unsigned int idx=size2binidx(*size); size_t blksize=0; threadcacheblk *blk, **binsptr; #ifdef FULLSANITYCHECKS tcfullsanitycheck(tc); #endif /* Calculate best fit bin size */ bestsize=1<<(idx+4); #if 0 /* Finer grained bin fit */ idx<<=1; if(*size>bestsize) { idx++; bestsize+=bestsize>>1; } if(*size>bestsize) { idx++; bestsize=1<<(4+(idx>>1)); } #else if(*size>bestsize) { idx++; bestsize<<=1; } #endif assert(bestsize>=*size); if(*sizebins[idx*2]; /* Try to match close, but move up a bin if necessary */ blk=*binsptr; if(!blk || blk->size<*size) { /* Bump it up a bin */ if(idxsize; /*nedblksize(blk);*/ assert(nedblksize(blk)>=blksize); assert(blksize>=*size); if(blk->next) blk->next->prev=0; *binsptr=blk->next; if(!*binsptr) binsptr[1]=0; #ifdef FULLSANITYCHECKS blk->magic=0; #endif assert(binsptr[0]!=blk && binsptr[1]!=blk); assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD); /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/ ret=(void *) blk; } ++tc->mallocs; if(ret) { assert(blksize>=*size); ++tc->successes; tc->freeInCache-=blksize; assert((long) tc->freeInCache>=0); } #if defined(DEBUG) && 0 if(!(tc->mallocs & 0xfff)) { printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs, (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache); } #endif #ifdef FULLSANITYCHECKS tcfullsanitycheck(tc); #endif return ret; } static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC { unsigned int age=THREADCACHEMAXFREESPACE/8192; /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/ while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE) { RemoveCacheEntries(p, tc, age); /*printf("*** Removing cache entries older than %u 
(%u)\n", age, (unsigned int) tc->freeInCache);*/ age>>=1; } /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/ } static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC { unsigned int bestsize; unsigned int idx=size2binidx(size); threadcacheblk **binsptr, *tck=(threadcacheblk *) mem; assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD); #ifdef DEBUG { /* Make sure this is a valid memory block */ mchunkptr p = mem2chunk(mem); mstate fm = get_mstate_for(p); if (!ok_magic(fm)) { USAGE_ERROR_ACTION(fm, p); return; } } #endif #ifdef FULLSANITYCHECKS tcfullsanitycheck(tc); #endif /* Calculate best fit bin size */ bestsize=1<<(idx+4); #if 0 /* Finer grained bin fit */ idx<<=1; if(size>bestsize) { unsigned int biggerbestsize=bestsize+bestsize<<1; if(size>=biggerbestsize) { idx++; bestsize=biggerbestsize; } } #endif if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */ size=bestsize; binsptr=&tc->bins[idx*2]; assert(idx<=THREADCACHEMAXBINS); if(tck==*binsptr) { fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck); abort(); } #ifdef FULLSANITYCHECKS tck->magic=*(unsigned int *) "NEDN"; #endif tck->lastUsed=++tc->frees; tck->size=(unsigned int) size; tck->next=*binsptr; tck->prev=0; if(tck->next) tck->next->prev=tck; else binsptr[1]=tck; assert(!*binsptr || (*binsptr)->size==tck->size); *binsptr=tck; assert(tck==tc->bins[idx*2]); assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck); /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/ tc->freeInCache+=size; #ifdef FULLSANITYCHECKS tcfullsanitycheck(tc); #endif #if 1 if(tc->freeInCache>=THREADCACHEMAXFREESPACE) ReleaseFreeInCache(p, tc, mymspace); #endif } static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC { /* threads is -1 for system pool */ ensure_initialization(); ACQUIRE_MALLOC_GLOBAL_LOCK(); if(p->threads) goto done; if(INITIAL_LOCK(&p->mutex)) 
goto err; if(TLSALLOC(&p->mycache)) goto err; if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err; p->m[0]->extp=p; p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads; done: RELEASE_MALLOC_GLOBAL_LOCK(); return 1; err: if(threads<0) abort(); /* If you can't allocate for system pool, we're screwed */ DestroyCaches(p); if(p->m[0]) { destroy_mspace(p->m[0]); p->m[0]=0; } if(p->mycache) { if(TLSFREE(p->mycache)) abort(); p->mycache=0; } RELEASE_MALLOC_GLOBAL_LOCK(); return 0; } static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC { /* Gets called when thread's last used mspace is in use. The strategy is to run through the list of all available mspaces looking for an unlocked one and if we fail, we create a new one so long as we don't exceed p->threads */ int n, end; for(n=end=*lastUsed+1; p->m[n]; end=++n) { if(TRY_LOCK(&p->m[n]->mutex)) goto found; } for(n=0; n<*lastUsed && p->m[n]; n++) { if(TRY_LOCK(&p->m[n]->mutex)) goto found; } if(endthreads) { mstate temp; if(!(temp=(mstate) create_mspace(size, 1))) goto badexit; /* Now we're ready to modify the lists, we lock */ ACQUIRE_LOCK(&p->mutex); while(p->m[end] && endthreads) end++; if(end>=p->threads) { /* Drat, must destroy it now */ RELEASE_LOCK(&p->mutex); destroy_mspace((mspace) temp); goto badexit; } /* We really want to make sure this goes into memory now but we have to be careful of breaking aliasing rules, so write it twice */ *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp; ACQUIRE_LOCK(&p->m[end]->mutex); /*printf("Created mspace idx %d\n", end);*/ RELEASE_LOCK(&p->mutex); n=end; goto found; } /* Let it lock on the last one it used */ badexit: ACQUIRE_LOCK(&p->m[*lastUsed]->mutex); return p->m[*lastUsed]; found: *lastUsed=n; if(tc) tc->mymspace=n; else { if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort(); } return p->m[n]; } nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC { nedpool *ret; 
if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0; if(!InitPool(ret, capacity, threads)) { nedpfree(0, ret); return 0; } return ret; } void neddestroypool(nedpool *p) THROWSPEC { int n; ACQUIRE_LOCK(&p->mutex); DestroyCaches(p); for(n=0; p->m[n]; n++) { destroy_mspace(p->m[n]); p->m[n]=0; } RELEASE_LOCK(&p->mutex); if(TLSFREE(p->mycache)) abort(); nedpfree(0, p); } void nedpsetvalue(nedpool *p, void *v) THROWSPEC { if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } p->uservalue=v; } void *nedgetvalue(nedpool **p, void *mem) THROWSPEC { nedpool *np=0; mchunkptr mcp=mem2chunk(mem); mstate fm; if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0; if(!cinuse(mcp)) return 0; if(!next_pinuse(mcp)) return 0; if(!is_mmapped(mcp) && !pinuse(mcp)) { if(next_chunk(prev_chunk(mcp))!=mcp) return 0; } fm=get_mstate_for(mcp); if(!ok_magic(fm)) return 0; if(!ok_address(fm, mcp)) return 0; if(!fm->extp) return 0; np=(nedpool *) fm->extp; if(p) *p=np; return np->uservalue; } void neddisablethreadcache(nedpool *p) THROWSPEC { int mycache; if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } mycache=(int)(size_t) TLSGET(p->mycache); if(!mycache) { /* Set to mspace 0 */ if(TLSSET(p->mycache, (void *)-1)) abort(); } else if(mycache>0) { /* Set to last used mspace */ threadcache *tc=p->caches[mycache-1]; #if defined(DEBUG) printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n", 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs); #endif if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort(); tc->frees++; RemoveCacheEntries(p, tc, 0); assert(!tc->freeInCache); tc->mymspace=-1; tc->threadid=0; mspace_free(0, p->caches[mycache-1]); p->caches[mycache-1]=0; } } #define GETMSPACE(m,p,tc,ms,s,action) \ do \ { \ mstate m = GetMSpace((p),(tc),(ms),(s)); \ action; \ RELEASE_LOCK(&m->mutex); \ } while (0) static FORCEINLINE mstate GetMSpace(nedpool *p, 
threadcache *tc, int mymspace, size_t size) THROWSPEC { /* Returns a locked and ready for use mspace */ mstate m=p->m[mymspace]; assert(m); if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);\ /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/ return m; } static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC { int mycache; if(size && *sizemycache); if(mycache>0) { *tc=(*p)->caches[mycache-1]; *mymspace=(*tc)->mymspace; } else if(!mycache) { *tc=AllocCache(*p); if(!*tc) { /* Disable */ if(TLSSET((*p)->mycache, (void *)-1)) abort(); *mymspace=0; } else *mymspace=(*tc)->mymspace; } else { *tc=0; *mymspace=-mycache-1; } assert(*mymspace>=0); assert((long)(size_t)CURRENT_THREAD==(*tc)->threadid); #ifdef FULLSANITYCHECKS if(*tc) { if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2) { abort(); } } #endif } void * nedpmalloc(nedpool *p, size_t size) THROWSPEC { void *ret=0; threadcache *tc; int mymspace; GetThreadCache(&p, &tc, &mymspace, &size); #if THREADCACHEMAX if(tc && size<=THREADCACHEMAX) { /* Use the thread cache */ ret=threadcache_malloc(p, tc, &size); } #endif if(!ret) { /* Use this thread's mspace */ GETMSPACE(m, p, tc, mymspace, size, ret=mspace_malloc(m, size)); } return ret; } void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC { size_t rsize=size*no; void *ret=0; threadcache *tc; int mymspace; GetThreadCache(&p, &tc, &mymspace, &rsize); #if THREADCACHEMAX if(tc && rsize<=THREADCACHEMAX) { /* Use the thread cache */ if((ret=threadcache_malloc(p, tc, &rsize))) memset(ret, 0, rsize); } #endif if(!ret) { /* Use this thread's mspace */ GETMSPACE(m, p, tc, mymspace, rsize, ret=mspace_calloc(m, 1, rsize)); } return ret; } void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC { void *ret=0; threadcache *tc; int mymspace; if(!mem) return nedpmalloc(p, size); GetThreadCache(&p, &tc, &mymspace, &size); #if THREADCACHEMAX if(tc && 
size && size<=THREADCACHEMAX) { /* Use the thread cache */ size_t memsize=nedblksize(mem); assert(memsize); if((ret=threadcache_malloc(p, tc, &size))) { memcpy(ret, mem, memsizem[n]; n++) { struct mallinfo t=mspace_mallinfo(p->m[n]); ret.arena+=t.arena; ret.ordblks+=t.ordblks; ret.hblkhd+=t.hblkhd; ret.usmblks+=t.usmblks; ret.uordblks+=t.uordblks; ret.fordblks+=t.fordblks; ret.keepcost+=t.keepcost; } return ret; } #endif int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC { return mspace_mallopt(parno, value); } int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC { int n, ret=0; if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } for(n=0; p->m[n]; n++) { ret+=mspace_trim(p->m[n], pad); } return ret; } void nedpmalloc_stats(nedpool *p) THROWSPEC { int n; if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } for(n=0; p->m[n]; n++) { mspace_malloc_stats(p->m[n]); } } size_t nedpmalloc_footprint(nedpool *p) THROWSPEC { size_t ret=0; int n; if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } for(n=0; p->m[n]; n++) { ret+=mspace_footprint(p->m[n]); } return ret; } void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { void **ret; threadcache *tc; int mymspace; GetThreadCache(&p, &tc, &mymspace, &elemsize); GETMSPACE(m, p, tc, mymspace, elemsno*elemsize, ret=mspace_independent_calloc(m, elemsno, elemsize, chunks)); return ret; } void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC { void **ret; threadcache *tc; int mymspace; size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t)); if(!adjustedsizes) return 0; for(i=0; i /* for size_t */ #ifndef EXTSPEC #define EXTSPEC extern #endif #if defined(_MSC_VER) && _MSC_VER>=1400 #define MALLOCATTR __declspec(restrict) #endif #ifdef __GNUC__ #define MALLOCATTR __attribute__ ((malloc)) #endif #ifndef MALLOCATTR #define MALLOCATTR #endif #ifdef REPLACE_SYSTEM_ALLOCATOR 
#define nedmalloc malloc #define nedcalloc calloc #define nedrealloc realloc #define nedfree free #define nedmemalign memalign #define nedmallinfo mallinfo #define nedmallopt mallopt #define nedmalloc_trim malloc_trim #define nedmalloc_stats malloc_stats #define nedmalloc_footprint malloc_footprint #define nedindependent_calloc independent_calloc #define nedindependent_comalloc independent_comalloc #ifdef _MSC_VER #define nedblksize _msize #endif #endif #ifndef NO_MALLINFO #define NO_MALLINFO 0 #endif #if !NO_MALLINFO struct mallinfo; #endif #if defined(__cplusplus) #if !defined(NO_NED_NAMESPACE) namespace nedalloc { #else extern "C" { #endif #define THROWSPEC throw() #else #define THROWSPEC #endif /* These are the global functions */ /* Gets the usable size of an allocated block. Note this will always be bigger than what was asked for due to rounding etc. */ EXTSPEC size_t nedblksize(void *mem) THROWSPEC; EXTSPEC void nedsetvalue(void *v) THROWSPEC; EXTSPEC MALLOCATTR void * nedmalloc(size_t size) THROWSPEC; EXTSPEC MALLOCATTR void * nedcalloc(size_t no, size_t size) THROWSPEC; EXTSPEC MALLOCATTR void * nedrealloc(void *mem, size_t size) THROWSPEC; EXTSPEC void nedfree(void *mem) THROWSPEC; EXTSPEC MALLOCATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC; #if !NO_MALLINFO EXTSPEC struct mallinfo nedmallinfo(void) THROWSPEC; #endif EXTSPEC int nedmallopt(int parno, int value) THROWSPEC; EXTSPEC int nedmalloc_trim(size_t pad) THROWSPEC; EXTSPEC void nedmalloc_stats(void) THROWSPEC; EXTSPEC size_t nedmalloc_footprint(void) THROWSPEC; EXTSPEC MALLOCATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; EXTSPEC MALLOCATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC; /* These are the pool functions */ struct nedpool_t; typedef struct nedpool_t nedpool; /* Creates a memory pool for use with the nedp* functions below. 
Capacity is how much to allocate immediately (if you know you'll be allocating a lot of memory very soon) which you can leave at zero. Threads specifies how many threads will *normally* be accessing the pool concurrently. Setting this to zero means it extends on demand, but be careful of this as it can rapidly consume system resources where bursts of concurrent threads use a pool at once. */ EXTSPEC MALLOCATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC; /* Destroys a memory pool previously created by nedcreatepool(). */ EXTSPEC void neddestroypool(nedpool *p) THROWSPEC; /* Sets a value to be associated with a pool. You can retrieve this value by passing any memory block allocated from that pool. */ EXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC; /* Gets a previously set value using nedpsetvalue() or zero if memory is unknown. Optionally can also retrieve pool. */ EXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC; /* Disables the thread cache for the calling thread, returning any existing cache data to the central pool. 
*/ EXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC; EXTSPEC MALLOCATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC; EXTSPEC MALLOCATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC; EXTSPEC MALLOCATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC; EXTSPEC void nedpfree(nedpool *p, void *mem) THROWSPEC; EXTSPEC MALLOCATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC; #if !NO_MALLINFO EXTSPEC struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC; #endif EXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC; EXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC; EXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC; EXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC; EXTSPEC MALLOCATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; EXTSPEC MALLOCATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC; #if defined(__cplusplus) } #endif #undef MALLOCATTR #undef EXTSPEC #endif LucenePlusPlus-rel_3.0.4/src/core/util/unicode/000077500000000000000000000000001217574114600214355ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/core/util/unicode/gunichartables.h000066400000000000000000030616731217574114600246210ustar00rootroot00000000000000/* This file is automatically generated. DO NOT EDIT! Instead, edit gen-unicode-tables.pl and re-run. 
*/ #ifndef CHARTABLES_H #define CHARTABLES_H #define G_UNICODE_DATA_VERSION "5.1.0" #define G_UNICODE_LAST_CHAR 0x10ffff #define G_UNICODE_MAX_TABLE_INDEX 10000 #define G_UNICODE_LAST_CHAR_PART1 0x2FAFF #define G_UNICODE_LAST_PAGE_PART1 762 static const char type_data[][256] = { { /* page 0, index 0 */ G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, 
G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_FORMAT, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 1, index 1 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER }, { /* page 2, index 2 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, 
G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL }, { /* page 3, index 3 */ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER }, { /* page 4, index 4 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 5, index 5 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 6, index 6 */ G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_FORMAT, 
G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER }, { /* page 7, index 7 */ G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 9, index 8 */ G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 10, index 9 */ G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 11, index 10 */ G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 12, index 11 
*/ G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 13, index 12 */ G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, 
G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 14, index 13 */ G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 15, index 14 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 16, index 15 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 17, index 16 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 18, index 17 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 19, index 18 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 20, index 19 */ G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 22, index 20 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 23, index 21 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 24, index 22 */ G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 25, index 23 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL }, { /* page 26, index 24 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 27, index 25 */ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 28, index 26 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 29, index 27 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, 
G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, 
G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK }, { /* page 30, index 28 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 31, index 29 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, 
G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UNASSIGNED }, { /* page 32, index 30 */ G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_LINE_SEPARATOR, G_UNICODE_PARAGRAPH_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, 
G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_OTHER_NUMBER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 33, index 31 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL }, { /* page 35, index 32 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 36, index 33 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER }, { /* page 37, index 34 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL }, { /* page 38, index 35 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 39, index 36 */ G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, 
G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL }, { /* page 41, index 37 */ G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, 
G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL }, { /* page 43, index 38 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 44, index 39 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION }, { /* page 45, index 40 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK }, { /* page 46, index 41 */ G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, 
G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 47, index 42 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 48, index 43 */ G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 49, index 44 */ G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 50, index 45 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED }, { /* page 77, index 46 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL }, { /* page 159, index 47 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 160, index 48 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 164, index 49 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 166, index 50 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 167, index 51 */ G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, 
G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 168, index 52 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 169, index 53 */ G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 170, index 54 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 215, index 55 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 250, index 56 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 251, index 57 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 253, index 58 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 254, index 59 */ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT }, { /* page 255, index 60 */ G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 256, index 61 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 257, 
index 62 */ G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 258, index 63 */ G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 259, index 64 */ G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 260, index 65 */ G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 264, index 66 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 265, index 67 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 266, index 68 */ G_UNICODE_OTHER_LETTER, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 291, index 69 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 292, index 70 */ G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 464, index 71 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 465, index 72 */ 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 466, index 73 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 467, index 74 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 468, index 75 */ G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 469, index 76 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 470, index 77 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 471, index 78 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER }, { /* page 496, index 79 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 678, index 80 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 762, index 81 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 3584, index 82 */ G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, 
G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 3585, index 83 */ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 4095, index 84 */ G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 4351, index 85 */ G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED } }; /* U+0000 through U+2FAFF */ static const int16_t type_table_part1[763] = { 0 /* page 0 */, 1 /* page 1 */, 2 /* page 2 */, 3 /* page 3 */, 4 /* page 4 */, 5 /* page 5 */, 6 /* page 6 */, 7 /* page 7 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 8 /* page 9 */, 9 /* page 10 */, 10 /* page 11 */, 11 /* page 12 */, 12 /* page 13 */, 13 /* page 14 */, 14 /* page 15 */, 15 /* page 16 */, 16 /* page 17 */, 17 /* page 18 */, 18 /* page 19 */, 19 /* page 20 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 20 /* page 22 */, 21 /* page 23 */, 22 /* page 24 */, 23 /* page 25 */, 24 /* page 26 */, 25 /* page 27 */, 26 /* page 28 */, 27 /* page 29 */, 28 /* page 30 */, 29 /* page 31 */, 30 /* page 32 */, 31 /* page 33 */, G_UNICODE_MATH_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, 32 /* page 35 */, 33 /* page 36 */, 34 /* page 37 */, 35 /* page 38 */, 36 /* page 39 */, G_UNICODE_OTHER_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, 37 /* page 41 */, G_UNICODE_MATH_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, 38 /* page 43 */, 39 /* page 44 */, 40 /* page 45 */, 41 /* page 46 */, 42 /* page 47 */, 43 /* page 48 
*/, 44 /* page 49 */, 45 /* page 50 */, G_UNICODE_OTHER_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 46 /* page 77 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER 
+ G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 47 /* page 159 */, 48 /* page 160 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 49 /* page 164 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 50 /* page 166 */, 51 /* page 167 */, 52 /* page 168 */, 53 /* page 169 */, 54 /* page 170 
*/, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 55 /* page 215 */, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 56 /* page 250 */, 57 /* page 251 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 58 /* page 253 */, 59 /* page 254 */, 60 /* page 255 */, 61 /* page 256 */, 62 /* page 257 */, 63 /* page 258 */, 64 /* page 259 */, 65 /* page 260 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 66 /* page 264 */, 67 /* page 265 */, 68 /* page 266 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 69 /* page 291 */, 70 /* page 292 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 71 /* page 464 */, 72 /* page 465 */, 73 /* page 466 */, 74 /* page 467 */, 75 /* page 468 */, 76 /* page 469 */, 77 /* page 470 */, 78 /* page 471 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 79 /* page 496 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER 
+ G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 80 /* page 678 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 81 /* page 762 */ }; /* U+E0000 through U+10FFFF */ static const int16_t type_table_part2[768] = { 82 /* page 3584 */, 83 /* page 3585 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 84 /* page 4095 */, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 85 /* page 4351 */ }; static const gunichar attr_data[][256] = { { /* page 0, index 0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x039c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 
0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0000, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x1000000, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178 }, { /* page 1, index 1 */ 0x0101, 0x0100, 0x0103, 0x0102, 0x0105, 0x0104, 0x0107, 0x0106, 0x0109, 0x0108, 0x010b, 0x010a, 0x010d, 0x010c, 0x010f, 0x010e, 0x0111, 0x0110, 0x0113, 0x0112, 0x0115, 0x0114, 0x0117, 0x0116, 0x0119, 0x0118, 0x011b, 0x011a, 0x011d, 0x011c, 0x011f, 0x011e, 0x0121, 0x0120, 0x0123, 0x0122, 0x0125, 0x0124, 0x0127, 0x0126, 0x0129, 0x0128, 0x012b, 0x012a, 0x012d, 0x012c, 0x012f, 0x012e, 0x1000007, 0x0049, 0x0133, 0x0132, 0x0135, 0x0134, 0x0137, 0x0136, 0x0000, 0x013a, 0x0139, 0x013c, 0x013b, 0x013e, 0x013d, 0x0140, 0x013f, 0x0142, 0x0141, 0x0144, 0x0143, 0x0146, 0x0145, 0x0148, 0x0147, 0x1000086, 0x014b, 0x014a, 0x014d, 0x014c, 0x014f, 0x014e, 0x0151, 0x0150, 0x0153, 0x0152, 0x0155, 0x0154, 0x0157, 0x0156, 0x0159, 0x0158, 0x015b, 0x015a, 0x015d, 0x015c, 0x015f, 0x015e, 0x0161, 0x0160, 0x0163, 0x0162, 0x0165, 0x0164, 0x0167, 0x0166, 0x0169, 0x0168, 0x016b, 0x016a, 0x016d, 0x016c, 0x016f, 0x016e, 0x0171, 0x0170, 0x0173, 0x0172, 0x0175, 0x0174, 0x0177, 0x0176, 0x00ff, 0x017a, 0x0179, 0x017c, 0x017b, 0x017e, 0x017d, 0x0053, 0x0243, 0x0253, 0x0183, 0x0182, 0x0185, 0x0184, 0x0254, 0x0188, 0x0187, 0x0256, 0x0257, 0x018c, 0x018b, 0x0000, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0191, 0x0260, 0x0263, 0x01f6, 0x0269, 0x0268, 0x0199, 0x0198, 0x023d, 0x0000, 0x026f, 0x0272, 0x0220, 0x0275, 0x01a1, 0x01a0, 0x01a3, 0x01a2, 0x01a5, 0x01a4, 0x0280, 0x01a8, 0x01a7, 0x0283, 0x0000, 0x0000, 0x01ad, 0x01ac, 0x0288, 0x01b0, 0x01af, 0x028a, 0x028b, 0x01b4, 0x01b3, 0x01b6, 0x01b5, 0x0292, 0x01b9, 0x01b8, 
0x0000, 0x0000, 0x01bd, 0x01bc, 0x0000, 0x01f7, 0x0000, 0x0000, 0x0000, 0x0000, 0x01c6, 0x0000, 0x01c4, 0x01c9, 0x0000, 0x01c7, 0x01cc, 0x0000, 0x01ca, 0x01ce, 0x01cd, 0x01d0, 0x01cf, 0x01d2, 0x01d1, 0x01d4, 0x01d3, 0x01d6, 0x01d5, 0x01d8, 0x01d7, 0x01da, 0x01d9, 0x01dc, 0x01db, 0x018e, 0x01df, 0x01de, 0x01e1, 0x01e0, 0x01e3, 0x01e2, 0x01e5, 0x01e4, 0x01e7, 0x01e6, 0x01e9, 0x01e8, 0x01eb, 0x01ea, 0x01ed, 0x01ec, 0x01ef, 0x01ee, 0x10000ad, 0x01f3, 0x0000, 0x01f1, 0x01f5, 0x01f4, 0x0195, 0x01bf, 0x01f9, 0x01f8, 0x01fb, 0x01fa, 0x01fd, 0x01fc, 0x01ff, 0x01fe }, { /* page 2, index 2 */ 0x0201, 0x0200, 0x0203, 0x0202, 0x0205, 0x0204, 0x0207, 0x0206, 0x0209, 0x0208, 0x020b, 0x020a, 0x020d, 0x020c, 0x020f, 0x020e, 0x0211, 0x0210, 0x0213, 0x0212, 0x0215, 0x0214, 0x0217, 0x0216, 0x0219, 0x0218, 0x021b, 0x021a, 0x021d, 0x021c, 0x021f, 0x021e, 0x019e, 0x0000, 0x0223, 0x0222, 0x0225, 0x0224, 0x0227, 0x0226, 0x0229, 0x0228, 0x022b, 0x022a, 0x022d, 0x022c, 0x022f, 0x022e, 0x0231, 0x0230, 0x0233, 0x0232, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c65, 0x023c, 0x023b, 0x019a, 0x2c66, 0x0000, 0x0000, 0x0242, 0x0241, 0x0180, 0x0289, 0x028c, 0x0247, 0x0246, 0x0249, 0x0248, 0x024b, 0x024a, 0x024d, 0x024c, 0x024f, 0x024e, 0x2c6f, 0x2c6d, 0x0000, 0x0181, 0x0186, 0x0000, 0x0189, 0x018a, 0x0000, 0x018f, 0x0000, 0x0190, 0x0000, 0x0000, 0x0000, 0x0000, 0x0193, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000, 0x0000, 0x0000, 0x0197, 0x0196, 0x0000, 0x2c62, 0x0000, 0x0000, 0x0000, 0x019c, 0x0000, 0x2c6e, 0x019d, 0x0000, 0x0000, 0x019f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c64, 0x0000, 0x0000, 0x01a6, 0x0000, 0x0000, 0x01a9, 0x0000, 0x0000, 0x0000, 0x0000, 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 3, index 3 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0371, 0x0370, 0x0373, 0x0372, 0x0000, 0x0000, 0x0377, 0x0376, 0x0000, 0x0000, 0x0000, 0x03fd, 0x03fe, 0x03ff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03ac, 0x0000, 0x03ad, 0x03ae, 0x03af, 0x0000, 0x03cc, 0x0000, 0x03cd, 0x03ce, 0x100008f, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, 0x03c1, 0x0000, 0x03c3, 0x03c4, 0x03c5, 
0x03c6, 0x03c7, 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x0386, 0x0388, 0x0389, 0x038a, 0x100009e, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, 0x03a1, 0x03a3, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f, 0x03d7, 0x0392, 0x0398, 0x0000, 0x0000, 0x0000, 0x03a6, 0x03a0, 0x03cf, 0x03d9, 0x03d8, 0x03db, 0x03da, 0x03dd, 0x03dc, 0x03df, 0x03de, 0x03e1, 0x03e0, 0x03e3, 0x03e2, 0x03e5, 0x03e4, 0x03e7, 0x03e6, 0x03e9, 0x03e8, 0x03eb, 0x03ea, 0x03ed, 0x03ec, 0x03ef, 0x03ee, 0x039a, 0x03a1, 0x03f9, 0x0000, 0x03b8, 0x0395, 0x0000, 0x03f8, 0x03f7, 0x03f2, 0x03fb, 0x03fa, 0x0000, 0x037b, 0x037c, 0x037d }, { /* page 4, index 4 */ 0x0450, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x045d, 0x045e, 0x045f, 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, 0x0461, 0x0460, 0x0463, 0x0462, 0x0465, 0x0464, 0x0467, 0x0466, 0x0469, 0x0468, 0x046b, 0x046a, 0x046d, 0x046c, 0x046f, 0x046e, 0x0471, 0x0470, 0x0473, 0x0472, 0x0475, 0x0474, 0x0477, 0x0476, 0x0479, 0x0478, 0x047b, 0x047a, 0x047d, 0x047c, 0x047f, 0x047e, 0x0481, 0x0480, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x048b, 0x048a, 0x048d, 0x048c, 0x048f, 0x048e, 0x0491, 0x0490, 0x0493, 0x0492, 0x0495, 0x0494, 0x0497, 0x0496, 0x0499, 0x0498, 0x049b, 0x049a, 
0x049d, 0x049c, 0x049f, 0x049e, 0x04a1, 0x04a0, 0x04a3, 0x04a2, 0x04a5, 0x04a4, 0x04a7, 0x04a6, 0x04a9, 0x04a8, 0x04ab, 0x04aa, 0x04ad, 0x04ac, 0x04af, 0x04ae, 0x04b1, 0x04b0, 0x04b3, 0x04b2, 0x04b5, 0x04b4, 0x04b7, 0x04b6, 0x04b9, 0x04b8, 0x04bb, 0x04ba, 0x04bd, 0x04bc, 0x04bf, 0x04be, 0x04cf, 0x04c2, 0x04c1, 0x04c4, 0x04c3, 0x04c6, 0x04c5, 0x04c8, 0x04c7, 0x04ca, 0x04c9, 0x04cc, 0x04cb, 0x04ce, 0x04cd, 0x04c0, 0x04d1, 0x04d0, 0x04d3, 0x04d2, 0x04d5, 0x04d4, 0x04d7, 0x04d6, 0x04d9, 0x04d8, 0x04db, 0x04da, 0x04dd, 0x04dc, 0x04df, 0x04de, 0x04e1, 0x04e0, 0x04e3, 0x04e2, 0x04e5, 0x04e4, 0x04e7, 0x04e6, 0x04e9, 0x04e8, 0x04eb, 0x04ea, 0x04ed, 0x04ec, 0x04ef, 0x04ee, 0x04f1, 0x04f0, 0x04f3, 0x04f2, 0x04f5, 0x04f4, 0x04f7, 0x04f6, 0x04f9, 0x04f8, 0x04fb, 0x04fa, 0x04fd, 0x04fc, 0x04ff, 0x04fe }, { /* page 5, index 5 */ 0x0501, 0x0500, 0x0503, 0x0502, 0x0505, 0x0504, 0x0507, 0x0506, 0x0509, 0x0508, 0x050b, 0x050a, 0x050d, 0x050c, 0x050f, 0x050e, 0x0511, 0x0510, 0x0513, 0x0512, 0x0515, 0x0514, 0x0517, 0x0516, 0x0519, 0x0518, 0x051b, 0x051a, 0x051d, 0x051c, 0x051f, 0x051e, 0x0521, 0x0520, 0x0523, 0x0522, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, 0x0566, 0x0567, 0x0568, 0x0569, 0x056a, 0x056b, 0x056c, 0x056d, 0x056e, 0x056f, 0x0570, 0x0571, 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577, 0x0578, 0x0579, 0x057a, 0x057b, 0x057c, 0x057d, 0x057e, 0x057f, 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f, 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f, 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x1000044, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 6, index 6 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 7, index 7 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 9, index 8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 10, index 9 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 
0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 11, index 10 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 12, index 11 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 13, index 12 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 14, index 13 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 15, index 14 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 16, index 15 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2d00, 0x2d01, 0x2d02, 0x2d03, 0x2d04, 0x2d05, 0x2d06, 0x2d07, 0x2d08, 0x2d09, 0x2d0a, 0x2d0b, 0x2d0c, 0x2d0d, 0x2d0e, 0x2d0f, 0x2d10, 0x2d11, 0x2d12, 0x2d13, 0x2d14, 0x2d15, 0x2d16, 0x2d17, 0x2d18, 0x2d19, 0x2d1a, 0x2d1b, 0x2d1c, 0x2d1d, 0x2d1e, 0x2d1f, 0x2d20, 0x2d21, 0x2d22, 0x2d23, 0x2d24, 0x2d25, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 23, index 16 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 24, index 17 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 25, index 18 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 27, index 19 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 
}, { /* page 28, index 20 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 29, index 21 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa77d, 0x0000, 0x0000, 0x0000, 0x2c63, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 30, index 22 */ 0x1e01, 0x1e00, 0x1e03, 0x1e02, 0x1e05, 0x1e04, 0x1e07, 0x1e06, 0x1e09, 0x1e08, 0x1e0b, 0x1e0a, 0x1e0d, 0x1e0c, 0x1e0f, 0x1e0e, 0x1e11, 0x1e10, 0x1e13, 0x1e12, 0x1e15, 0x1e14, 0x1e17, 0x1e16, 0x1e19, 0x1e18, 0x1e1b, 0x1e1a, 0x1e1d, 0x1e1c, 0x1e1f, 0x1e1e, 0x1e21, 0x1e20, 0x1e23, 0x1e22, 0x1e25, 0x1e24, 0x1e27, 0x1e26, 0x1e29, 0x1e28, 0x1e2b, 0x1e2a, 0x1e2d, 0x1e2c, 0x1e2f, 0x1e2e, 0x1e31, 0x1e30, 0x1e33, 0x1e32, 0x1e35, 0x1e34, 0x1e37, 0x1e36, 0x1e39, 0x1e38, 0x1e3b, 0x1e3a, 0x1e3d, 0x1e3c, 0x1e3f, 0x1e3e, 0x1e41, 0x1e40, 0x1e43, 0x1e42, 0x1e45, 0x1e44, 0x1e47, 0x1e46, 0x1e49, 0x1e48, 0x1e4b, 0x1e4a, 0x1e4d, 0x1e4c, 0x1e4f, 0x1e4e, 0x1e51, 0x1e50, 0x1e53, 0x1e52, 0x1e55, 0x1e54, 0x1e57, 0x1e56, 0x1e59, 0x1e58, 0x1e5b, 0x1e5a, 0x1e5d, 0x1e5c, 0x1e5f, 0x1e5e, 0x1e61, 0x1e60, 0x1e63, 0x1e62, 0x1e65, 0x1e64, 0x1e67, 0x1e66, 0x1e69, 0x1e68, 0x1e6b, 0x1e6a, 0x1e6d, 0x1e6c, 0x1e6f, 0x1e6e, 0x1e71, 0x1e70, 0x1e73, 0x1e72, 0x1e75, 0x1e74, 0x1e77, 0x1e76, 0x1e79, 0x1e78, 0x1e7b, 0x1e7a, 0x1e7d, 0x1e7c, 0x1e7f, 0x1e7e, 0x1e81, 0x1e80, 0x1e83, 0x1e82, 0x1e85, 0x1e84, 0x1e87, 0x1e86, 0x1e89, 0x1e88, 0x1e8b, 0x1e8a, 0x1e8d, 0x1e8c, 0x1e8f, 0x1e8e, 0x1e91, 0x1e90, 0x1e93, 0x1e92, 0x1e95, 0x1e94, 0x10000b6, 0x10000bf, 0x10000c8, 0x10000d1, 0x10000da, 0x1e60, 0x0000, 0x0000, 0x00df, 0x0000, 0x1ea1, 0x1ea0, 0x1ea3, 0x1ea2, 0x1ea5, 0x1ea4, 0x1ea7, 0x1ea6, 0x1ea9, 0x1ea8, 0x1eab, 0x1eaa, 0x1ead, 0x1eac, 0x1eaf, 0x1eae, 0x1eb1, 0x1eb0, 0x1eb3, 0x1eb2, 0x1eb5, 0x1eb4, 0x1eb7, 0x1eb6, 0x1eb9, 0x1eb8, 0x1ebb, 0x1eba, 0x1ebd, 0x1ebc, 0x1ebf, 0x1ebe, 0x1ec1, 0x1ec0, 0x1ec3, 0x1ec2, 0x1ec5, 0x1ec4, 0x1ec7, 0x1ec6, 0x1ec9, 0x1ec8, 0x1ecb, 0x1eca, 0x1ecd, 0x1ecc, 0x1ecf, 0x1ece, 0x1ed1, 0x1ed0, 0x1ed3, 0x1ed2, 0x1ed5, 0x1ed4, 0x1ed7, 0x1ed6, 0x1ed9, 0x1ed8, 0x1edb, 0x1eda, 0x1edd, 0x1edc, 0x1edf, 0x1ede, 
0x1ee1, 0x1ee0, 0x1ee3, 0x1ee2, 0x1ee5, 0x1ee4, 0x1ee7, 0x1ee6, 0x1ee9, 0x1ee8, 0x1eeb, 0x1eea, 0x1eed, 0x1eec, 0x1eef, 0x1eee, 0x1ef1, 0x1ef0, 0x1ef3, 0x1ef2, 0x1ef5, 0x1ef4, 0x1ef7, 0x1ef6, 0x1ef9, 0x1ef8, 0x1efb, 0x1efa, 0x1efd, 0x1efc, 0x1eff, 0x1efe }, { /* page 31, index 23 */ 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, 0x1f00, 0x1f01, 0x1f02, 0x1f03, 0x1f04, 0x1f05, 0x1f06, 0x1f07, 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, 0x1f1c, 0x1f1d, 0x0000, 0x0000, 0x1f10, 0x1f11, 0x1f12, 0x1f13, 0x1f14, 0x1f15, 0x0000, 0x0000, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, 0x1f20, 0x1f21, 0x1f22, 0x1f23, 0x1f24, 0x1f25, 0x1f26, 0x1f27, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f, 0x1f30, 0x1f31, 0x1f32, 0x1f33, 0x1f34, 0x1f35, 0x1f36, 0x1f37, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000, 0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x0000, 0x0000, 0x10000e3, 0x1f59, 0x10000ee, 0x1f5b, 0x10000fd, 0x1f5d, 0x100010c, 0x1f5f, 0x0000, 0x1f51, 0x0000, 0x1f53, 0x0000, 0x1f55, 0x0000, 0x1f57, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f60, 0x1f61, 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb, 0x0000, 0x0000, 0x10001b7, 0x10001c4, 0x10001d1, 0x10001de, 0x10001eb, 0x10001f8, 0x1000205, 0x1000212, 0x100021f, 0x1000229, 0x1000233, 0x100023d, 0x1000247, 0x1000251, 0x100025b, 0x1000265, 0x100026f, 0x100027c, 0x1000289, 0x1000296, 0x10002a3, 0x10002b0, 0x10002bd, 0x10002ca, 0x10002d7, 0x10002e1, 0x10002eb, 0x10002f5, 0x10002ff, 0x1000309, 0x1000313, 0x100031d, 0x1000327, 0x1000334, 0x1000341, 0x100034e, 0x100035b, 0x1000368, 0x1000375, 0x1000382, 0x100038f, 0x1000399, 0x10003a3, 0x10003ad, 0x10003b7, 0x10003c1, 0x10003cb, 0x10003d5, 0x1fb8, 0x1fb9, 0x100041e, 0x10003df, 0x100042b, 0x0000, 0x100011b, 0x1000466, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x10003eb, 0x0000, 0x0399, 0x0000, 
0x0000, 0x0000, 0x1000436, 0x10003f4, 0x1000443, 0x0000, 0x1000126, 0x1000475, 0x1f72, 0x1f73, 0x1f74, 0x1f75, 0x1000400, 0x0000, 0x0000, 0x0000, 0x1fd8, 0x1fd9, 0x1000131, 0x1000140, 0x0000, 0x0000, 0x100014f, 0x100015a, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x0000, 0x0000, 0x0000, 0x0000, 0x1fe8, 0x1fe9, 0x1000169, 0x1000178, 0x1000187, 0x1fec, 0x1000192, 0x100019d, 0x1fe0, 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x100044e, 0x1000409, 0x100045b, 0x0000, 0x10001ac, 0x1000484, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x1000415, 0x0000, 0x0000, 0x0000 }, { /* page 33, index 24 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03c9, 0x0000, 0x0000, 0x0000, 0x006b, 0x00e5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x214e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2132, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2184, 0x2183, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 44, index 25 */ 0x2c30, 0x2c31, 0x2c32, 0x2c33, 0x2c34, 0x2c35, 0x2c36, 0x2c37, 0x2c38, 0x2c39, 0x2c3a, 0x2c3b, 0x2c3c, 0x2c3d, 0x2c3e, 0x2c3f, 0x2c40, 0x2c41, 0x2c42, 0x2c43, 0x2c44, 0x2c45, 0x2c46, 0x2c47, 0x2c48, 0x2c49, 0x2c4a, 0x2c4b, 0x2c4c, 0x2c4d, 0x2c4e, 0x2c4f, 0x2c50, 0x2c51, 0x2c52, 0x2c53, 0x2c54, 0x2c55, 0x2c56, 0x2c57, 0x2c58, 0x2c59, 0x2c5a, 0x2c5b, 0x2c5c, 0x2c5d, 0x2c5e, 0x0000, 0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, 0x2c06, 0x2c07, 0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, 0x2c0f, 0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17, 0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f, 0x2c20, 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27, 0x2c28, 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x0000, 0x2c61, 0x2c60, 0x026b, 0x1d7d, 0x027d, 0x023a, 0x023e, 0x2c68, 0x2c67, 0x2c6a, 0x2c69, 0x2c6c, 0x2c6b, 0x0251, 0x0271, 0x0250, 0x0000, 0x0000, 0x2c73, 0x2c72, 0x0000, 0x2c76, 0x2c75, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c81, 0x2c80, 0x2c83, 0x2c82, 0x2c85, 0x2c84, 0x2c87, 0x2c86, 0x2c89, 0x2c88, 0x2c8b, 0x2c8a, 0x2c8d, 0x2c8c, 0x2c8f, 0x2c8e, 0x2c91, 0x2c90, 0x2c93, 0x2c92, 0x2c95, 0x2c94, 0x2c97, 0x2c96, 0x2c99, 0x2c98, 0x2c9b, 0x2c9a, 0x2c9d, 0x2c9c, 0x2c9f, 0x2c9e, 0x2ca1, 0x2ca0, 0x2ca3, 0x2ca2, 
0x2ca5, 0x2ca4, 0x2ca7, 0x2ca6, 0x2ca9, 0x2ca8, 0x2cab, 0x2caa, 0x2cad, 0x2cac, 0x2caf, 0x2cae, 0x2cb1, 0x2cb0, 0x2cb3, 0x2cb2, 0x2cb5, 0x2cb4, 0x2cb7, 0x2cb6, 0x2cb9, 0x2cb8, 0x2cbb, 0x2cba, 0x2cbd, 0x2cbc, 0x2cbf, 0x2cbe, 0x2cc1, 0x2cc0, 0x2cc3, 0x2cc2, 0x2cc5, 0x2cc4, 0x2cc7, 0x2cc6, 0x2cc9, 0x2cc8, 0x2ccb, 0x2cca, 0x2ccd, 0x2ccc, 0x2ccf, 0x2cce, 0x2cd1, 0x2cd0, 0x2cd3, 0x2cd2, 0x2cd5, 0x2cd4, 0x2cd7, 0x2cd6, 0x2cd9, 0x2cd8, 0x2cdb, 0x2cda, 0x2cdd, 0x2cdc, 0x2cdf, 0x2cde, 0x2ce1, 0x2ce0, 0x2ce3, 0x2ce2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 45, index 26 */ 0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, 0x10a8, 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, 0x10b0, 0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, 0x10b8, 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, 0x10c0, 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 166, index 27 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa641, 0xa640, 0xa643, 0xa642, 0xa645, 0xa644, 0xa647, 0xa646, 0xa649, 0xa648, 0xa64b, 0xa64a, 0xa64d, 0xa64c, 0xa64f, 0xa64e, 0xa651, 0xa650, 0xa653, 0xa652, 0xa655, 0xa654, 0xa657, 0xa656, 0xa659, 0xa658, 0xa65b, 0xa65a, 0xa65d, 0xa65c, 0xa65f, 0xa65e, 0x0000, 0x0000, 0xa663, 0xa662, 0xa665, 0xa664, 0xa667, 0xa666, 0xa669, 0xa668, 0xa66b, 0xa66a, 0xa66d, 0xa66c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa681, 0xa680, 0xa683, 0xa682, 0xa685, 0xa684, 0xa687, 0xa686, 0xa689, 0xa688, 0xa68b, 0xa68a, 0xa68d, 0xa68c, 0xa68f, 0xa68e, 
0xa691, 0xa690, 0xa693, 0xa692, 0xa695, 0xa694, 0xa697, 0xa696, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 167, index 28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa723, 0xa722, 0xa725, 0xa724, 0xa727, 0xa726, 0xa729, 0xa728, 0xa72b, 0xa72a, 0xa72d, 0xa72c, 0xa72f, 0xa72e, 0x0000, 0x0000, 0xa733, 0xa732, 0xa735, 0xa734, 0xa737, 0xa736, 0xa739, 0xa738, 0xa73b, 0xa73a, 0xa73d, 0xa73c, 0xa73f, 0xa73e, 0xa741, 0xa740, 0xa743, 0xa742, 0xa745, 0xa744, 0xa747, 0xa746, 0xa749, 0xa748, 0xa74b, 0xa74a, 0xa74d, 0xa74c, 0xa74f, 0xa74e, 0xa751, 0xa750, 0xa753, 0xa752, 0xa755, 0xa754, 0xa757, 0xa756, 0xa759, 0xa758, 0xa75b, 0xa75a, 0xa75d, 0xa75c, 0xa75f, 0xa75e, 0xa761, 0xa760, 0xa763, 0xa762, 0xa765, 0xa764, 0xa767, 0xa766, 0xa769, 0xa768, 0xa76b, 0xa76a, 0xa76d, 0xa76c, 0xa76f, 0xa76e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa77a, 0xa779, 0xa77c, 0xa77b, 0x1d79, 0xa77f, 0xa77e, 0xa781, 0xa780, 0xa783, 0xa782, 0xa785, 0xa784, 
0xa787, 0xa786, 0x0000, 0x0000, 0x0000, 0xa78c, 0xa78b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 168, index 29 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 169, index 30 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 170, index 31 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 251, index 32 */ 0x100000f, 0x1000016, 0x100001d, 0x1000024, 0x100002d, 0x1000036, 0x100003d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x100004f, 0x100005a, 0x1000065, 0x1000070, 0x100007b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 255, index 33 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff41, 0xff42, 0xff43, 0xff44, 0xff45, 0xff46, 0xff47, 0xff48, 0xff49, 0xff4a, 0xff4b, 0xff4c, 0xff4d, 0xff4e, 0xff4f, 0xff50, 0xff51, 0xff52, 0xff53, 0xff54, 0xff55, 0xff56, 0xff57, 0xff58, 0xff59, 0xff5a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 
0xff2f, 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, 0xff3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 260, index 34 */ 0x10428, 0x10429, 0x1042a, 0x1042b, 0x1042c, 0x1042d, 0x1042e, 0x1042f, 0x10430, 0x10431, 0x10432, 0x10433, 0x10434, 0x10435, 0x10436, 0x10437, 0x10438, 0x10439, 0x1043a, 0x1043b, 0x1043c, 0x1043d, 0x1043e, 0x1043f, 0x10440, 0x10441, 0x10442, 0x10443, 0x10444, 0x10445, 0x10446, 0x10447, 0x10448, 0x10449, 0x1044a, 0x1044b, 0x1044c, 0x1044d, 0x1044e, 0x1044f, 0x10400, 0x10401, 0x10402, 0x10403, 0x10404, 0x10405, 0x10406, 0x10407, 0x10408, 0x10409, 0x1040a, 0x1040b, 0x1040c, 0x1040d, 0x1040e, 0x1040f, 0x10410, 0x10411, 0x10412, 0x10413, 0x10414, 
0x10415, 0x10416, 0x10417, 0x10418, 0x10419, 0x1041a, 0x1041b, 0x1041c, 0x1041d, 0x1041e, 0x1041f, 0x10420, 0x10421, 0x10422, 0x10423, 0x10424, 0x10425, 0x10426, 0x10427, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 471, index 35 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009 } }; /* U+0000 through U+2FAFF */ static const int16_t attr_table_part1[763] = { 0 /* page 0 */, 1 /* page 1 */, 2 /* page 2 */, 3 /* page 3 */, 4 /* page 4 */, 5 /* page 5 */, 6 /* page 6 */, 7 /* page 7 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 8 /* page 9 */, 9 /* page 10 */, 10 /* page 11 */, 11 /* page 12 */, 12 /* page 13 */, 13 /* page 
14 */, 14 /* page 15 */, 15 /* page 16 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 16 /* page 23 */, 17 /* page 24 */, 18 /* page 25 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 19 /* page 27 */, 20 /* page 28 */, 21 /* page 29 */, 22 /* page 30 */, 23 /* page 31 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 24 /* page 33 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 25 /* page 44 */, 26 /* page 45 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 27 /* page 166 */, 28 /* page 167 */, 29 /* page 168 */, 30 /* page 169 */, 31 /* page 170 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 
0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 32 /* page 251 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 33 /* page 255 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 34 /* page 260 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 
+ G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 35 /* page 471 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX }; /* U+E0000 through U+10FFFF */ static const int16_t attr_table_part2[768] = { 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX }; static const gunichar title_table[][3] = { { 0x01c5, 0x01c4, 0x01c6 }, { 0x01c8, 0x01c7, 0x01c9 }, { 0x01cb, 0x01ca, 0x01cc }, { 0x01f2, 0x01f1, 0x01f3 }, { 0x1f88, 0x0000, 0x1f80 }, { 0x1f89, 0x0000, 0x1f81 }, { 0x1f8a, 0x0000, 0x1f82 }, { 0x1f8b, 0x0000, 0x1f83 }, { 0x1f8c, 0x0000, 0x1f84 }, { 0x1f8d, 0x0000, 0x1f85 }, { 0x1f8e, 0x0000, 0x1f86 }, { 0x1f8f, 0x0000, 0x1f87 }, { 0x1f98, 0x0000, 0x1f90 }, { 0x1f99, 0x0000, 0x1f91 }, { 0x1f9a, 0x0000, 0x1f92 }, { 0x1f9b, 0x0000, 0x1f93 }, { 0x1f9c, 0x0000, 0x1f94 }, { 0x1f9d, 0x0000, 0x1f95 }, { 0x1f9e, 0x0000, 0x1f96 }, { 0x1f9f, 0x0000, 0x1f97 }, { 0x1fa8, 0x0000, 0x1fa0 }, { 0x1fa9, 0x0000, 0x1fa1 }, { 0x1faa, 0x0000, 0x1fa2 }, { 0x1fab, 0x0000, 0x1fa3 }, { 0x1fac, 0x0000, 0x1fa4 }, { 0x1fad, 0x0000, 0x1fa5 }, { 0x1fae, 0x0000, 0x1fa6 }, { 0x1faf, 0x0000, 0x1fa7 }, { 0x1fbc, 0x0000, 0x1fb3 }, { 0x1fcc, 0x0000, 0x1fc3 }, { 0x1ffc, 0x0000, 0x1ff3 } }; /* Table of special cases for case conversion; each record contains * First, the best single character mapping to lowercase if Lu, * and to uppercase if Ll, followed by the output mapping for the two cases * other than the case of the codepoint, in the order [Ll],[Lu],[Lt], * encoded in UTF-8, separated and terminated by a null 
character. */ static const char special_case_table[] = { "\x00\x53\x53\x00\x53\x73\0" /* offset 0 */ "\x69\x69\xcc\x87\x00\xc4\xb0\0" /* offset 7 */ "\x00\x46\x46\x00\x46\x66\0" /* offset 15 */ "\x00\x46\x49\x00\x46\x69\0" /* offset 22 */ "\x00\x46\x4c\x00\x46\x6c\0" /* offset 29 */ "\x00\x46\x46\x49\x00\x46\x66\x69\0" /* offset 36 */ "\x00\x46\x46\x4c\x00\x46\x66\x6c\0" /* offset 45 */ "\x00\x53\x54\x00\x53\x74\0" /* offset 54 */ "\x00\x53\x54\x00\x53\x74\0" /* offset 61 */ "\x00\xd4\xb5\xd5\x92\x00\xd4\xb5\xd6\x82\0" /* offset 68 */ "\x00\xd5\x84\xd5\x86\x00\xd5\x84\xd5\xb6\0" /* offset 79 */ "\x00\xd5\x84\xd4\xb5\x00\xd5\x84\xd5\xa5\0" /* offset 90 */ "\x00\xd5\x84\xd4\xbb\x00\xd5\x84\xd5\xab\0" /* offset 101 */ "\x00\xd5\x8e\xd5\x86\x00\xd5\x8e\xd5\xb6\0" /* offset 112 */ "\x00\xd5\x84\xd4\xbd\x00\xd5\x84\xd5\xad\0" /* offset 123 */ "\x00\xca\xbc\x4e\x00\xca\xbc\x4e\0" /* offset 134 */ "\x00\xce\x99\xcc\x88\xcc\x81\x00\xce\x99\xcc\x88\xcc\x81\0" /* offset 143 */ "\x00\xce\xa5\xcc\x88\xcc\x81\x00\xce\xa5\xcc\x88\xcc\x81\0" /* offset 158 */ "\x00\x4a\xcc\x8c\x00\x4a\xcc\x8c\0" /* offset 173 */ "\x00\x48\xcc\xb1\x00\x48\xcc\xb1\0" /* offset 182 */ "\x00\x54\xcc\x88\x00\x54\xcc\x88\0" /* offset 191 */ "\x00\x57\xcc\x8a\x00\x57\xcc\x8a\0" /* offset 200 */ "\x00\x59\xcc\x8a\x00\x59\xcc\x8a\0" /* offset 209 */ "\x00\x41\xca\xbe\x00\x41\xca\xbe\0" /* offset 218 */ "\x00\xce\xa5\xcc\x93\x00\xce\xa5\xcc\x93\0" /* offset 227 */ "\x00\xce\xa5\xcc\x93\xcc\x80\x00\xce\xa5\xcc\x93\xcc\x80\0" /* offset 238 */ "\x00\xce\xa5\xcc\x93\xcc\x81\x00\xce\xa5\xcc\x93\xcc\x81\0" /* offset 253 */ "\x00\xce\xa5\xcc\x93\xcd\x82\x00\xce\xa5\xcc\x93\xcd\x82\0" /* offset 268 */ "\x00\xce\x91\xcd\x82\x00\xce\x91\xcd\x82\0" /* offset 283 */ "\x00\xce\x97\xcd\x82\x00\xce\x97\xcd\x82\0" /* offset 294 */ "\x00\xce\x99\xcc\x88\xcc\x80\x00\xce\x99\xcc\x88\xcc\x80\0" /* offset 305 */ "\x00\xce\x99\xcc\x88\xcc\x81\x00\xce\x99\xcc\x88\xcc\x81\0" /* offset 320 */ 
"\x00\xce\x99\xcd\x82\x00\xce\x99\xcd\x82\0" /* offset 335 */ "\x00\xce\x99\xcc\x88\xcd\x82\x00\xce\x99\xcc\x88\xcd\x82\0" /* offset 346 */ "\x00\xce\xa5\xcc\x88\xcc\x80\x00\xce\xa5\xcc\x88\xcc\x80\0" /* offset 361 */ "\x00\xce\xa5\xcc\x88\xcc\x81\x00\xce\xa5\xcc\x88\xcc\x81\0" /* offset 376 */ "\x00\xce\xa1\xcc\x93\x00\xce\xa1\xcc\x93\0" /* offset 391 */ "\x00\xce\xa5\xcd\x82\x00\xce\xa5\xcd\x82\0" /* offset 402 */ "\x00\xce\xa5\xcc\x88\xcd\x82\x00\xce\xa5\xcc\x88\xcd\x82\0" /* offset 413 */ "\x00\xce\xa9\xcd\x82\x00\xce\xa9\xcd\x82\0" /* offset 428 */ "\xe1\xbe\x88\xe1\xbc\x88\xce\x99\x00\xe1\xbe\x88\0" /* offset 439 */ "\xe1\xbe\x89\xe1\xbc\x89\xce\x99\x00\xe1\xbe\x89\0" /* offset 452 */ "\xe1\xbe\x8a\xe1\xbc\x8a\xce\x99\x00\xe1\xbe\x8a\0" /* offset 465 */ "\xe1\xbe\x8b\xe1\xbc\x8b\xce\x99\x00\xe1\xbe\x8b\0" /* offset 478 */ "\xe1\xbe\x8c\xe1\xbc\x8c\xce\x99\x00\xe1\xbe\x8c\0" /* offset 491 */ "\xe1\xbe\x8d\xe1\xbc\x8d\xce\x99\x00\xe1\xbe\x8d\0" /* offset 504 */ "\xe1\xbe\x8e\xe1\xbc\x8e\xce\x99\x00\xe1\xbe\x8e\0" /* offset 517 */ "\xe1\xbe\x8f\xe1\xbc\x8f\xce\x99\x00\xe1\xbe\x8f\0" /* offset 530 */ "\xe1\xbe\x80\x00\xe1\xbc\x88\xce\x99\0" /* offset 543 */ "\xe1\xbe\x81\x00\xe1\xbc\x89\xce\x99\0" /* offset 553 */ "\xe1\xbe\x82\x00\xe1\xbc\x8a\xce\x99\0" /* offset 563 */ "\xe1\xbe\x83\x00\xe1\xbc\x8b\xce\x99\0" /* offset 573 */ "\xe1\xbe\x84\x00\xe1\xbc\x8c\xce\x99\0" /* offset 583 */ "\xe1\xbe\x85\x00\xe1\xbc\x8d\xce\x99\0" /* offset 593 */ "\xe1\xbe\x86\x00\xe1\xbc\x8e\xce\x99\0" /* offset 603 */ "\xe1\xbe\x87\x00\xe1\xbc\x8f\xce\x99\0" /* offset 613 */ "\xe1\xbe\x98\xe1\xbc\xa8\xce\x99\x00\xe1\xbe\x98\0" /* offset 623 */ "\xe1\xbe\x99\xe1\xbc\xa9\xce\x99\x00\xe1\xbe\x99\0" /* offset 636 */ "\xe1\xbe\x9a\xe1\xbc\xaa\xce\x99\x00\xe1\xbe\x9a\0" /* offset 649 */ "\xe1\xbe\x9b\xe1\xbc\xab\xce\x99\x00\xe1\xbe\x9b\0" /* offset 662 */ "\xe1\xbe\x9c\xe1\xbc\xac\xce\x99\x00\xe1\xbe\x9c\0" /* offset 675 */ "\xe1\xbe\x9d\xe1\xbc\xad\xce\x99\x00\xe1\xbe\x9d\0" /* offset 
688 */ "\xe1\xbe\x9e\xe1\xbc\xae\xce\x99\x00\xe1\xbe\x9e\0" /* offset 701 */ "\xe1\xbe\x9f\xe1\xbc\xaf\xce\x99\x00\xe1\xbe\x9f\0" /* offset 714 */ "\xe1\xbe\x90\x00\xe1\xbc\xa8\xce\x99\0" /* offset 727 */ "\xe1\xbe\x91\x00\xe1\xbc\xa9\xce\x99\0" /* offset 737 */ "\xe1\xbe\x92\x00\xe1\xbc\xaa\xce\x99\0" /* offset 747 */ "\xe1\xbe\x93\x00\xe1\xbc\xab\xce\x99\0" /* offset 757 */ "\xe1\xbe\x94\x00\xe1\xbc\xac\xce\x99\0" /* offset 767 */ "\xe1\xbe\x95\x00\xe1\xbc\xad\xce\x99\0" /* offset 777 */ "\xe1\xbe\x96\x00\xe1\xbc\xae\xce\x99\0" /* offset 787 */ "\xe1\xbe\x97\x00\xe1\xbc\xaf\xce\x99\0" /* offset 797 */ "\xe1\xbe\xa8\xe1\xbd\xa8\xce\x99\x00\xe1\xbe\xa8\0" /* offset 807 */ "\xe1\xbe\xa9\xe1\xbd\xa9\xce\x99\x00\xe1\xbe\xa9\0" /* offset 820 */ "\xe1\xbe\xaa\xe1\xbd\xaa\xce\x99\x00\xe1\xbe\xaa\0" /* offset 833 */ "\xe1\xbe\xab\xe1\xbd\xab\xce\x99\x00\xe1\xbe\xab\0" /* offset 846 */ "\xe1\xbe\xac\xe1\xbd\xac\xce\x99\x00\xe1\xbe\xac\0" /* offset 859 */ "\xe1\xbe\xad\xe1\xbd\xad\xce\x99\x00\xe1\xbe\xad\0" /* offset 872 */ "\xe1\xbe\xae\xe1\xbd\xae\xce\x99\x00\xe1\xbe\xae\0" /* offset 885 */ "\xe1\xbe\xaf\xe1\xbd\xaf\xce\x99\x00\xe1\xbe\xaf\0" /* offset 898 */ "\xe1\xbe\xa0\x00\xe1\xbd\xa8\xce\x99\0" /* offset 911 */ "\xe1\xbe\xa1\x00\xe1\xbd\xa9\xce\x99\0" /* offset 921 */ "\xe1\xbe\xa2\x00\xe1\xbd\xaa\xce\x99\0" /* offset 931 */ "\xe1\xbe\xa3\x00\xe1\xbd\xab\xce\x99\0" /* offset 941 */ "\xe1\xbe\xa4\x00\xe1\xbd\xac\xce\x99\0" /* offset 951 */ "\xe1\xbe\xa5\x00\xe1\xbd\xad\xce\x99\0" /* offset 961 */ "\xe1\xbe\xa6\x00\xe1\xbd\xae\xce\x99\0" /* offset 971 */ "\xe1\xbe\xa7\x00\xe1\xbd\xaf\xce\x99\0" /* offset 981 */ "\xe1\xbe\xbc\xce\x91\xce\x99\x00\xe1\xbe\xbc\0" /* offset 991 */ "\xe1\xbe\xb3\x00\xce\x91\xce\x99\0" /* offset 1003 */ "\xe1\xbf\x8c\xce\x97\xce\x99\x00\xe1\xbf\x8c\0" /* offset 1012 */ "\xe1\xbf\x83\x00\xce\x97\xce\x99\0" /* offset 1024 */ "\xe1\xbf\xbc\xce\xa9\xce\x99\x00\xe1\xbf\xbc\0" /* offset 1033 */ "\xe1\xbf\xb3\x00\xce\xa9\xce\x99\0" /* offset 1045 */ 
"\x00\xe1\xbe\xba\xce\x99\x00\xe1\xbe\xba\xcd\x85\0" /* offset 1054 */ "\x00\xce\x86\xce\x99\x00\xce\x86\xcd\x85\0" /* offset 1067 */ "\x00\xe1\xbf\x8a\xce\x99\x00\xe1\xbf\x8a\xcd\x85\0" /* offset 1078 */ "\x00\xce\x89\xce\x99\x00\xce\x89\xcd\x85\0" /* offset 1091 */ "\x00\xe1\xbf\xba\xce\x99\x00\xe1\xbf\xba\xcd\x85\0" /* offset 1102 */ "\x00\xce\x8f\xce\x99\x00\xce\x8f\xcd\x85\0" /* offset 1115 */ "\x00\xce\x91\xcd\x82\xce\x99\x00\xce\x91\xcd\x82\xcd\x85\0" /* offset 1126 */ "\x00\xce\x97\xcd\x82\xce\x99\x00\xce\x97\xcd\x82\xcd\x85\0" /* offset 1141 */ "\x00\xce\xa9\xcd\x82\xce\x99\x00\xce\xa9\xcd\x82\xcd\x85\0" /* offset 1156 */ }; /* Table of casefolding cases that can't be derived by lowercasing */ static const struct { uint16_t ch; char data[7]; } casefold_table[] = { { 0x00b5, "\xce\xbc" }, { 0x00df, "\x73\x73" }, { 0x0130, "\x69\xcc\x87" }, { 0x0149, "\xca\xbc\x6e" }, { 0x017f, "\x73" }, { 0x01f0, "\x6a\xcc\x8c" }, { 0x0345, "\xce\xb9" }, { 0x0390, "\xce\xb9\xcc\x88\xcc\x81" }, { 0x03b0, "\xcf\x85\xcc\x88\xcc\x81" }, { 0x03c2, "\xcf\x83" }, { 0x03d0, "\xce\xb2" }, { 0x03d1, "\xce\xb8" }, { 0x03d5, "\xcf\x86" }, { 0x03d6, "\xcf\x80" }, { 0x03f0, "\xce\xba" }, { 0x03f1, "\xcf\x81" }, { 0x03f5, "\xce\xb5" }, { 0x0587, "\xd5\xa5\xd6\x82" }, { 0x1e96, "\x68\xcc\xb1" }, { 0x1e97, "\x74\xcc\x88" }, { 0x1e98, "\x77\xcc\x8a" }, { 0x1e99, "\x79\xcc\x8a" }, { 0x1e9a, "\x61\xca\xbe" }, { 0x1e9b, "\xe1\xb9\xa1" }, { 0x1e9e, "\x73\x73" }, { 0x1f50, "\xcf\x85\xcc\x93" }, { 0x1f52, "\xcf\x85\xcc\x93\xcc\x80" }, { 0x1f54, "\xcf\x85\xcc\x93\xcc\x81" }, { 0x1f56, "\xcf\x85\xcc\x93\xcd\x82" }, { 0x1f80, "\xe1\xbc\x80\xce\xb9" }, { 0x1f81, "\xe1\xbc\x81\xce\xb9" }, { 0x1f82, "\xe1\xbc\x82\xce\xb9" }, { 0x1f83, "\xe1\xbc\x83\xce\xb9" }, { 0x1f84, "\xe1\xbc\x84\xce\xb9" }, { 0x1f85, "\xe1\xbc\x85\xce\xb9" }, { 0x1f86, "\xe1\xbc\x86\xce\xb9" }, { 0x1f87, "\xe1\xbc\x87\xce\xb9" }, { 0x1f88, "\xe1\xbc\x80\xce\xb9" }, { 0x1f89, "\xe1\xbc\x81\xce\xb9" }, { 0x1f8a, 
"\xe1\xbc\x82\xce\xb9" }, { 0x1f8b, "\xe1\xbc\x83\xce\xb9" }, { 0x1f8c, "\xe1\xbc\x84\xce\xb9" }, { 0x1f8d, "\xe1\xbc\x85\xce\xb9" }, { 0x1f8e, "\xe1\xbc\x86\xce\xb9" }, { 0x1f8f, "\xe1\xbc\x87\xce\xb9" }, { 0x1f90, "\xe1\xbc\xa0\xce\xb9" }, { 0x1f91, "\xe1\xbc\xa1\xce\xb9" }, { 0x1f92, "\xe1\xbc\xa2\xce\xb9" }, { 0x1f93, "\xe1\xbc\xa3\xce\xb9" }, { 0x1f94, "\xe1\xbc\xa4\xce\xb9" }, { 0x1f95, "\xe1\xbc\xa5\xce\xb9" }, { 0x1f96, "\xe1\xbc\xa6\xce\xb9" }, { 0x1f97, "\xe1\xbc\xa7\xce\xb9" }, { 0x1f98, "\xe1\xbc\xa0\xce\xb9" }, { 0x1f99, "\xe1\xbc\xa1\xce\xb9" }, { 0x1f9a, "\xe1\xbc\xa2\xce\xb9" }, { 0x1f9b, "\xe1\xbc\xa3\xce\xb9" }, { 0x1f9c, "\xe1\xbc\xa4\xce\xb9" }, { 0x1f9d, "\xe1\xbc\xa5\xce\xb9" }, { 0x1f9e, "\xe1\xbc\xa6\xce\xb9" }, { 0x1f9f, "\xe1\xbc\xa7\xce\xb9" }, { 0x1fa0, "\xe1\xbd\xa0\xce\xb9" }, { 0x1fa1, "\xe1\xbd\xa1\xce\xb9" }, { 0x1fa2, "\xe1\xbd\xa2\xce\xb9" }, { 0x1fa3, "\xe1\xbd\xa3\xce\xb9" }, { 0x1fa4, "\xe1\xbd\xa4\xce\xb9" }, { 0x1fa5, "\xe1\xbd\xa5\xce\xb9" }, { 0x1fa6, "\xe1\xbd\xa6\xce\xb9" }, { 0x1fa7, "\xe1\xbd\xa7\xce\xb9" }, { 0x1fa8, "\xe1\xbd\xa0\xce\xb9" }, { 0x1fa9, "\xe1\xbd\xa1\xce\xb9" }, { 0x1faa, "\xe1\xbd\xa2\xce\xb9" }, { 0x1fab, "\xe1\xbd\xa3\xce\xb9" }, { 0x1fac, "\xe1\xbd\xa4\xce\xb9" }, { 0x1fad, "\xe1\xbd\xa5\xce\xb9" }, { 0x1fae, "\xe1\xbd\xa6\xce\xb9" }, { 0x1faf, "\xe1\xbd\xa7\xce\xb9" }, { 0x1fb2, "\xe1\xbd\xb0\xce\xb9" }, { 0x1fb3, "\xce\xb1\xce\xb9" }, { 0x1fb4, "\xce\xac\xce\xb9" }, { 0x1fb6, "\xce\xb1\xcd\x82" }, { 0x1fb7, "\xce\xb1\xcd\x82\xce\xb9" }, { 0x1fbc, "\xce\xb1\xce\xb9" }, { 0x1fbe, "\xce\xb9" }, { 0x1fc2, "\xe1\xbd\xb4\xce\xb9" }, { 0x1fc3, "\xce\xb7\xce\xb9" }, { 0x1fc4, "\xce\xae\xce\xb9" }, { 0x1fc6, "\xce\xb7\xcd\x82" }, { 0x1fc7, "\xce\xb7\xcd\x82\xce\xb9" }, { 0x1fcc, "\xce\xb7\xce\xb9" }, { 0x1fd2, "\xce\xb9\xcc\x88\xcc\x80" }, { 0x1fd3, "\xce\xb9\xcc\x88\xcc\x81" }, { 0x1fd6, "\xce\xb9\xcd\x82" }, { 0x1fd7, "\xce\xb9\xcc\x88\xcd\x82" }, { 0x1fe2, "\xcf\x85\xcc\x88\xcc\x80" }, { 0x1fe3, 
"\xcf\x85\xcc\x88\xcc\x81" }, { 0x1fe4, "\xcf\x81\xcc\x93" }, { 0x1fe6, "\xcf\x85\xcd\x82" }, { 0x1fe7, "\xcf\x85\xcc\x88\xcd\x82" }, { 0x1ff2, "\xe1\xbd\xbc\xce\xb9" }, { 0x1ff3, "\xcf\x89\xce\xb9" }, { 0x1ff4, "\xcf\x8e\xce\xb9" }, { 0x1ff6, "\xcf\x89\xcd\x82" }, { 0x1ff7, "\xcf\x89\xcd\x82\xce\xb9" }, { 0x1ffc, "\xcf\x89\xce\xb9" }, { 0x2160, "\xe2\x85\xb0" }, { 0x2161, "\xe2\x85\xb1" }, { 0x2162, "\xe2\x85\xb2" }, { 0x2163, "\xe2\x85\xb3" }, { 0x2164, "\xe2\x85\xb4" }, { 0x2165, "\xe2\x85\xb5" }, { 0x2166, "\xe2\x85\xb6" }, { 0x2167, "\xe2\x85\xb7" }, { 0x2168, "\xe2\x85\xb8" }, { 0x2169, "\xe2\x85\xb9" }, { 0x216a, "\xe2\x85\xba" }, { 0x216b, "\xe2\x85\xbb" }, { 0x216c, "\xe2\x85\xbc" }, { 0x216d, "\xe2\x85\xbd" }, { 0x216e, "\xe2\x85\xbe" }, { 0x216f, "\xe2\x85\xbf" }, { 0x24b6, "\xe2\x93\x90" }, { 0x24b7, "\xe2\x93\x91" }, { 0x24b8, "\xe2\x93\x92" }, { 0x24b9, "\xe2\x93\x93" }, { 0x24ba, "\xe2\x93\x94" }, { 0x24bb, "\xe2\x93\x95" }, { 0x24bc, "\xe2\x93\x96" }, { 0x24bd, "\xe2\x93\x97" }, { 0x24be, "\xe2\x93\x98" }, { 0x24bf, "\xe2\x93\x99" }, { 0x24c0, "\xe2\x93\x9a" }, { 0x24c1, "\xe2\x93\x9b" }, { 0x24c2, "\xe2\x93\x9c" }, { 0x24c3, "\xe2\x93\x9d" }, { 0x24c4, "\xe2\x93\x9e" }, { 0x24c5, "\xe2\x93\x9f" }, { 0x24c6, "\xe2\x93\xa0" }, { 0x24c7, "\xe2\x93\xa1" }, { 0x24c8, "\xe2\x93\xa2" }, { 0x24c9, "\xe2\x93\xa3" }, { 0x24ca, "\xe2\x93\xa4" }, { 0x24cb, "\xe2\x93\xa5" }, { 0x24cc, "\xe2\x93\xa6" }, { 0x24cd, "\xe2\x93\xa7" }, { 0x24ce, "\xe2\x93\xa8" }, { 0x24cf, "\xe2\x93\xa9" }, { 0xfb00, "\x66\x66" }, { 0xfb01, "\x66\x69" }, { 0xfb02, "\x66\x6c" }, { 0xfb03, "\x66\x66\x69" }, { 0xfb04, "\x66\x66\x6c" }, { 0xfb05, "\x73\x74" }, { 0xfb06, "\x73\x74" }, { 0xfb13, "\xd5\xb4\xd5\xb6" }, { 0xfb14, "\xd5\xb4\xd5\xa5" }, { 0xfb15, "\xd5\xb4\xd5\xab" }, { 0xfb16, "\xd5\xbe\xd5\xb6" }, { 0xfb17, "\xd5\xb4\xd5\xad" }, }; #endif /* CHARTABLES_H */ 
LucenePlusPlus-rel_3.0.4/src/core/util/unicode/guniprop.cpp000066400000000000000000000374331217574114600240160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// /* Unicode character properties. * * Copyright (C) 1999 Tom Tromey * Copyright (C) 2000 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #include "LuceneInc.h" #include "guniprop.h" #define ATTR_TABLE(Page) (((Page) <= G_UNICODE_LAST_PAGE_PART1) \ ? attr_table_part1[Page] \ : attr_table_part2[(Page) - 0xe00]) #define ATTTABLE(Page, Char) \ ((ATTR_TABLE(Page) == G_UNICODE_MAX_TABLE_INDEX) ? 0 : (attr_data[ATTR_TABLE(Page)][Char])) #define TTYPE_PART1(Page, Char) \ ((type_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ ? (type_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \ : (type_data[type_table_part1[Page]][Char])) #define TTYPE_PART2(Page, Char) \ ((type_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ ? 
(type_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \ : (type_data[type_table_part2[Page]][Char])) #define TYPE(Char) \ (((Char) <= G_UNICODE_LAST_CHAR_PART1) \ ? TTYPE_PART1 ((Char) >> 8, (Char) & 0xff) \ : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \ ? TTYPE_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \ : G_UNICODE_UNASSIGNED)) #define IS(Type, Class) (((guint)1 << (Type)) & (Class)) #define OR(Type, Rest) (((guint)1 << (Type)) | (Rest)) /* Count the number of elements in an array. The array must be defined * as such; using this with a dynamically allocated array will give * incorrect results. */ #define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) #define ISALPHA(Type) IS ((Type), \ OR (G_UNICODE_LOWERCASE_LETTER, \ OR (G_UNICODE_UPPERCASE_LETTER, \ OR (G_UNICODE_TITLECASE_LETTER, \ OR (G_UNICODE_MODIFIER_LETTER, \ OR (G_UNICODE_OTHER_LETTER, 0)))))) #define ISALDIGIT(Type) IS ((Type), \ OR (G_UNICODE_DECIMAL_NUMBER, \ OR (G_UNICODE_LETTER_NUMBER, \ OR (G_UNICODE_OTHER_NUMBER, \ OR (G_UNICODE_LOWERCASE_LETTER, \ OR (G_UNICODE_UPPERCASE_LETTER, \ OR (G_UNICODE_TITLECASE_LETTER, \ OR (G_UNICODE_MODIFIER_LETTER, \ OR (G_UNICODE_OTHER_LETTER, 0))))))))) #define ISMARK(Type) IS ((Type), \ OR (G_UNICODE_NON_SPACING_MARK, \ OR (G_UNICODE_COMBINING_MARK, \ OR (G_UNICODE_ENCLOSING_MARK, 0)))) #define ISZEROWIDTHTYPE(Type) IS ((Type), \ OR (G_UNICODE_NON_SPACING_MARK, \ OR (G_UNICODE_ENCLOSING_MARK, \ OR (G_UNICODE_FORMAT, 0)))) #define UTF8_COMPUTE(Char, Mask, Len) \ if (Char < 128) \ { \ Len = 1; \ Mask = 0x7f; \ } \ else if ((Char & 0xe0) == 0xc0) \ { \ Len = 2; \ Mask = 0x1f; \ } \ else if ((Char & 0xf0) == 0xe0) \ { \ Len = 3; \ Mask = 0x0f; \ } \ else if ((Char & 0xf8) == 0xf0) \ { \ Len = 4; \ Mask = 0x07; \ } \ else if ((Char & 0xfc) == 0xf8) \ { \ Len = 5; \ Mask = 0x03; \ } \ else if ((Char & 0xfe) == 0xfc) \ { \ Len = 6; \ Mask = 0x01; \ } \ else \ Len = -1; #define UTF8_GET(Result, Chars, Count, Mask, Len) \ (Result) = (Chars)[0] & 
(Mask); \ for ((Count) = 1; (Count) < (Len); ++(Count)) \ { \ if (((Chars)[(Count)] & 0xc0) != 0x80) \ { \ (Result) = -1; \ break; \ } \ (Result) <<= 6; \ (Result) |= ((Chars)[(Count)] & 0x3f); \ } /** * g_utf8_get_char: * @p: a pointer to Unicode character encoded as UTF-8 * * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. * If @p does not point to a valid UTF-8 encoded character, results are * undefined. If you are not sure that the bytes are complete * valid Unicode characters, you should use g_utf8_get_char_validated() * instead. * * Return value: the resulting character **/ gunichar g_utf8_get_char (const gchar *p) { int i, mask = 0, len; gunichar result; unsigned char c = (unsigned char) *p; UTF8_COMPUTE (c, mask, len); if (len == -1) return (gunichar)-1; UTF8_GET (result, p, i, mask, len); return result; } /** * g_unichar_isalnum: * @c: a Unicode character * * Determines whether a character is alphanumeric. * Given some UTF-8 text, obtain a character value * with g_utf8_get_char(). * * Return value: %TRUE if @c is an alphanumeric character **/ gboolean g_unichar_isalnum (gunichar c) { return ISALDIGIT (TYPE (c)) ? true : false; } /** * g_unichar_isalpha: * @c: a Unicode character * * Determines whether a character is alphabetic (i.e. a letter). * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is an alphabetic character **/ gboolean g_unichar_isalpha (gunichar c) { return ISALPHA (TYPE (c)) ? true : false; } /** * g_unichar_iscntrl: * @c: a Unicode character * * Determines whether a character is a control character. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is a control character **/ gboolean g_unichar_iscntrl (gunichar c) { return TYPE (c) == G_UNICODE_CONTROL; } /** * g_unichar_isdigit: * @c: a Unicode character * * Determines whether a character is numeric (i.e. a digit). 
This * covers ASCII 0-9 and also digits in other languages/scripts. Given * some UTF-8 text, obtain a character value with g_utf8_get_char(). * * Return value: %TRUE if @c is a digit **/ gboolean g_unichar_isdigit (gunichar c) { return TYPE (c) == G_UNICODE_DECIMAL_NUMBER; } /** * g_unichar_isgraph: * @c: a Unicode character * * Determines whether a character is printable and not a space * (returns %FALSE for control characters, format characters, and * spaces). g_unichar_isprint() is similar, but returns %TRUE for * spaces. Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is printable unless it's a space **/ gboolean g_unichar_isgraph (gunichar c) { return !IS (TYPE(c), OR (G_UNICODE_CONTROL, OR (G_UNICODE_FORMAT, OR (G_UNICODE_UNASSIGNED, OR (G_UNICODE_SURROGATE, OR (G_UNICODE_SPACE_SEPARATOR, 0)))))); } /** * g_unichar_islower: * @c: a Unicode character * * Determines whether a character is a lowercase letter. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is a lowercase letter **/ gboolean g_unichar_islower (gunichar c) { return TYPE (c) == G_UNICODE_LOWERCASE_LETTER; } /** * g_unichar_isprint: * @c: a Unicode character * * Determines whether a character is printable. * Unlike g_unichar_isgraph(), returns %TRUE for spaces. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is printable **/ gboolean g_unichar_isprint (gunichar c) { return !IS (TYPE(c), OR (G_UNICODE_CONTROL, OR (G_UNICODE_FORMAT, OR (G_UNICODE_UNASSIGNED, OR (G_UNICODE_SURROGATE, 0))))); } /** * g_unichar_ispunct: * @c: a Unicode character * * Determines whether a character is punctuation or a symbol. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). 
* * Return value: %TRUE if @c is a punctuation or symbol character **/ gboolean g_unichar_ispunct (gunichar c) { return IS (TYPE(c), OR (G_UNICODE_CONNECT_PUNCTUATION, OR (G_UNICODE_DASH_PUNCTUATION, OR (G_UNICODE_CLOSE_PUNCTUATION, OR (G_UNICODE_FINAL_PUNCTUATION, OR (G_UNICODE_INITIAL_PUNCTUATION, OR (G_UNICODE_OTHER_PUNCTUATION, OR (G_UNICODE_OPEN_PUNCTUATION, OR (G_UNICODE_CURRENCY_SYMBOL, OR (G_UNICODE_MODIFIER_SYMBOL, OR (G_UNICODE_MATH_SYMBOL, OR (G_UNICODE_OTHER_SYMBOL, 0)))))))))))) ? true : false; } /** * g_unichar_isspace: * @c: a Unicode character * * Determines whether a character is a space, tab, or line separator * (newline, carriage return, etc.). Given some UTF-8 text, obtain a * character value with g_utf8_get_char(). * * (Note: don't use this to do word breaking; you have to use * Pango or equivalent to get word breaking right, the algorithm * is fairly complex.) * * Return value: %TRUE if @c is a space character **/ gboolean g_unichar_isspace (gunichar c) { switch (c) { /* special-case these since Unicode thinks they are not spaces */ case '\t': case '\n': case '\r': case '\f': return true; break; default: { return IS (TYPE(c), OR (G_UNICODE_SPACE_SEPARATOR, OR (G_UNICODE_LINE_SEPARATOR, OR (G_UNICODE_PARAGRAPH_SEPARATOR, 0)))) ? true : false; } break; } } /** * g_unichar_ismark: * @c: a Unicode character * * Determines whether a character is a mark (non-spacing mark, * combining mark, or enclosing mark in Unicode speak). * Given some UTF-8 text, obtain a character value * with g_utf8_get_char(). * * Note: in most cases where isalpha characters are allowed, * ismark characters should be allowed to as they are essential * for writing most European languages as well as many non-Latin * scripts. * * Return value: %TRUE if @c is a mark character * * Since: 2.14 **/ gboolean g_unichar_ismark (gunichar c) { return ISMARK (TYPE (c)) ? true : false; } /** * g_unichar_isupper: * @c: a Unicode character * * Determines if a character is uppercase. 
* * Return value: %TRUE if @c is an uppercase character **/ gboolean g_unichar_isupper (gunichar c) { return TYPE (c) == G_UNICODE_UPPERCASE_LETTER; } /** * g_unichar_istitle: * @c: a Unicode character * * Determines if a character is titlecase. Some characters in * Unicode which are composites, such as the DZ digraph * have three case variants instead of just two. The titlecase * form is used at the beginning of a word where only the * first letter is capitalized. The titlecase form of the DZ * digraph is U+01F2 LATIN CAPITAL LETTTER D WITH SMALL LETTER Z. * * Return value: %TRUE if the character is titlecase **/ gboolean g_unichar_istitle (gunichar c) { unsigned int i; for (i = 0; i < G_N_ELEMENTS (title_table); ++i) if (title_table[i][0] == c) return true; return false; } /** * g_unichar_isxdigit: * @c: a Unicode character. * * Determines if a character is a hexidecimal digit. * * Return value: %TRUE if the character is a hexadecimal digit **/ gboolean g_unichar_isxdigit (gunichar c) { return ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (TYPE (c) == G_UNICODE_DECIMAL_NUMBER)); } /** * g_unichar_isdefined: * @c: a Unicode character * * Determines if a given character is assigned in the Unicode * standard. * * Return value: %TRUE if the character has an assigned value **/ gboolean g_unichar_isdefined (gunichar c) { return !IS (TYPE(c), OR (G_UNICODE_UNASSIGNED, OR (G_UNICODE_SURROGATE, 0))); } /** * g_unichar_toupper: * @c: a Unicode character * * Converts a character to uppercase. * * Return value: the result of converting @c to uppercase. * If @c is not an lowercase or titlecase character, * or has no upper case equivalent @c is returned unchanged. 
**/ gunichar g_unichar_toupper (gunichar c) { int t = TYPE (c); if (t == G_UNICODE_LOWERCASE_LETTER) { gunichar val = ATTTABLE (c >> 8, c & 0xff); if (val >= 0x1000000) { const gchar *p = special_case_table + val - 0x1000000; val = g_utf8_get_char (p); } /* Some lowercase letters, e.g., U+000AA, FEMININE ORDINAL INDICATOR, * do not have an uppercase equivalent, in which case val will be * zero. */ return val ? val : c; } else if (t == G_UNICODE_TITLECASE_LETTER) { unsigned int i; for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { if (title_table[i][0] == c) return title_table[i][1]; } } return c; } /** * g_unichar_tolower: * @c: a Unicode character. * * Converts a character to lower case. * * Return value: the result of converting @c to lower case. * If @c is not an upperlower or titlecase character, * or has no lowercase equivalent @c is returned unchanged. **/ gunichar g_unichar_tolower (gunichar c) { int t = TYPE (c); if (t == G_UNICODE_UPPERCASE_LETTER) { gunichar val = ATTTABLE (c >> 8, c & 0xff); if (val >= 0x1000000) { const gchar *p = special_case_table + val - 0x1000000; return g_utf8_get_char (p); } else { /* Not all uppercase letters are guaranteed to have a lowercase * equivalent. If this is the case, val will be zero. */ return val ? val : c; } } else if (t == G_UNICODE_TITLECASE_LETTER) { unsigned int i; for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { if (title_table[i][0] == c) return title_table[i][2]; } } return c; } /** * g_unichar_totitle: * @c: a Unicode character * * Converts a character to the titlecase. * * Return value: the result of converting @c to titlecase. * If @c is not an uppercase or lowercase character, * @c is returned unchanged. 
**/ gunichar g_unichar_totitle (gunichar c) { unsigned int i; for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { if (title_table[i][0] == c || title_table[i][1] == c || title_table[i][2] == c) return title_table[i][0]; } if (TYPE (c) == G_UNICODE_LOWERCASE_LETTER) return g_unichar_toupper (c); return c; } /** * g_unichar_digit_value: * @c: a Unicode character * * Determines the numeric value of a character as a decimal * digit. * * Return value: If @c is a decimal digit (according to * g_unichar_isdigit()), its numeric value. Otherwise, -1. **/ int g_unichar_digit_value (gunichar c) { if (TYPE (c) == G_UNICODE_DECIMAL_NUMBER) return ATTTABLE (c >> 8, c & 0xff); return -1; } /** * g_unichar_xdigit_value: * @c: a Unicode character * * Determines the numeric value of a character as a hexadecimal * digit. * * Return value: If @c is a hex digit (according to * g_unichar_isxdigit()), its numeric value. Otherwise, -1. **/ int g_unichar_xdigit_value (gunichar c) { if (c >= 'A' && c <= 'F') return c - 'A' + 10; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (TYPE (c) == G_UNICODE_DECIMAL_NUMBER) return ATTTABLE (c >> 8, c & 0xff); return -1; } /** * g_unichar_type: * @c: a Unicode character * * Classifies a Unicode character by type. * * Return value: the type of the character. **/ GUnicodeType g_unichar_type (gunichar c) { return (GUnicodeType)TYPE (c); } LucenePlusPlus-rel_3.0.4/src/core/util/unicode/guniprop.h000066400000000000000000000212571217574114600234600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// /* Unicode character properties. * * Copyright (C) 1999 Tom Tromey * Copyright (C) 2000 Red Hat, Inc. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #include "Lucene.h" typedef uint32_t gunichar; typedef uint16_t gunichar2; typedef uint32_t guint; typedef uint8_t guchar; typedef int32_t gint; typedef char gchar; typedef bool gboolean; typedef size_t gsize; typedef size_t gssize; /* These are the possible character classifications. * See http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values */ typedef enum { G_UNICODE_CONTROL, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_PRIVATE_USE, G_UNICODE_SURROGATE, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LINE_SEPARATOR, G_UNICODE_PARAGRAPH_SEPARATOR, G_UNICODE_SPACE_SEPARATOR } GUnicodeType; /* These are the possible line break classifications. 
* Note that new types may be added in the future. * Implementations may regard unknown values like G_UNICODE_BREAK_UNKNOWN * See http://www.unicode.org/unicode/reports/tr14/ */ typedef enum { G_UNICODE_BREAK_MANDATORY, G_UNICODE_BREAK_CARRIAGE_RETURN, G_UNICODE_BREAK_LINE_FEED, G_UNICODE_BREAK_COMBINING_MARK, G_UNICODE_BREAK_SURROGATE, G_UNICODE_BREAK_ZERO_WIDTH_SPACE, G_UNICODE_BREAK_INSEPARABLE, G_UNICODE_BREAK_NON_BREAKING_GLUE, G_UNICODE_BREAK_CONTINGENT, G_UNICODE_BREAK_SPACE, G_UNICODE_BREAK_AFTER, G_UNICODE_BREAK_BEFORE, G_UNICODE_BREAK_BEFORE_AND_AFTER, G_UNICODE_BREAK_HYPHEN, G_UNICODE_BREAK_NON_STARTER, G_UNICODE_BREAK_OPEN_PUNCTUATION, G_UNICODE_BREAK_CLOSE_PUNCTUATION, G_UNICODE_BREAK_QUOTATION, G_UNICODE_BREAK_EXCLAMATION, G_UNICODE_BREAK_IDEOGRAPHIC, G_UNICODE_BREAK_NUMERIC, G_UNICODE_BREAK_INFIX_SEPARATOR, G_UNICODE_BREAK_SYMBOL, G_UNICODE_BREAK_ALPHABETIC, G_UNICODE_BREAK_PREFIX, G_UNICODE_BREAK_POSTFIX, G_UNICODE_BREAK_COMPLEX_CONTEXT, G_UNICODE_BREAK_AMBIGUOUS, G_UNICODE_BREAK_UNKNOWN, G_UNICODE_BREAK_NEXT_LINE, G_UNICODE_BREAK_WORD_JOINER, G_UNICODE_BREAK_HANGUL_L_JAMO, G_UNICODE_BREAK_HANGUL_V_JAMO, G_UNICODE_BREAK_HANGUL_T_JAMO, G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE } GUnicodeBreakType; typedef enum { /* ISO 15924 code */ G_UNICODE_SCRIPT_INVALID_CODE = -1, G_UNICODE_SCRIPT_COMMON = 0, /* Zyyy */ G_UNICODE_SCRIPT_INHERITED, /* Qaai */ G_UNICODE_SCRIPT_ARABIC, /* Arab */ G_UNICODE_SCRIPT_ARMENIAN, /* Armn */ G_UNICODE_SCRIPT_BENGALI, /* Beng */ G_UNICODE_SCRIPT_BOPOMOFO, /* Bopo */ G_UNICODE_SCRIPT_CHEROKEE, /* Cher */ G_UNICODE_SCRIPT_COPTIC, /* Qaac */ G_UNICODE_SCRIPT_CYRILLIC, /* Cyrl (Cyrs) */ G_UNICODE_SCRIPT_DESERET, /* Dsrt */ G_UNICODE_SCRIPT_DEVANAGARI, /* Deva */ G_UNICODE_SCRIPT_ETHIOPIC, /* Ethi */ G_UNICODE_SCRIPT_GEORGIAN, /* Geor (Geon, Geoa) */ G_UNICODE_SCRIPT_GOTHIC, /* Goth */ G_UNICODE_SCRIPT_GREEK, /* Grek */ G_UNICODE_SCRIPT_GUJARATI, /* Gujr */ G_UNICODE_SCRIPT_GURMUKHI, /* Guru */ 
G_UNICODE_SCRIPT_HAN, /* Hani */ G_UNICODE_SCRIPT_HANGUL, /* Hang */ G_UNICODE_SCRIPT_HEBREW, /* Hebr */ G_UNICODE_SCRIPT_HIRAGANA, /* Hira */ G_UNICODE_SCRIPT_KANNADA, /* Knda */ G_UNICODE_SCRIPT_KATAKANA, /* Kana */ G_UNICODE_SCRIPT_KHMER, /* Khmr */ G_UNICODE_SCRIPT_LAO, /* Laoo */ G_UNICODE_SCRIPT_LATIN, /* Latn (Latf, Latg) */ G_UNICODE_SCRIPT_MALAYALAM, /* Mlym */ G_UNICODE_SCRIPT_MONGOLIAN, /* Mong */ G_UNICODE_SCRIPT_MYANMAR, /* Mymr */ G_UNICODE_SCRIPT_OGHAM, /* Ogam */ G_UNICODE_SCRIPT_OLD_ITALIC, /* Ital */ G_UNICODE_SCRIPT_ORIYA, /* Orya */ G_UNICODE_SCRIPT_RUNIC, /* Runr */ G_UNICODE_SCRIPT_SINHALA, /* Sinh */ G_UNICODE_SCRIPT_SYRIAC, /* Syrc (Syrj, Syrn, Syre) */ G_UNICODE_SCRIPT_TAMIL, /* Taml */ G_UNICODE_SCRIPT_TELUGU, /* Telu */ G_UNICODE_SCRIPT_THAANA, /* Thaa */ G_UNICODE_SCRIPT_THAI, /* Thai */ G_UNICODE_SCRIPT_TIBETAN, /* Tibt */ G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */ G_UNICODE_SCRIPT_YI, /* Yiii */ G_UNICODE_SCRIPT_TAGALOG, /* Tglg */ G_UNICODE_SCRIPT_HANUNOO, /* Hano */ G_UNICODE_SCRIPT_BUHID, /* Buhd */ G_UNICODE_SCRIPT_TAGBANWA, /* Tagb */ /* Unicode-4.0 additions */ G_UNICODE_SCRIPT_BRAILLE, /* Brai */ G_UNICODE_SCRIPT_CYPRIOT, /* Cprt */ G_UNICODE_SCRIPT_LIMBU, /* Limb */ G_UNICODE_SCRIPT_OSMANYA, /* Osma */ G_UNICODE_SCRIPT_SHAVIAN, /* Shaw */ G_UNICODE_SCRIPT_LINEAR_B, /* Linb */ G_UNICODE_SCRIPT_TAI_LE, /* Tale */ G_UNICODE_SCRIPT_UGARITIC, /* Ugar */ /* Unicode-4.1 additions */ G_UNICODE_SCRIPT_NEW_TAI_LUE, /* Talu */ G_UNICODE_SCRIPT_BUGINESE, /* Bugi */ G_UNICODE_SCRIPT_GLAGOLITIC, /* Glag */ G_UNICODE_SCRIPT_TIFINAGH, /* Tfng */ G_UNICODE_SCRIPT_SYLOTI_NAGRI, /* Sylo */ G_UNICODE_SCRIPT_OLD_PERSIAN, /* Xpeo */ G_UNICODE_SCRIPT_KHAROSHTHI, /* Khar */ /* Unicode-5.0 additions */ G_UNICODE_SCRIPT_UNKNOWN, /* Zzzz */ G_UNICODE_SCRIPT_BALINESE, /* Bali */ G_UNICODE_SCRIPT_CUNEIFORM, /* Xsux */ G_UNICODE_SCRIPT_PHOENICIAN, /* Phnx */ G_UNICODE_SCRIPT_PHAGS_PA, /* Phag */ G_UNICODE_SCRIPT_NKO, /* Nkoo */ /* Unicode-5.1 
additions */ G_UNICODE_SCRIPT_KAYAH_LI, /* Kali */ G_UNICODE_SCRIPT_LEPCHA, /* Lepc */ G_UNICODE_SCRIPT_REJANG, /* Rjng */ G_UNICODE_SCRIPT_SUNDANESE, /* Sund */ G_UNICODE_SCRIPT_SAURASHTRA, /* Saur */ G_UNICODE_SCRIPT_CHAM, /* Cham */ G_UNICODE_SCRIPT_OL_CHIKI, /* Olck */ G_UNICODE_SCRIPT_VAI, /* Vaii */ G_UNICODE_SCRIPT_CARIAN, /* Cari */ G_UNICODE_SCRIPT_LYCIAN, /* Lyci */ G_UNICODE_SCRIPT_LYDIAN /* Lydi */ } GUnicodeScript; #include "gunichartables.h" gboolean g_unichar_isalnum (gunichar c); gboolean g_unichar_isalpha (gunichar c); gboolean g_unichar_iscntrl (gunichar c); gboolean g_unichar_isdigit (gunichar c); gboolean g_unichar_isgraph (gunichar c); gboolean g_unichar_islower (gunichar c); gboolean g_unichar_isprint (gunichar c); gboolean g_unichar_ispunct (gunichar c); gboolean g_unichar_isspace (gunichar c); gboolean g_unichar_ismark (gunichar c); gboolean g_unichar_isupper (gunichar c); gboolean g_unichar_istitle (gunichar c); gboolean g_unichar_isxdigit (gunichar c); gboolean g_unichar_isdefined (gunichar c); gunichar g_unichar_toupper (gunichar c); gunichar g_unichar_tolower (gunichar c); gunichar g_unichar_totitle (gunichar c); int g_unichar_digit_value (gunichar c); int g_unichar_xdigit_value (gunichar c); GUnicodeType g_unichar_type (gunichar c); LucenePlusPlus-rel_3.0.4/src/demo/000077500000000000000000000000001217574114600170265ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/demo/CMakeLists.txt000066400000000000000000000015361217574114600215730ustar00rootroot00000000000000project(lucene++-demo) #################################### # THE lucene++demo applications #################################### file(GLOB_RECURSE HEADERS ${lucene++-demo_SOURCE_DIR}/../include/*.h) ADD_DEFINITIONS(-DLPP_HAVE_DLL) INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) INCLUDE_DIRECTORIES(${lucene++-base_SOURCE_DIR}/include) ADD_EXECUTABLE(indexfiles EXCLUDE_FROM_ALL ${lucene++-demo_SOURCE_DIR}/indexfiles/main.cpp ${HEADERS} ) TARGET_LINK_LIBRARIES(indexfiles 
lucene++) ADD_EXECUTABLE(searchfiles EXCLUDE_FROM_ALL ${lucene++-demo_SOURCE_DIR}/searchfiles/main.cpp ${HEADERS} ) TARGET_LINK_LIBRARIES(searchfiles lucene++) ADD_EXECUTABLE(deletefiles EXCLUDE_FROM_ALL ${lucene++-demo_SOURCE_DIR}/deletefiles/main.cpp ${HEADERS} ) TARGET_LINK_LIBRARIES(deletefiles lucene++) ADD_CUSTOM_TARGET( demo DEPENDS indexfiles searchfiles deletefiles ) LucenePlusPlus-rel_3.0.4/src/demo/deletefiles/000077500000000000000000000000001217574114600213135ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/demo/deletefiles/main.cpp000066400000000000000000000025301217574114600227430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #define NOMINMAX #include "targetver.h" #include #include "LuceneHeaders.h" using namespace Lucene; /// Deletes documents from an index that do not contain a term. 
int main(int argc, char* argv[]) { if (argc == 1) { std::wcout << L"Usage: deletefiles.exe \n"; return 1; } try { DirectoryPtr directory = FSDirectory::open(StringUtils::toUnicode(argv[1])); // we don't want read-only because we are about to delete IndexReaderPtr reader = IndexReader::open(directory, false); TermPtr term = newLucene(L"path", StringUtils::toUnicode(argv[2])); int32_t deleted = reader->deleteDocuments(term); std::wcout << L"Deleted " << deleted << L" documents containing " << term->toString() << L"\n"; reader->close(); directory->close(); } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } return 0; } LucenePlusPlus-rel_3.0.4/src/demo/deletefiles/msvc/000077500000000000000000000000001217574114600222635ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/demo/deletefiles/msvc/deletefiles.vcproj000066400000000000000000000174031217574114600260020ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/demo/indexfiles/000077500000000000000000000000001217574114600211605ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/demo/indexfiles/main.cpp000066400000000000000000000101731217574114600226120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #define NOMINMAX #include "targetver.h" #include #include "LuceneHeaders.h" #include "FileUtils.h" #include "MiscUtils.h" using namespace Lucene; int32_t docNumber = 0; DocumentPtr fileDocument(const String& docFile) { DocumentPtr doc = newLucene(); // Add the path of the file as a field named "path". Use a field that is indexed (ie. searchable), but // don't tokenize the field into words. 
doc->add(newLucene(L"path", docFile, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // Add the last modified date of the file a field named "modified". Use a field that is indexed (ie. searchable), // but don't tokenize the field into words. doc->add(newLucene(L"modified", DateTools::timeToString(FileUtils::fileModified(docFile), DateTools::RESOLUTION_MINUTE), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // Add the contents of the file to a field named "contents". Specify a Reader, so that the text of the file is // tokenized and indexed, but not stored. Note that FileReader expects the file to be in the system's default // encoding. If that's not the case searching for special characters will fail. doc->add(newLucene(L"contents", newLucene(docFile))); return doc; } void indexDocs(IndexWriterPtr writer, const String& sourceDir) { HashSet dirList(HashSet::newInstance()); if (!FileUtils::listDirectory(sourceDir, false, dirList)) return; for (HashSet::iterator dirFile = dirList.begin(); dirFile != dirList.end(); ++dirFile) { String docFile(FileUtils::joinPath(sourceDir, *dirFile)); if (FileUtils::isDirectory(docFile)) indexDocs(writer, docFile); else { std::wcout << L"Adding [" << ++docNumber << L"]: " << *dirFile << L"\n"; try { writer->addDocument(fileDocument(docFile)); } catch (FileNotFoundException&) { } } } } /// Index all text files under a directory. 
int main(int argc, char* argv[]) { if (argc != 3) { std::wcout << L"Usage: indexfiles.exe \n"; return 1; } String sourceDir(StringUtils::toUnicode(argv[1])); String indexDir(StringUtils::toUnicode(argv[2])); if (!FileUtils::isDirectory(sourceDir)) { std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n"; return 1; } if (!FileUtils::isDirectory(indexDir)) { if (!FileUtils::createDirectory(indexDir)) { std::wcout << L"Unable to create directory: " << indexDir << L"\n"; return 1; } } uint64_t beginIndex = MiscUtils::currentTimeMillis(); try { IndexWriterPtr writer = newLucene(FSDirectory::open(indexDir), newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); std::wcout << L"Indexing to directory: " << indexDir << L"...\n"; indexDocs(writer, sourceDir); uint64_t endIndex = MiscUtils::currentTimeMillis(); uint64_t indexDuration = endIndex - beginIndex; std::wcout << L"Index time: " << indexDuration << L" milliseconds\n"; std::wcout << L"Optimizing...\n"; writer->optimize(); uint64_t optimizeDuration = MiscUtils::currentTimeMillis() - endIndex; std::wcout << L"Optimize time: " << optimizeDuration << L" milliseconds\n"; writer->close(); std::wcout << L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n"; } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } return 0; } LucenePlusPlus-rel_3.0.4/src/demo/indexfiles/msvc/000077500000000000000000000000001217574114600221305ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/demo/indexfiles/msvc/indexfiles.vcproj000066400000000000000000000174011217574114600255120ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/demo/searchfiles/000077500000000000000000000000001217574114600213165ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/demo/searchfiles/main.cpp000066400000000000000000000252401217574114600227510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #define NOMINMAX #include "targetver.h" #include #include #include "LuceneHeaders.h" #include "FilterIndexReader.h" #include "MiscUtils.h" using namespace Lucene; /// Use the norms from one field for all fields. Norms are read into memory, using a byte of memory /// per document per searched field. This can cause search of large collections with a large number /// of fields to run out of memory. If all of the fields contain only a single token, then the norms /// are all identical, then single norm vector may be shared. class OneNormsReader : public FilterIndexReader { public: OneNormsReader(IndexReaderPtr in, const String& field) : FilterIndexReader(in) { this->field = field; } virtual ~OneNormsReader() { } protected: String field; public: virtual ByteArray norms(const String& field) { return in->norms(this->field); } }; /// This demonstrates a typical paging search scenario, where the search engine presents pages of size n /// to the user. The user can then go to the next page if interested in the next hits. /// /// When the query is executed for the first time, then only enough results are collected to fill 5 result /// pages. If the user wants to page beyond this limit, then the query is executed another time and all /// hits are collected. 
static void doPagingSearch(SearcherPtr searcher, QueryPtr query, int32_t hitsPerPage, bool raw, bool interactive) { // Collect enough docs to show 5 pages TopScoreDocCollectorPtr collector = TopScoreDocCollector::create(5 * hitsPerPage, false); searcher->search(query, collector); Collection hits = collector->topDocs()->scoreDocs; int32_t numTotalHits = collector->getTotalHits(); std::wcout << numTotalHits << L" total matching documents\n"; int32_t start = 0; int32_t end = std::min(numTotalHits, hitsPerPage); while (true) { if (end > hits.size()) { std::wcout << L"Only results 1 - " << hits.size() << L" of " << numTotalHits << L" total matching documents collected.\n"; std::wcout << L"Collect more (y/n) ?"; String line; std::wcin >> line; boost::trim(line); if (line.empty() || boost::starts_with(line, L"n")) break; collector = TopScoreDocCollector::create(numTotalHits, false); searcher->search(query, collector); hits = collector->topDocs()->scoreDocs; } end = std::min(hits.size(), start + hitsPerPage); for (int32_t i = start; i < end; ++i) { if (raw) // output raw format { std::wcout << L"doc=" << hits[i]->doc << L" score=" << hits[i]->score << L"\n"; continue; } DocumentPtr doc = searcher->doc(hits[i]->doc); String path = doc->get(L"path"); if (!path.empty()) { std::wcout << StringUtils::toString(i + 1) + L". " << path << L"\n"; String title = doc->get(L"title"); if (!title.empty()) std::wcout << L" Title: " << doc->get(L"title") << L"\n"; } else std::wcout << StringUtils::toString(i + 1) + L". 
No path for this document\n"; } if (!interactive) break; if (numTotalHits >= end) { bool quit = false; while (true) { std::wcout << L"Press "; if (start - hitsPerPage >= 0) std::wcout << L"(p)revious page, "; if (start + hitsPerPage < numTotalHits) std::wcout << L"(n)ext page, "; std::wcout << L"(q)uit or enter number to jump to a page: "; String line; std::wcin >> line; boost::trim(line); if (line.empty() || boost::starts_with(line, L"q")) { quit = true; break; } if (boost::starts_with(line, L"p")) { start = std::max((int32_t)0, start - hitsPerPage); break; } else if (boost::starts_with(line, L"n")) { if (start + hitsPerPage < numTotalHits) start += hitsPerPage; break; } else { int32_t page = 0; try { page = StringUtils::toInt(line); } catch (NumberFormatException&) { } if ((page - 1) * hitsPerPage < numTotalHits) { start = std::max((int32_t)0, (page - 1) * hitsPerPage); break; } else std::wcout << L"No such page\n"; } } if (quit) break; end = std::min(numTotalHits, start + hitsPerPage); } } } class StreamingHitCollector : public Collector { public: StreamingHitCollector() { docBase = 0; } virtual ~StreamingHitCollector() { } protected: ScorerPtr scorer; int32_t docBase; public: /// simply print docId and score of every matching document virtual void collect(int32_t doc) { std::wcout << L"doc=" << (doc + docBase) << L" score=" << scorer->score(); } virtual bool acceptsDocsOutOfOrder() { return true; } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { this->docBase = docBase; } virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } }; /// This method uses a custom HitCollector implementation which simply prints out the docId and score of /// every matching document. /// /// This simulates the streaming search use case, where all hits are supposed to be processed, regardless /// of their relevance. 
static void doStreamingSearch(SearcherPtr searcher, QueryPtr query) { searcher->search(query, newLucene()); } /// Simple command-line based search demo. int main(int argc, char* argv[]) { if (argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0) { std::wcout << L"Usage: searchfiles.exe [-index dir] [-field f] [-repeat n] [-queries file] [-raw] "; std::wcout << L"[-norms field] [-paging hitsPerPage]\n\n"; std::wcout << L"Specify 'false' for hitsPerPage to use streaming instead of paging search.\n"; return 1; } try { String index = L"index"; String field = L"contents"; String queries; int32_t repeat = 0; bool raw = false; String normsField; bool paging = true; int32_t hitsPerPage = 10; for (int32_t i = 0; i < argc; ++i) { if (strcmp(argv[i], "-index") == 0) { index = StringUtils::toUnicode(argv[i + 1]); ++i; } else if (strcmp(argv[i], "-field") == 0) { field = StringUtils::toUnicode(argv[i + 1]); ++i; } else if (strcmp(argv[i], "-queries") == 0) { queries = StringUtils::toUnicode(argv[i + 1]); ++i; } else if (strcmp(argv[i], "-repeat") == 0) { repeat = StringUtils::toInt(StringUtils::toUnicode(argv[i + 1])); ++i; } else if (strcmp(argv[i], "-raw") == 0) raw = true; else if (strcmp(argv[i], "-norms") == 0) { normsField = StringUtils::toUnicode(argv[i + 1]); ++i; } else if (strcmp(argv[i], "-paging") == 0) { if (strcmp(argv[i + 1], "false") == 0) paging = false; else { hitsPerPage = StringUtils::toInt(StringUtils::toUnicode(argv[i + 1])); if (hitsPerPage == 0) paging = false; } ++i; } } // only searching, so read-only=true IndexReaderPtr reader = IndexReader::open(FSDirectory::open(index), true); if (!normsField.empty()) reader = newLucene(reader, normsField); SearcherPtr searcher = newLucene(reader); AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, field, analyzer); ReaderPtr in; if (!queries.empty()) in = newLucene(queries); while (true) { String line; if 
(!queries.empty()) { wchar_t c = in->read(); while (c != L'\n' && c != L'\r' && c != Reader::READER_EOF) { line += c; c = in->read(); } } else { std::wcout << L"Enter query: "; std::wcin >> line; } boost::trim(line); if (line.empty()) break; QueryPtr query = parser->parse(line); std::wcout << L"Searching for: " << query->toString(field) << L"\n"; if (repeat > 0) // repeat and time as benchmark { int64_t start = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < repeat; ++i) searcher->search(query, FilterPtr(), 100); std::wcout << L"Time: " << (MiscUtils::currentTimeMillis() - start) << L"ms\n"; } if (paging) doPagingSearch(searcher, query, hitsPerPage, raw, queries.empty()); else doStreamingSearch(searcher, query); } reader->close(); } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } return 0; } LucenePlusPlus-rel_3.0.4/src/demo/searchfiles/msvc/000077500000000000000000000000001217574114600222665ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/demo/searchfiles/msvc/searchfiles.vcproj000066400000000000000000000174031217574114600260100ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/msvc/000077500000000000000000000000001217574114600170525ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/msvc/lucene++.sln000066400000000000000000000156771217574114600212110ustar00rootroot00000000000000 Microsoft Visual Studio Solution File, Format Version 10.00 # Visual Studio 2008 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene_tester", "..\test\msvc\lucene_tester.vcproj", "{6D684870-1124-49E1-8F96-7DE7B6114BEA}" ProjectSection(ProjectDependencies) = postProject {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} {46A95AFD-95FD-4280-B22E-1B56F273144B} = {46A95AFD-95FD-4280-B22E-1B56F273144B} EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "demos", "demos", "{E9344A66-4CC8-4E5B-83BC-8061E8962B46}" EndProject 
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "indexfiles", "..\demo\indexfiles\msvc\indexfiles.vcproj", "{688A6720-739F-4EA3-AC5B-AA67A0965103}" ProjectSection(ProjectDependencies) = postProject {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} {46A95AFD-95FD-4280-B22E-1B56F273144B} = {46A95AFD-95FD-4280-B22E-1B56F273144B} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "deletefiles", "..\demo\deletefiles\msvc\deletefiles.vcproj", "{688A6720-739F-4EA3-AC5B-AA67A0965105}" ProjectSection(ProjectDependencies) = postProject {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} {46A95AFD-95FD-4280-B22E-1B56F273144B} = {46A95AFD-95FD-4280-B22E-1B56F273144B} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "searchfiles", "..\demo\searchfiles\msvc\searchfiles.vcproj", "{688A6720-739F-4EA3-AC5B-AA67A0965104}" ProjectSection(ProjectDependencies) = postProject {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} {46A95AFD-95FD-4280-B22E-1B56F273144B} = {46A95AFD-95FD-4280-B22E-1B56F273144B} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene_contrib", "..\contrib\msvc\lucene_contrib.vcproj", "{46A95AFD-95FD-4280-B22E-1B56F273144B}" ProjectSection(ProjectDependencies) = postProject {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene++", "..\core\msvc\lucene++.vcproj", "{46A95AFD-95FD-4280-B22E-1B56F273144A}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug DLL|Win32 = Debug DLL|Win32 Debug Static|Win32 = Debug Static|Win32 Release DLL|Win32 = Release DLL|Win32 Release Static|Win32 = Release Static|Win32 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug 
DLL|Win32.ActiveCfg = Debug DLL|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug Static|Win32.Build.0 = Debug Static|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release Static|Win32.ActiveCfg = Release Static|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release Static|Win32.Build.0 = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug Static|Win32.Build.0 = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release DLL|Win32.Build.0 = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release Static|Win32.ActiveCfg = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release Static|Win32.Build.0 = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug Static|Win32.Build.0 = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release DLL|Win32.Build.0 = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release Static|Win32.ActiveCfg = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release Static|Win32.Build.0 = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 
{688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug Static|Win32.Build.0 = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release DLL|Win32.Build.0 = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release Static|Win32.ActiveCfg = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release Static|Win32.Build.0 = Release Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug Static|Win32.Build.0 = Debug Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release DLL|Win32.Build.0 = Release DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release Static|Win32.ActiveCfg = Release Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release Static|Win32.Build.0 = Release Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug Static|Win32.Build.0 = Debug Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release DLL|Win32.Build.0 = Release DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release Static|Win32.ActiveCfg = Release Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release Static|Win32.Build.0 = Release 
Static|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution {688A6720-739F-4EA3-AC5B-AA67A0965103} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} {688A6720-739F-4EA3-AC5B-AA67A0965105} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} {688A6720-739F-4EA3-AC5B-AA67A0965104} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} EndGlobalSection EndGlobal LucenePlusPlus-rel_3.0.4/src/test/000077500000000000000000000000001217574114600170615ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/CMakeLists.txt000066400000000000000000000023461217574114600216260ustar00rootroot00000000000000project(lucene++-tester) #################################### # THE lucene++tester library #################################### file(GLOB_RECURSE lucene_sources ${lucene++-tester_SOURCE_DIR}/*.cpp) file(GLOB_RECURSE HEADERS ${lucene++-tester_SOURCE_DIR}/../include/*.h) file(GLOB_RECURSE HEADERS ${lucene++-tester_SOURCE_DIR}/include/*.h) LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) INCLUDE_DIRECTORIES(${lucene++-base_SOURCE_DIR}/include) INCLUDE_DIRECTORIES(${lucene++-lib_SOURCE_DIR}/include) INCLUDE_DIRECTORIES(${lucene++-contrib_SOURCE_DIR}/include) INCLUDE_DIRECTORIES(${lucene++-tester_SOURCE_DIR}/include) ADD_DEFINITIONS(-DLPP_EXPOSE_INTERNAL) ADD_EXECUTABLE(lucene++-tester EXCLUDE_FROM_ALL ${lucene_sources} ${HEADERS} ) #set properties on the libraries SET_TARGET_PROPERTIES(lucene++-tester PROPERTIES VERSION ${LUCENE++_VERSION} SOVERSION ${LUCENE++_SOVERSION} ) TARGET_LINK_LIBRARIES(lucene++-tester lucene++-static lucene++-c lucene++-contrib-static ${CMAKE_THREAD_LIBS_INIT} ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE} ${LUCENE_BOOST_LIBS} ) ADD_TEST(lucene++-tester ${EXECUTABLE_OUTPUT_PATH}/lucene++-tester -p --test_dir=${lucene++-tester_SOURCE_DIR}/testfiles) 
LucenePlusPlus-rel_3.0.4/src/test/analysis/000077500000000000000000000000001217574114600207045ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/analysis/AnalyzersTest.cpp000066400000000000000000000101601217574114600242160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "SimpleAnalyzer.h" #include "WhitespaceAnalyzer.h" #include "StopAnalyzer.h" #include "TokenFilter.h" #include "WhitespaceTokenizer.h" #include "StringReader.h" #include "PayloadAttribute.h" #include "Payload.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(AnalyzersTest, BaseTokenStreamFixture) static void verifyPayload(TokenStreamPtr ts) { PayloadAttributePtr payloadAtt = ts->getAttribute(); for (uint8_t b = 1; ; ++b) { bool hasNext = ts->incrementToken(); if (!hasNext) break; BOOST_CHECK_EQUAL(b, payloadAtt->getPayload()->toByteArray()[0]); } } BOOST_AUTO_TEST_CASE(testSimple) { AnalyzerPtr a = newLucene(); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"foo bar . 
FOO <> BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"foo.bar.FOO.BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"U.S.A.", newCollection(L"u", L"s", L"a")); checkAnalyzesTo(a, L"C++", newCollection(L"c")); checkAnalyzesTo(a, L"B2B", newCollection(L"b", L"b")); checkAnalyzesTo(a, L"2B", newCollection(L"b")); checkAnalyzesTo(a, L"\"QUOTED\" word", newCollection(L"quoted", L"word")); } BOOST_AUTO_TEST_CASE(testNull) { AnalyzerPtr a = newLucene(); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"FOO", L"BAR")); checkAnalyzesTo(a, L"foo bar . FOO <> BAR", newCollection(L"foo", L"bar", L".", L"FOO", L"<>", L"BAR")); checkAnalyzesTo(a, L"foo.bar.FOO.BAR", newCollection(L"foo.bar.FOO.BAR")); checkAnalyzesTo(a, L"U.S.A.", newCollection(L"U.S.A.")); checkAnalyzesTo(a, L"C++", newCollection(L"C++")); checkAnalyzesTo(a, L"B2B", newCollection(L"B2B")); checkAnalyzesTo(a, L"2B", newCollection(L"2B")); checkAnalyzesTo(a, L"\"QUOTED\" word", newCollection(L"\"QUOTED\"", L"word")); } BOOST_AUTO_TEST_CASE(testStop) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"foo a bar such FOO THESE BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); } namespace TestPayloadCopy { DECLARE_SHARED_PTR(PayloadSetter) class PayloadSetter : public TokenFilter { public: PayloadSetter(TokenStreamPtr input) : TokenFilter(input) { payloadAtt = addAttribute(); data = ByteArray::newInstance(1); data[0] = 0; p = newLucene(data, 0, 1); } virtual ~PayloadSetter() { } public: PayloadAttributePtr payloadAtt; ByteArray data; PayloadPtr p; public: virtual bool incrementToken() { bool hasNext = input->incrementToken(); if (!hasNext) return false; payloadAtt->setPayload(p); // reuse the payload / byte[] data[0]++; return true; } }; } /// Make sure old style next() calls result in a new copy of payloads 
BOOST_AUTO_TEST_CASE(testPayloadCopy) { String s = L"how now brown cow"; TokenStreamPtr ts = newLucene(newLucene(s)); ts = newLucene(ts); verifyPayload(ts); ts = newLucene(newLucene(s)); ts = newLucene(ts); verifyPayload(ts); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/BaseTokenStreamFixture.cpp000066400000000000000000000270361217574114600260160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "TokenStream.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "TypeAttribute.h" #include "PositionIncrementAttribute.h" #include "Analyzer.h" #include "StringReader.h" namespace Lucene { CheckClearAttributesAttribute::CheckClearAttributesAttribute() { clearCalled = false; } CheckClearAttributesAttribute::~CheckClearAttributesAttribute() { } bool CheckClearAttributesAttribute::getAndResetClearCalled() { bool _clearCalled = clearCalled; clearCalled = false; return _clearCalled; } void CheckClearAttributesAttribute::clear() { clearCalled = true; } bool CheckClearAttributesAttribute::equals(LuceneObjectPtr other) { if (Attribute::equals(other)) return true; CheckClearAttributesAttributePtr otherAttribute(boost::dynamic_pointer_cast(other)); if (otherAttribute) return (otherAttribute->clearCalled == clearCalled); return false; } int32_t CheckClearAttributesAttribute::hashCode() { return 76137213 ^ (clearCalled ? 
1231 : 1237); } void CheckClearAttributesAttribute::copyTo(AttributePtr target) { CheckClearAttributesAttributePtr clearAttribute(boost::dynamic_pointer_cast(target)); clearAttribute->clear(); } LuceneObjectPtr CheckClearAttributesAttribute::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = other ? other : newLucene(); CheckClearAttributesAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->clearCalled = clearCalled; return cloneAttribute; } BaseTokenStreamFixture::~BaseTokenStreamFixture() { } void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements, int32_t finalOffset) { BOOST_CHECK(output); CheckClearAttributesAttributePtr checkClearAtt = ts->addAttribute(); BOOST_CHECK(ts->hasAttribute()); TermAttributePtr termAtt = ts->getAttribute(); OffsetAttributePtr offsetAtt; if (startOffsets || endOffsets || finalOffset != -1) { BOOST_CHECK(ts->hasAttribute()); offsetAtt = ts->getAttribute(); } TypeAttributePtr typeAtt; if (types) { BOOST_CHECK(ts->hasAttribute()); typeAtt = ts->getAttribute(); } PositionIncrementAttributePtr posIncrAtt; if (posIncrements) { BOOST_CHECK(ts->hasAttribute()); posIncrAtt = ts->getAttribute(); } ts->reset(); for (int32_t i = 0; i < output.size(); ++i) { // extra safety to enforce, that the state is not preserved and also assign bogus values ts->clearAttributes(); termAtt->setTermBuffer(L"bogusTerm"); if (offsetAtt) offsetAtt->setOffset(14584724, 24683243); if (typeAtt) typeAtt->setType(L"bogusType"); if (posIncrAtt) posIncrAtt->setPositionIncrement(45987657); checkClearAtt->getAndResetClearCalled(); // reset it, because we called clearAttribute() before BOOST_CHECK(ts->incrementToken()); BOOST_CHECK(checkClearAtt->getAndResetClearCalled()); BOOST_CHECK_EQUAL(output[i], termAtt->term()); if (startOffsets) BOOST_CHECK_EQUAL(startOffsets[i], offsetAtt->startOffset()); 
if (endOffsets) BOOST_CHECK_EQUAL(endOffsets[i], offsetAtt->endOffset()); if (types) BOOST_CHECK_EQUAL(types[i], typeAtt->type()); if (posIncrements) BOOST_CHECK_EQUAL(posIncrements[i], posIncrAtt->getPositionIncrement()); } BOOST_CHECK(!ts->incrementToken()); ts->end(); if (finalOffset != -1) BOOST_CHECK_EQUAL(finalOffset, offsetAtt->endOffset()); ts->close(); } void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output) { checkTokenStreamContents(ts, output, Collection(), Collection(), Collection(), Collection(), -1); } void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection types) { checkTokenStreamContents(ts, output, Collection(), Collection(), types, Collection(), -1); } void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection posIncrements) { checkTokenStreamContents(ts, output, Collection(), Collection(), Collection(), posIncrements, -1); } void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets) { checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), Collection(), -1); } void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, int32_t finalOffset) { checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), Collection(), finalOffset); } void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), posIncrements, -1); } void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements, int32_t finalOffset) { checkTokenStreamContents(ts, output, 
startOffsets, endOffsets, Collection(), posIncrements, finalOffset); } void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements) { checkTokenStreamContents(analyzer->tokenStream(L"dummy", newLucene(input)), output, startOffsets, endOffsets, types, posIncrements, (int32_t)input.length()); } void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output) { checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), Collection(), Collection()); } void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection types) { checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), types, Collection()); } void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection posIncrements) { checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), Collection(), posIncrements); } void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets) { checkAnalyzesTo(analyzer, input, output, startOffsets, endOffsets, Collection(), Collection()); } void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { checkAnalyzesTo(analyzer, input, output, startOffsets, endOffsets, Collection(), posIncrements); } void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements) { checkTokenStreamContents(analyzer->reusableTokenStream(L"dummy", newLucene(input)), output, startOffsets, endOffsets, types, posIncrements, 
(int32_t)input.length()); } void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output) { checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), Collection(), Collection()); } void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection types) { checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), types, Collection()); } void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection posIncrements) { checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), Collection(), posIncrements); } void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets) { checkAnalyzesToReuse(analyzer, input, output, startOffsets, endOffsets, Collection(), Collection()); } void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { checkAnalyzesToReuse(analyzer, input, output, startOffsets, endOffsets, Collection(), posIncrements); } void BaseTokenStreamFixture::checkOneTerm(AnalyzerPtr analyzer, const String& input, const String& expected) { checkAnalyzesTo(analyzer, input, newCollection(expected)); } void BaseTokenStreamFixture::checkOneTermReuse(AnalyzerPtr analyzer, const String& input, const String& expected) { checkAnalyzesToReuse(analyzer, input, newCollection(expected)); } } LucenePlusPlus-rel_3.0.4/src/test/analysis/CachingTokenFilterTest.cpp000066400000000000000000000074131217574114600257600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "TokenStream.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "CachingTokenFilter.h" #include "IndexReader.h" #include "TermPositions.h" #include "Term.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CachingTokenFilterTest, BaseTokenStreamFixture) static Collection tokens = newCollection(L"term1", L"term2", L"term3", L"term2"); static void checkTokens(TokenStreamPtr stream) { int32_t count = 0; TermAttributePtr termAtt = stream->getAttribute(); BOOST_CHECK(termAtt); while (stream->incrementToken()) { BOOST_CHECK(count < tokens.size()); BOOST_CHECK_EQUAL(tokens[count], termAtt->term()); ++count; } BOOST_CHECK_EQUAL(tokens.size(), count); } namespace TestCaching { class TestableTokenStream : public TokenStream { public: TestableTokenStream() { index = 0; termAtt = addAttribute(); offsetAtt = addAttribute(); } virtual ~TestableTokenStream() { } protected: int32_t index; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (index == tokens.size()) return false; else { clearAttributes(); termAtt->setTermBuffer(tokens[index++]); offsetAtt->setOffset(0, 0); return true; } } }; } BOOST_AUTO_TEST_CASE(testCaching) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); TokenStreamPtr stream = newLucene(newLucene()); doc->add(newLucene(L"preanalyzed", stream, Field::TERM_VECTOR_NO)); // 1) we consume all tokens twice before we add the doc to the index checkTokens(stream); stream->reset(); checkTokens(stream); // 2) now add the 
document to the index and verify if all tokens are indexed don't reset the stream here, the // DocumentWriter should do that implicitly writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPositionsPtr termPositions = reader->termPositions(newLucene(L"preanalyzed", L"term1")); BOOST_CHECK(termPositions->next()); BOOST_CHECK_EQUAL(1, termPositions->freq()); BOOST_CHECK_EQUAL(0, termPositions->nextPosition()); termPositions->seek(newLucene(L"preanalyzed", L"term2")); BOOST_CHECK(termPositions->next()); BOOST_CHECK_EQUAL(2, termPositions->freq()); BOOST_CHECK_EQUAL(1, termPositions->nextPosition()); BOOST_CHECK_EQUAL(3, termPositions->nextPosition()); termPositions->seek(newLucene(L"preanalyzed", L"term3")); BOOST_CHECK(termPositions->next()); BOOST_CHECK_EQUAL(1, termPositions->freq()); BOOST_CHECK_EQUAL(2, termPositions->nextPosition()); reader->close(); // 3) reset stream and consume tokens again stream->reset(); checkTokens(stream); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/CharFilterTest.cpp000066400000000000000000000035461217574114600243030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "CharFilter.h" #include "StringReader.h" #include "CharReader.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CharFilterTest, LuceneTestFixture) class CharFilter1 : public CharFilter { public: CharFilter1(CharStreamPtr in) : CharFilter(in) { } virtual ~CharFilter1() { } protected: virtual int32_t correct(int32_t currentOff) { return currentOff + 1; } }; class CharFilter2 : public CharFilter { public: CharFilter2(CharStreamPtr in) : CharFilter(in) { } virtual ~CharFilter2() { } protected: virtual int32_t correct(int32_t currentOff) { return currentOff + 2; } }; BOOST_AUTO_TEST_CASE(testCharFilter1) { CharStreamPtr cs = newLucene(CharReader::get(newLucene(L""))); BOOST_CHECK_EQUAL(1, cs->correctOffset(0)); } BOOST_AUTO_TEST_CASE(testCharFilter2) { CharStreamPtr cs = newLucene(CharReader::get(newLucene(L""))); BOOST_CHECK_EQUAL(2, cs->correctOffset(0)); } BOOST_AUTO_TEST_CASE(testCharFilter12) { CharStreamPtr cs = newLucene(newLucene(CharReader::get(newLucene(L"")))); BOOST_CHECK_EQUAL(3, cs->correctOffset(0)); } BOOST_AUTO_TEST_CASE(testCharFilter11) { CharStreamPtr cs = newLucene(newLucene(CharReader::get(newLucene(L"")))); BOOST_CHECK_EQUAL(2, cs->correctOffset(0)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/KeywordAnalyzerTest.cpp000066400000000000000000000070551217574114600254110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "RAMDirectory.h" #include "IndexSearcher.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "PerFieldAnalyzerWrapper.h" #include "KeywordAnalyzer.h" #include "QueryParser.h" #include "Query.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "TermDocs.h" #include "Term.h" #include "TokenStream.h" #include "OffsetAttribute.h" #include "IndexReader.h" #include "StringReader.h" using namespace Lucene; class KeywordAnalyzerTestFixture : public BaseTokenStreamFixture { public: KeywordAnalyzerTestFixture() { directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"partnum", L"Q36", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"description", L"Illidium Space Modulator", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); searcher = newLucene(directory, true); } virtual ~KeywordAnalyzerTestFixture() { } protected: RAMDirectoryPtr directory; IndexSearcherPtr searcher; }; BOOST_FIXTURE_TEST_SUITE(KeywordAnalyzerTest, KeywordAnalyzerTestFixture) BOOST_AUTO_TEST_CASE(testPerFieldAnalyzer) { PerFieldAnalyzerWrapperPtr analyzer = newLucene(newLucene()); analyzer->addAnalyzer(L"partnum", newLucene()); QueryParserPtr queryParser = newLucene(LuceneVersion::LUCENE_CURRENT, L"description", analyzer); QueryPtr query = queryParser->parse(L"partnum:Q36 AND SPACE"); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(L"+partnum:Q36 +space", query->toString(L"description")); BOOST_CHECK_EQUAL(1, hits.size()); } BOOST_AUTO_TEST_CASE(testMutipleDocument) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, 
IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"partnum", L"Q36", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"partnum", L"Q37", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermDocsPtr td = reader->termDocs(newLucene(L"partnum", L"Q36")); BOOST_CHECK(td->next()); td = reader->termDocs(newLucene(L"partnum", L"Q37")); BOOST_CHECK(td->next()); } BOOST_AUTO_TEST_CASE(testOffsets) { TokenStreamPtr stream = newLucene()->tokenStream(L"field", newLucene(L"abcd")); OffsetAttributePtr offsetAtt = stream->addAttribute(); BOOST_CHECK(stream->incrementToken()); BOOST_CHECK_EQUAL(0, offsetAtt->startOffset()); BOOST_CHECK_EQUAL(4, offsetAtt->endOffset()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/LengthFilterTest.cpp000066400000000000000000000024111217574114600246350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "WhitespaceTokenizer.h" #include "TokenStream.h" #include "StringReader.h" #include "LengthFilter.h" #include "TermAttribute.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(LengthFilterTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testFilter) { TokenStreamPtr stream = newLucene(newLucene(L"short toolong evenmuchlongertext a ab toolong foo")); LengthFilterPtr filter = newLucene(stream, 2, 6); TermAttributePtr termAtt = filter->getAttribute(); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(L"short", termAtt->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(L"ab", termAtt->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(L"foo", termAtt->term()); BOOST_CHECK(!filter->incrementToken()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/MappingCharFilterTest.cpp000066400000000000000000000136101217574114600256100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "NormalizeCharMap.h" #include "CharStream.h" #include "MappingCharFilter.h" #include "StringReader.h" #include "WhitespaceTokenizer.h" #include "CharReader.h" using namespace Lucene; class MappingCharFilterTestFixture : public BaseTokenStreamFixture { public: MappingCharFilterTestFixture() { normMap = newLucene(); normMap->add(L"aa", L"a"); normMap->add(L"bbb", L"b"); normMap->add(L"cccc", L"cc"); normMap->add(L"h", L"i"); normMap->add(L"j", L"jj"); normMap->add(L"k", L"kkk"); normMap->add(L"ll", L"llll"); normMap->add(L"empty", L""); } virtual ~MappingCharFilterTestFixture() { } public: NormalizeCharMapPtr normMap; }; BOOST_FIXTURE_TEST_SUITE(MappingCharFilterTest, MappingCharFilterTestFixture) BOOST_AUTO_TEST_CASE(testReaderReset) { CharStreamPtr cs = newLucene(normMap, newLucene(L"x")); CharArray buf = CharArray::newInstance(10); int32_t len = cs->read(buf.get(), 0, 10); BOOST_CHECK_EQUAL(1, len); BOOST_CHECK_EQUAL(L'x', buf[0]) ; len = cs->read(buf.get(), 0, 10); BOOST_CHECK_EQUAL(-1, len); // rewind cs->reset(); len = cs->read(buf.get(), 0, 10); BOOST_CHECK_EQUAL(1, len); BOOST_CHECK_EQUAL(L'x', buf[0]) ; } BOOST_AUTO_TEST_CASE(testNothingChange) { CharStreamPtr cs = newLucene(normMap, newLucene(L"x")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"x"), newCollection(0), newCollection(1)); } BOOST_AUTO_TEST_CASE(test1to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"h")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"i"), newCollection(0), newCollection(1)); } BOOST_AUTO_TEST_CASE(test1to2) { CharStreamPtr cs = newLucene(normMap, newLucene(L"j")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"jj"), newCollection(0), newCollection(1)); } BOOST_AUTO_TEST_CASE(test1to3) { CharStreamPtr cs = newLucene(normMap, 
newLucene(L"k")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"kkk"), newCollection(0), newCollection(1)); } BOOST_AUTO_TEST_CASE(test2to4) { CharStreamPtr cs = newLucene(normMap, newLucene(L"ll")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"llll"), newCollection(0), newCollection(2)); } BOOST_AUTO_TEST_CASE(test2to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"aa")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"a"), newCollection(0), newCollection(2)); } BOOST_AUTO_TEST_CASE(test3to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"bbb")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"b"), newCollection(0), newCollection(3)); } BOOST_AUTO_TEST_CASE(test4to2) { CharStreamPtr cs = newLucene(normMap, newLucene(L"cccc")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"cc"), newCollection(0), newCollection(4)); } BOOST_AUTO_TEST_CASE(test5to0) { CharStreamPtr cs = newLucene(normMap, newLucene(L"empty")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, Collection::newInstance()); } // // 1111111111222 // 01234567890123456789012 //(in) h i j k ll cccc bbb aa // // 1111111111222 // 01234567890123456789012 //(out) i i jj kkk llll cc b a // // h, 0, 1 => i, 0, 1 // i, 2, 3 => i, 2, 3 // j, 4, 5 => jj, 4, 5 // k, 6, 7 => kkk, 6, 7 // ll, 8,10 => llll, 8,10 // cccc,11,15 => cc,11,15 // bbb,16,19 => b,16,19 // aa,20,22 => a,20,22 BOOST_AUTO_TEST_CASE(testTokenStream) { CharStreamPtr cs = newLucene(normMap, CharReader::get(newLucene(L"h i j k ll cccc bbb aa"))); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"i", L"i", L"jj", L"kkk", L"llll", L"cc", L"b", L"a"), newCollection(0, 2, 4, 6, 8, 11, 16, 20), newCollection(1, 3, 5, 7, 10, 15, 19, 22)); } // // // 0123456789 //(in) aaaa ll h //(out-1) aa llll i //(out-2) a llllllll i // // aaaa,0,4 => 
a,0,4 // ll,5,7 => llllllll,5,7 // h,8,9 => i,8,9 BOOST_AUTO_TEST_CASE(testChained) { CharStreamPtr cs = newLucene(normMap, (CharStreamPtr)newLucene(normMap, CharReader::get(newLucene(L"aaaa ll h")))); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"a", L"llllllll", L"i"), newCollection(0, 5, 8), newCollection(4, 7, 9)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/NumericTokenStreamTest.cpp000066400000000000000000000051231217574114600260300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "NumericTokenStream.h" #include "TermAttribute.h" #include "TypeAttribute.h" #include "NumericUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(NumericTokenStreamTest, BaseTokenStreamFixture) static int64_t lvalue = 4573245871874382LL; static int32_t ivalue = 123456; BOOST_AUTO_TEST_CASE(testLongStream) { NumericTokenStreamPtr stream = newLucene()->setLongValue(lvalue); // use getAttribute to test if attributes really exist, if not an IAE will be thrown TermAttributePtr termAtt = stream->getAttribute(); TypeAttributePtr typeAtt = stream->getAttribute(); for (int32_t shift = 0; shift < 64; shift += NumericUtils::PRECISION_STEP_DEFAULT) { BOOST_CHECK(stream->incrementToken()); BOOST_CHECK_EQUAL(NumericUtils::longToPrefixCoded(lvalue, shift), termAtt->term()); BOOST_CHECK_EQUAL(shift == 0 ? 
NumericTokenStream::TOKEN_TYPE_FULL_PREC() : NumericTokenStream::TOKEN_TYPE_LOWER_PREC(), typeAtt->type()); } BOOST_CHECK(!stream->incrementToken()); } BOOST_AUTO_TEST_CASE(testIntStream) { NumericTokenStreamPtr stream = newLucene()->setIntValue(ivalue); // use getAttribute to test if attributes really exist, if not an IAE will be thrown TermAttributePtr termAtt = stream->getAttribute(); TypeAttributePtr typeAtt = stream->getAttribute(); for (int32_t shift = 0; shift < 32; shift += NumericUtils::PRECISION_STEP_DEFAULT) { BOOST_CHECK(stream->incrementToken()); BOOST_CHECK_EQUAL(NumericUtils::intToPrefixCoded(ivalue, shift), termAtt->term()); BOOST_CHECK_EQUAL(shift == 0 ? NumericTokenStream::TOKEN_TYPE_FULL_PREC() : NumericTokenStream::TOKEN_TYPE_LOWER_PREC(), typeAtt->type()); } BOOST_CHECK(!stream->incrementToken()); } BOOST_AUTO_TEST_CASE(testNotInitialized) { NumericTokenStreamPtr stream = newLucene(); BOOST_CHECK_EXCEPTION(stream->reset(), IllegalStateException, check_exception(LuceneException::IllegalState)); BOOST_CHECK_EXCEPTION(stream->incrementToken(), IllegalStateException, check_exception(LuceneException::IllegalState)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/PerFieldAnalzyerWrapperTest.cpp000066400000000000000000000026731217574114600270210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "PerFieldAnalyzerWrapper.h" #include "WhitespaceAnalyzer.h" #include "SimpleAnalyzer.h" #include "TokenStream.h" #include "StringReader.h" #include "TermAttribute.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PerFieldAnalzyerWrapperTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testPerField) { String text = L"Qwerty"; PerFieldAnalyzerWrapperPtr analyzer = newLucene(newLucene()); analyzer->addAnalyzer(L"special", newLucene()); TokenStreamPtr tokenStream = analyzer->tokenStream(L"field", newLucene(text)); TermAttributePtr termAtt = tokenStream->getAttribute(); BOOST_CHECK(tokenStream->incrementToken()); BOOST_CHECK_EQUAL(L"Qwerty", termAtt->term()); tokenStream = analyzer->tokenStream(L"special", newLucene(text)); termAtt = tokenStream->getAttribute(); BOOST_CHECK(tokenStream->incrementToken()); BOOST_CHECK_EQUAL(L"qwerty", termAtt->term()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/StopAnalyzerTest.cpp000066400000000000000000000070301217574114600247030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "StopAnalyzer.h" #include "StringReader.h" #include "TokenStream.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" using namespace Lucene; class StopAnalyzerTestFixture : public BaseTokenStreamFixture { public: StopAnalyzerTestFixture() { stop = newLucene(LuceneVersion::LUCENE_CURRENT); inValidTokens = HashSet::newInstance(); for (HashSet::iterator word = StopAnalyzer::ENGLISH_STOP_WORDS_SET().begin(); word != StopAnalyzer::ENGLISH_STOP_WORDS_SET().end(); ++word) inValidTokens.add(*word); } virtual ~StopAnalyzerTestFixture() { } protected: StopAnalyzerPtr stop; HashSet inValidTokens; }; BOOST_FIXTURE_TEST_SUITE(StopAnalyzerTest, StopAnalyzerTestFixture) BOOST_AUTO_TEST_CASE(testDefaults) { BOOST_CHECK(stop); StringReaderPtr reader = newLucene(L"This is a test of the english stop analyzer"); TokenStreamPtr stream = stop->tokenStream(L"test", reader); BOOST_CHECK(stream); TermAttributePtr termAtt = stream->getAttribute(); while (stream->incrementToken()) BOOST_CHECK(!inValidTokens.contains(termAtt->term())); } BOOST_AUTO_TEST_CASE(testStopList) { HashSet stopWordsSet = HashSet::newInstance(); stopWordsSet.add(L"good"); stopWordsSet.add(L"test"); stopWordsSet.add(L"analyzer"); StopAnalyzerPtr newStop = newLucene(LuceneVersion::LUCENE_24, stopWordsSet); StringReaderPtr reader = newLucene(L"This is a good test of the english stop analyzer"); TokenStreamPtr stream = newStop->tokenStream(L"test", reader); BOOST_CHECK(stream); TermAttributePtr termAtt = stream->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stream->addAttribute(); while (stream->incrementToken()) { String text = termAtt->term(); BOOST_CHECK(!stopWordsSet.contains(text)); BOOST_CHECK_EQUAL(1, posIncrAtt->getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments. 
} } BOOST_AUTO_TEST_CASE(testStopListPositions) { HashSet stopWordsSet = HashSet::newInstance(); stopWordsSet.add(L"good"); stopWordsSet.add(L"test"); stopWordsSet.add(L"analyzer"); StopAnalyzerPtr newStop = newLucene(LuceneVersion::LUCENE_CURRENT, stopWordsSet); StringReaderPtr reader = newLucene(L"This is a good test of the english stop analyzer with positions"); Collection expectedIncr = newCollection(1, 1, 1, 3, 1, 1, 1, 2, 1); TokenStreamPtr stream = newStop->tokenStream(L"test", reader); BOOST_CHECK(stream); int32_t i = 0; TermAttributePtr termAtt = stream->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stream->addAttribute(); while (stream->incrementToken()) { String text = termAtt->term(); BOOST_CHECK(!stopWordsSet.contains(text)); BOOST_CHECK_EQUAL(expectedIncr[i++], posIncrAtt->getPositionIncrement()); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/StopFilterTest.cpp000066400000000000000000000104211217574114600243410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "TestUtils.h" #include "StringReader.h" #include "TokenStream.h" #include "StopFilter.h" #include "WhitespaceTokenizer.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(StopFilterTest, BaseTokenStreamFixture) static void doTestStopPositons(StopFilterPtr stpf, bool enableIcrements) { stpf->setEnablePositionIncrements(enableIcrements); TermAttributePtr termAtt = stpf->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stpf->getAttribute(); for (int32_t i = 0; i < 20; i += 3) { BOOST_CHECK(stpf->incrementToken()); String w = intToEnglish(i); BOOST_CHECK_EQUAL(w, termAtt->term()); BOOST_CHECK_EQUAL(enableIcrements ? (i == 0 ? 1 : 3) : 1, posIncrAtt->getPositionIncrement()); } BOOST_CHECK(!stpf->incrementToken()); } BOOST_AUTO_TEST_CASE(testExactCase) { StringReaderPtr reader = newLucene(L"Now is The Time"); HashSet stopWords = HashSet::newInstance(); stopWords.add(L"is"); stopWords.add(L"the"); stopWords.add(L"Time"); TokenStreamPtr stream = newLucene(false, newLucene(reader), stopWords, false); TermAttributePtr termAtt = stream->getAttribute(); BOOST_CHECK(stream->incrementToken()); BOOST_CHECK_EQUAL(L"Now", termAtt->term()); BOOST_CHECK(stream->incrementToken()); BOOST_CHECK_EQUAL(L"The", termAtt->term()); BOOST_CHECK(!stream->incrementToken()); } BOOST_AUTO_TEST_CASE(testIgnoreCase) { StringReaderPtr reader = newLucene(L"Now is The Time"); HashSet stopWords = HashSet::newInstance(); stopWords.add(L"is"); stopWords.add(L"the"); stopWords.add(L"Time"); TokenStreamPtr stream = newLucene(false, newLucene(reader), stopWords, true); TermAttributePtr termAtt = stream->getAttribute(); BOOST_CHECK(stream->incrementToken()); BOOST_CHECK_EQUAL(L"Now", termAtt->term()); BOOST_CHECK(!stream->incrementToken()); } BOOST_AUTO_TEST_CASE(testStopPositons) { 
StringStream buf; Collection stopWords = Collection::newInstance(); for (int32_t i = 0; i < 20; ++i) { String w = intToEnglish(i); buf << w << L" "; if (i % 3 != 0) stopWords.add(w); } HashSet stopSet = HashSet::newInstance(stopWords.begin(), stopWords.end()); // with increments StringReaderPtr reader = newLucene(buf.str()); StopFilterPtr stpf = newLucene(false, newLucene(reader), stopSet); doTestStopPositons(stpf, true); // without increments reader = newLucene(buf.str()); stpf = newLucene(false, newLucene(reader), stopSet); doTestStopPositons(stpf, false); // with increments, concatenating two stop filters Collection stopWords0 = Collection::newInstance(); Collection stopWords1 = Collection::newInstance(); for (int32_t i = 0; i < stopWords.size(); ++i) { if (i % 2 == 0) stopWords0.add(stopWords[i]); else stopWords1.add(stopWords[i]); } HashSet stopSet0 = HashSet::newInstance(stopWords0.begin(), stopWords0.end()); HashSet stopSet1 = HashSet::newInstance(stopWords1.begin(), stopWords1.end()); reader = newLucene(buf.str()); StopFilterPtr stpf0 = newLucene(false, newLucene(reader), stopSet0); // first part of the set stpf0->setEnablePositionIncrements(true); StopFilterPtr stpf01 = newLucene(false, stpf0, stopSet1); // two stop filters concatenated! doTestStopPositons(stpf01, true); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/TeeSinkTokenFilterTest.cpp000066400000000000000000000231741217574114600257700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "BaseTokenStreamFixture.h" #include "TestUtils.h" #include "TeeSinkTokenFilter.h" #include "WhitespaceTokenizer.h" #include "TokenStream.h" #include "TermAttribute.h" #include "StringReader.h" #include "CachingTokenFilter.h" #include "LowerCaseFilter.h" #include "StandardFilter.h" #include "StandardTokenizer.h" #include "PositionIncrementAttribute.h" #include "MiscUtils.h" using namespace Lucene; class TheSinkFilter : public SinkFilter { public: virtual ~TheSinkFilter() { } public: virtual bool accept(AttributeSourcePtr source) { TermAttributePtr termAtt = source->getAttribute(); return boost::iequals(termAtt->term(), L"The"); } }; class DogSinkFilter : public SinkFilter { public: virtual ~DogSinkFilter() { } public: virtual bool accept(AttributeSourcePtr source) { TermAttributePtr termAtt = source->getAttribute(); return boost::iequals(termAtt->term(), L"Dogs"); } }; class TeeSinkTokenFilterTestFixture : public BaseTokenStreamFixture { public: TeeSinkTokenFilterTestFixture() { tokens1 = newCollection(L"The", L"quick", L"Burgundy", L"Fox", L"jumped", L"over", L"the", L"lazy", L"Red", L"Dogs"); tokens2 = newCollection(L"The", L"Lazy", L"Dogs", L"should", L"stay", L"on", L"the", L"porch"); for (int32_t i = 0; i < tokens1.size(); ++i) buffer1 << tokens1[i] << L" "; for (int32_t i = 0; i < tokens2.size(); ++i) buffer2 << tokens2[i] << L" "; theFilter = newLucene(); dogFilter = newLucene(); } virtual ~TeeSinkTokenFilterTestFixture() { } protected: StringStream buffer1; StringStream buffer2; Collection tokens1; Collection tokens2; SinkFilterPtr theFilter; SinkFilterPtr dogFilter; }; BOOST_FIXTURE_TEST_SUITE(TeeSinkTokenFilterTest, TeeSinkTokenFilterTestFixture) BOOST_AUTO_TEST_CASE(testGeneral) { TeeSinkTokenFilterPtr source = newLucene(newLucene(newLucene(buffer1.str()))); TokenStreamPtr sink1 = source->newSinkTokenStream(); TokenStreamPtr sink2 = 
source->newSinkTokenStream(theFilter); source->addAttribute(); sink1->addAttribute(); sink2->addAttribute(); checkTokenStreamContents(source, tokens1); checkTokenStreamContents(sink1, tokens1); checkTokenStreamContents(sink2, newCollection(L"The", L"the")); } BOOST_AUTO_TEST_CASE(testMultipleSources) { TeeSinkTokenFilterPtr tee1 = newLucene(newLucene(newLucene(buffer1.str()))); SinkTokenStreamPtr dogDetector = tee1->newSinkTokenStream(dogFilter); SinkTokenStreamPtr theDetector = tee1->newSinkTokenStream(theFilter); TokenStreamPtr source1 = newLucene(tee1); tee1->addAttribute(); dogDetector->addAttribute(); theDetector->addAttribute(); TeeSinkTokenFilterPtr tee2 = newLucene(newLucene(newLucene(buffer2.str()))); tee2->addSinkTokenStream(dogDetector); tee2->addSinkTokenStream(theDetector); TokenStreamPtr source2 = tee2; checkTokenStreamContents(source1, tokens1); checkTokenStreamContents(source2, tokens2); checkTokenStreamContents(theDetector, newCollection(L"The", L"the", L"The", L"the")); checkTokenStreamContents(dogDetector, newCollection(L"Dogs", L"Dogs")); source1->reset(); TokenStreamPtr lowerCasing = newLucene(source1); Collection lowerCaseTokens = Collection::newInstance(tokens1.size()); for (int32_t i = 0; i < tokens1.size(); ++i) lowerCaseTokens[i] = StringUtils::toLower((const String&)tokens1[i]); checkTokenStreamContents(lowerCasing, lowerCaseTokens); } namespace TestPerformance { class ModuloTokenFilter : public TokenFilter { public: ModuloTokenFilter(TokenStreamPtr input, int32_t mc) : TokenFilter(input) { modCount = mc; count = 0; } virtual ~ModuloTokenFilter() { } public: int32_t modCount; int32_t count; public: // return every 100 tokens virtual bool incrementToken() { bool hasNext = false; for (hasNext = input->incrementToken(); hasNext && count % modCount != 0; hasNext = input->incrementToken()) ++count; ++count; return hasNext; } }; class ModuloSinkFilter : public SinkFilter { public: ModuloSinkFilter(int32_t mc) { modCount = mc; count = 0; } 
virtual ~ModuloSinkFilter() { } public: int32_t modCount; int32_t count; public: virtual bool accept(AttributeSourcePtr source) { bool b = (source && count % modCount == 0); ++count; return b; } }; } /// Not an explicit test, just useful to print out some info on performance BOOST_AUTO_TEST_CASE(testPerformance) { Collection tokCount = newCollection(100, 500, 1000, 2000, 5000, 10000); Collection modCounts = newCollection(1, 2, 5, 10, 20, 50, 100, 200, 500); for (int32_t k = 0; k < tokCount.size(); ++k) { StringStream buffer; BOOST_TEST_MESSAGE("-----Tokens: " << tokCount[k] << "-----"); for (int32_t i = 0; i < tokCount[k]; ++i) buffer << StringUtils::toUpper(intToEnglish(i)) << L" "; // make sure we produce the same tokens TeeSinkTokenFilterPtr teeStream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str())))); TokenStreamPtr sink = teeStream->newSinkTokenStream(newLucene(100)); teeStream->consumeAllTokens(); TokenStreamPtr stream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))), 100); TermAttributePtr tfTok = stream->addAttribute(); TermAttributePtr sinkTok = sink->addAttribute(); for (int32_t i = 0; stream->incrementToken(); ++i) { BOOST_CHECK(sink->incrementToken()); BOOST_CHECK(tfTok->equals(sinkTok)); } // simulate two fields, each being analyzed once, for 20 documents for (int32_t j = 0; j < modCounts.size(); ++j) { int32_t tfPos = 0; int64_t start = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < 20; ++i) { stream = newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))); PositionIncrementAttributePtr posIncrAtt = stream->getAttribute(); while (stream->incrementToken()) tfPos += posIncrAtt->getPositionIncrement(); stream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))), modCounts[j]); posIncrAtt = stream->getAttribute(); while (stream->incrementToken()) tfPos += posIncrAtt->getPositionIncrement(); } int64_t finish = 
MiscUtils::currentTimeMillis(); BOOST_TEST_MESSAGE("ModCount: " << modCounts[j] << " Two fields took " << (finish - start) << " ms"); int32_t sinkPos = 0; // simulate one field with one sink start = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < 20; ++i) { teeStream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str())))); sink = teeStream->newSinkTokenStream(newLucene(modCounts[j])); PositionIncrementAttributePtr posIncrAtt = teeStream->getAttribute(); while (teeStream->incrementToken()) sinkPos += posIncrAtt->getPositionIncrement(); posIncrAtt = sink->getAttribute(); while (sink->incrementToken()) sinkPos += posIncrAtt->getPositionIncrement(); } finish = MiscUtils::currentTimeMillis(); BOOST_TEST_MESSAGE("ModCount: " << modCounts[j] << " Tee fields took " << (finish - start) << " ms"); BOOST_CHECK_EQUAL(sinkPos, tfPos); } BOOST_TEST_MESSAGE("- End Tokens: " << tokCount[k] << "-----"); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/TokenTest.cpp000066400000000000000000000137201217574114600233330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Token.h" #include "Payload.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(TokenTest, LuceneTestFixture) static AttributePtr checkCloneIsEqual(AttributePtr att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); BOOST_CHECK(att->equals(clone)); BOOST_CHECK_EQUAL(att->hashCode(), clone->hashCode()); return clone; } template static AttributePtr checkCopyIsEqual(AttributePtr att) { AttributePtr copy = newLucene(); att->copyTo(copy); BOOST_CHECK(att->equals(copy)); BOOST_CHECK_EQUAL(att->hashCode(), copy->hashCode()); return copy; } BOOST_AUTO_TEST_CASE(testCtor) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); BOOST_CHECK_EQUAL(L"hello", t->term()); BOOST_CHECK_EQUAL(L"word", t->type()); BOOST_CHECK_EQUAL(0, t->getFlags()); t = newLucene(6, 22); t->setTermBuffer(L"hello"); BOOST_CHECK_EQUAL(L"hello", t->term()); BOOST_CHECK_EQUAL(L"(hello,6,22)", t->toString()); BOOST_CHECK_EQUAL(L"word", t->type()); BOOST_CHECK_EQUAL(0, t->getFlags()); t = newLucene(6, 22, 7); t->setTermBuffer(L"hello"); BOOST_CHECK_EQUAL(L"hello", t->term()); BOOST_CHECK_EQUAL(L"(hello,6,22)", t->toString()); BOOST_CHECK_EQUAL(7, t->getFlags()); t = newLucene(6, 22, L"junk"); t->setTermBuffer(L"hello"); BOOST_CHECK_EQUAL(L"hello", t->term()); BOOST_CHECK_EQUAL(L"(hello,6,22,type=junk)", t->toString()); BOOST_CHECK_EQUAL(0, t->getFlags()); } BOOST_AUTO_TEST_CASE(testResize) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); for (int32_t i = 0; i < 2000; ++i) { t->resizeTermBuffer(i); BOOST_CHECK(i <= t->termBuffer().size()); BOOST_CHECK_EQUAL(L"hello", t->term()); } } BOOST_AUTO_TEST_CASE(testGrow) { TokenPtr t = newLucene(); StringStream buf; buf << L"ab"; for (int32_t i = 0; i < 20; ++i) { String content = buf.str(); t->setTermBuffer(content); BOOST_CHECK_EQUAL(content.length(), t->termLength()); BOOST_CHECK_EQUAL(content, 
t->term()); buf << content; } BOOST_CHECK_EQUAL(1048576, t->termLength()); BOOST_CHECK_EQUAL(1179654, t->termBuffer().size()); // Test for slow growth to a long term t = newLucene(); buf.str(L""); buf << L"a"; for (int32_t i = 0; i < 20000; ++i) { String content = buf.str(); t->setTermBuffer(content); BOOST_CHECK_EQUAL(content.length(), t->termLength()); BOOST_CHECK_EQUAL(content, t->term()); buf << L"a"; } BOOST_CHECK_EQUAL(20000, t->termLength()); BOOST_CHECK_EQUAL(20167, t->termBuffer().size()); } BOOST_AUTO_TEST_CASE(testToString) { TokenPtr t = newLucene(L"", 0, 5); t->setTermBuffer(L"aloha"); BOOST_CHECK_EQUAL(L"(aloha,0,5)", t->toString()); t->setTermBuffer(L"hi there"); BOOST_CHECK_EQUAL(L"(hi there,0,5)", t->toString()); } BOOST_AUTO_TEST_CASE(testTermBufferEquals) { TokenPtr t1a = newLucene(); t1a->setTermBuffer(L"hello"); TokenPtr t1b = newLucene(); t1b->setTermBuffer(L"hello"); TokenPtr t2 = newLucene(); t2->setTermBuffer(L"hello2"); BOOST_CHECK(t1a->equals(t1b)); BOOST_CHECK(!t1a->equals(t2)); BOOST_CHECK(!t2->equals(t1b)); } BOOST_AUTO_TEST_CASE(testMixedStringArray) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); BOOST_CHECK_EQUAL(t->termLength(), 5); BOOST_CHECK_EQUAL(t->term(), L"hello"); t->setTermBuffer(L"hello2"); BOOST_CHECK_EQUAL(t->termLength(), 6); BOOST_CHECK_EQUAL(t->term(), L"hello2"); CharArray test = CharArray::newInstance(6); test[0] = L'h'; test[1] = L'e'; test[2] = L'l'; test[3] = L'l'; test[4] = L'o'; test[5] = L'3'; t->setTermBuffer(test.get(), 0, 6); BOOST_CHECK_EQUAL(t->term(), L"hello3"); CharArray buffer = t->termBuffer(); buffer[1] = L'o'; BOOST_CHECK_EQUAL(t->term(), L"hollo3"); } BOOST_AUTO_TEST_CASE(testClone) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); TokenPtr clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); BOOST_CHECK_EQUAL(t->term(), clone->term()); BOOST_CHECK(buf != clone->termBuffer()); ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; 
payload[1] = 2; payload[2] = 3; payload[3] = 4; PayloadPtr pl = newLucene(payload); t->setPayload(pl); clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); BOOST_CHECK(pl->equals(clone->getPayload())); BOOST_CHECK_NE(pl, clone->getPayload()); } BOOST_AUTO_TEST_CASE(testCopyTo) { TokenPtr t = newLucene(); TokenPtr copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); BOOST_CHECK_EQUAL(L"", t->term()); BOOST_CHECK_EQUAL(L"", copy->term()); t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); BOOST_CHECK_EQUAL(t->term(), copy->term()); BOOST_CHECK(buf != copy->termBuffer()); ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; payload[1] = 2; payload[2] = 3; payload[3] = 4; PayloadPtr pl = newLucene(payload); t->setPayload(pl); copy = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); BOOST_CHECK(pl->equals(copy->getPayload())); BOOST_CHECK_NE(pl, copy->getPayload()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/standard/000077500000000000000000000000001217574114600225045ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/analysis/standard/StandardAnalyzerTest.cpp000066400000000000000000000236771217574114600273350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "StandardAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(StandardAnalyzerTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testMaxTermLength) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); sa->setMaxTokenLength(5); checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"xy", L"z")); } BOOST_AUTO_TEST_CASE(testMaxTermLength2) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"toolong", L"xy", L"z")); sa->setMaxTokenLength(5); checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"xy", L"z"), newCollection(1, 1, 2, 1)); } BOOST_AUTO_TEST_CASE(testMaxTermLength3) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); String longTerm(255, L'a'); checkAnalyzesTo(sa, L"ab cd " + longTerm + L" xy z", newCollection(L"ab", L"cd", longTerm, L"xy", L"z")); checkAnalyzesTo(sa, L"ab cd " + longTerm + L"a xy z", newCollection(L"ab", L"cd", L"xy", L"z")); } BOOST_AUTO_TEST_CASE(testAlphanumeric) { // alphanumeric tokens StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"B2B", newCollection(L"b2b")); checkAnalyzesTo(sa, L"2B", newCollection(L"2b")); } BOOST_AUTO_TEST_CASE(testUnderscores) { // underscores are delimiters, but not in email addresses (below) StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"word_having_underscore", newCollection(L"word", L"having", L"underscore")); checkAnalyzesTo(sa, L"word_with_underscore_and_stopwords", newCollection(L"word", L"underscore", L"stopwords")); } BOOST_AUTO_TEST_CASE(testDelimiters) { // other delimiters: "-", "/", "," StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"some-dashed-phrase", 
newCollection(L"some", L"dashed", L"phrase")); checkAnalyzesTo(sa, L"dogs,chase,cats", newCollection(L"dogs", L"chase", L"cats")); checkAnalyzesTo(sa, L"ac/dc", newCollection(L"ac", L"dc")); } BOOST_AUTO_TEST_CASE(testApostrophes) { // internal apostrophes: O'Reilly, you're, O'Reilly's possessives are actually removed by StardardFilter, not the tokenizer StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"O'Reilly", newCollection(L"o'reilly")); checkAnalyzesTo(sa, L"you're", newCollection(L"you're")); checkAnalyzesTo(sa, L"she's", newCollection(L"she")); checkAnalyzesTo(sa, L"Jim's", newCollection(L"jim")); checkAnalyzesTo(sa, L"don't", newCollection(L"don't")); checkAnalyzesTo(sa, L"O'Reilly's", newCollection(L"o'reilly")); } BOOST_AUTO_TEST_CASE(testTSADash) { // t and s had been stopwords in Lucene <= 2.0, which made it impossible to correctly search for these terms StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"s-class", newCollection(L"s", L"class")); checkAnalyzesTo(sa, L"t-com", newCollection(L"t", L"com")); // 'a' is still a stopword checkAnalyzesTo(sa, L"a-class", newCollection(L"class")); } BOOST_AUTO_TEST_CASE(testCompanyNames) { // internal apostrophes: O'Reilly, you're, O'Reilly's possessives are actually removed by StardardFilter, not the tokenizer StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"AT&T", newCollection(L"at&t")); checkAnalyzesTo(sa, L"Excite@Home", newCollection(L"excite@home")); } BOOST_AUTO_TEST_CASE(testDomainNames) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); // domain names checkAnalyzesTo(sa, L"www.nutch.org", newCollection(L"www.nutch.org")); // the following should be recognized as HOST BOOST_CHECK_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"www.nutch.org"), newCollection(L""))); // 2.3 should show the bug sa = newLucene(LuceneVersion::LUCENE_23); 
BOOST_CHECK_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"wwwnutchorg"), newCollection(L""))); // 2.4 should not show the bug sa = newLucene(LuceneVersion::LUCENE_24); BOOST_CHECK_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"www.nutch.org"), newCollection(L""))); } BOOST_AUTO_TEST_CASE(testEMailAddresses) { // email addresses, possibly with underscores, periods, etc StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"test@example.com", newCollection(L"test@example.com")); checkAnalyzesTo(sa, L"first.lastname@example.com", newCollection(L"first.lastname@example.com")); checkAnalyzesTo(sa, L"first_lastname@example.com", newCollection(L"first_lastname@example.com")); } BOOST_AUTO_TEST_CASE(testNumeric) { // floating point, serial, model numbers, ip addresses, etc. // every other segment must have at least one digit StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"21.35", newCollection(L"21.35")); checkAnalyzesTo(sa, L"216.239.63.104", newCollection(L"216.239.63.104")); checkAnalyzesTo(sa, L"1-2-3", newCollection(L"1-2-3")); checkAnalyzesTo(sa, L"a1-b2-c3", newCollection(L"a1-b2-c3")); checkAnalyzesTo(sa, L"a1-b-c3", newCollection(L"a1-b-c3")); checkAnalyzesTo(sa, L"R2D2 C3PO", newCollection(L"r2d2", L"c3po")); } BOOST_AUTO_TEST_CASE(testTextWithNumbers) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"David has 5000 bones", newCollection(L"david", L"has", L"5000", L"bones")); } BOOST_AUTO_TEST_CASE(testVariousText) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"C embedded developers wanted", newCollection(L"c", L"embedded", L"developers", L"wanted")); checkAnalyzesTo(sa, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(sa, L"foo bar . 
FOO <> BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(sa, L"\"QUOTED\" word", newCollection(L"quoted", L"word")); } BOOST_AUTO_TEST_CASE(testAcronyms) { // acronyms have their dots stripped StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"U.S.A.", newCollection(L"usa")); } BOOST_AUTO_TEST_CASE(testCPlusPlusHash) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"C++", newCollection(L"c")); checkAnalyzesTo(sa, L"C#", newCollection(L"c")); } BOOST_AUTO_TEST_CASE(testComplianceFileName) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"2004.jpg", newCollection(L"2004.jpg"), newCollection(L"")); } BOOST_AUTO_TEST_CASE(testComplianceNumericIncorrect) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"62.46", newCollection(L"62.46"), newCollection(L"")); } BOOST_AUTO_TEST_CASE(testComplianceNumericLong) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"978-0-94045043-1", newCollection(L"978-0-94045043-1"), newCollection(L"")); } BOOST_AUTO_TEST_CASE(testComplianceNumericFile) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"78academyawards/rules/rule02.html", newCollection(L"78academyawards/rules/rule02.html"), newCollection(L"")); } BOOST_AUTO_TEST_CASE(testComplianceNumericWithUnderscores) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs", newCollection(L"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"), newCollection(L"")); } BOOST_AUTO_TEST_CASE(testComplianceNumericWithDash) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"mid-20th", newCollection(L"mid-20th"), newCollection(L"")); } BOOST_AUTO_TEST_CASE(testComplianceManyTokens) { StandardAnalyzerPtr sa = 
newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm safari-0-sheikh-zayed-grand-mosque.jpg", newCollection(L"money.cnn.com", L"magazines", L"fortune", L"fortune", L"archive/2007/03/19/8402357", L"index.htm", L"safari-0-sheikh", L"zayed", L"grand", L"mosque.jpg"), newCollection(L"", L"", L"", L"", L"", L"", L"", L"", L"", L"")); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/tokenattributes/000077500000000000000000000000001217574114600241335ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/analysis/tokenattributes/SimpleAttributeTest.cpp000066400000000000000000000117621217574114600306230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FlagsAttribute.h" #include "PositionIncrementAttribute.h" #include "TypeAttribute.h" #include "PayloadAttribute.h" #include "Payload.h" #include "OffsetAttribute.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SimpleAttributeTest, LuceneTestFixture) static AttributePtr checkCloneIsEqual(AttributePtr att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); BOOST_CHECK(att->equals(clone)); BOOST_CHECK_EQUAL(att->hashCode(), clone->hashCode()); return clone; } template static AttributePtr checkCopyIsEqual(AttributePtr att) { AttributePtr copy = newLucene(); att->copyTo(copy); BOOST_CHECK(att->equals(copy)); BOOST_CHECK_EQUAL(att->hashCode(), copy->hashCode()); return copy; } BOOST_AUTO_TEST_CASE(testFlagsAttribute) { FlagsAttributePtr att = newLucene(); BOOST_CHECK_EQUAL(0, att->getFlags()); att->setFlags(1234); BOOST_CHECK_EQUAL(L"flags=1234", att->toString()); FlagsAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); BOOST_CHECK_EQUAL(1234, att2->getFlags()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); BOOST_CHECK_EQUAL(1234, att2->getFlags()); att->clear(); BOOST_CHECK_EQUAL(0, att->getFlags()); } BOOST_AUTO_TEST_CASE(testPositionIncrementAttribute) { PositionIncrementAttributePtr att = newLucene(); BOOST_CHECK_EQUAL(1, att->getPositionIncrement()); att->setPositionIncrement(1234); BOOST_CHECK_EQUAL(L"positionIncrement=1234", att->toString()); PositionIncrementAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); BOOST_CHECK_EQUAL(1234, att2->getPositionIncrement()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); BOOST_CHECK_EQUAL(1234, att2->getPositionIncrement()); att->clear(); BOOST_CHECK_EQUAL(1, att->getPositionIncrement()); } namespace TestTypeAttribute { class TestableTypeAttribute : public TypeAttribute { public: 
virtual ~TestableTypeAttribute() { } LUCENE_CLASS(TestableTypeAttribute); public: using TypeAttribute::DEFAULT_TYPE; }; } BOOST_AUTO_TEST_CASE(testTypeAttribute) { TypeAttributePtr att = newLucene(); BOOST_CHECK_EQUAL(TestTypeAttribute::TestableTypeAttribute::DEFAULT_TYPE(), att->type()); att->setType(L"hello"); BOOST_CHECK_EQUAL(L"type=hello", att->toString()); TypeAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); BOOST_CHECK_EQUAL(L"hello", att2->type()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); BOOST_CHECK_EQUAL(L"hello", att2->type()); att->clear(); BOOST_CHECK_EQUAL(TestTypeAttribute::TestableTypeAttribute::DEFAULT_TYPE(), att->type()); } BOOST_AUTO_TEST_CASE(testPayloadAttribute) { PayloadAttributePtr att = newLucene(); BOOST_CHECK(!att->getPayload()); ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; payload[1] = 2; payload[2] = 3; payload[3] = 4; PayloadPtr pl = newLucene(payload); att->setPayload(pl); PayloadAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); BOOST_CHECK(pl->equals(att2->getPayload())); BOOST_CHECK_NE(pl, att2->getPayload()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); BOOST_CHECK(pl->equals(att2->getPayload())); BOOST_CHECK_NE(pl, att2->getPayload()); att->clear(); BOOST_CHECK(!att->getPayload()); } BOOST_AUTO_TEST_CASE(testOffsetAttribute) { OffsetAttributePtr att = newLucene(); BOOST_CHECK_EQUAL(0, att->startOffset()); BOOST_CHECK_EQUAL(0, att->endOffset()); att->setOffset(12, 34); // no string test here, because order unknown OffsetAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); BOOST_CHECK_EQUAL(12, att2->startOffset()); BOOST_CHECK_EQUAL(34, att2->endOffset()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); BOOST_CHECK_EQUAL(12, att2->startOffset()); BOOST_CHECK_EQUAL(34, att2->endOffset()); att->clear(); BOOST_CHECK_EQUAL(0, att->startOffset()); BOOST_CHECK_EQUAL(0, att->endOffset()); } 
BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/analysis/tokenattributes/TermAttributeTest.cpp000066400000000000000000000112051217574114600302710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "TermAttribute.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(TermAttributeTest, LuceneTestFixture) static AttributePtr checkCloneIsEqual(AttributePtr att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); BOOST_CHECK(att->equals(clone)); BOOST_CHECK_EQUAL(att->hashCode(), clone->hashCode()); return clone; } template static AttributePtr checkCopyIsEqual(AttributePtr att) { AttributePtr copy = newLucene(); att->copyTo(copy); BOOST_CHECK(att->equals(copy)); BOOST_CHECK_EQUAL(att->hashCode(), copy->hashCode()); return copy; } BOOST_AUTO_TEST_CASE(testResize) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); for (int32_t i = 0; i < 2000; ++i) { t->resizeTermBuffer(i); BOOST_CHECK(i <= t->termBuffer().size()); BOOST_CHECK_EQUAL(L"hello", t->term()); } } BOOST_AUTO_TEST_CASE(testGrow) { TermAttributePtr t = newLucene(); StringStream buf; buf << L"ab"; for (int32_t i = 0; i < 20; ++i) { String content = buf.str(); t->setTermBuffer(content); BOOST_CHECK_EQUAL(content.length(), t->termLength()); BOOST_CHECK_EQUAL(content, t->term()); buf << content; } BOOST_CHECK_EQUAL(1048576, t->termLength()); BOOST_CHECK_EQUAL(1179654, t->termBuffer().size()); // Test for slow growth to a long term t = newLucene(); buf.str(L""); buf << L"a"; for (int32_t i = 0; i < 20000; ++i) { String content = buf.str(); t->setTermBuffer(content); 
BOOST_CHECK_EQUAL(content.length(), t->termLength()); BOOST_CHECK_EQUAL(content, t->term()); buf << L"a"; } BOOST_CHECK_EQUAL(20000, t->termLength()); BOOST_CHECK_EQUAL(20167, t->termBuffer().size()); } BOOST_AUTO_TEST_CASE(testToString) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"aloha"); BOOST_CHECK_EQUAL(L"term=aloha", t->toString()); t->setTermBuffer(L"hi there"); BOOST_CHECK_EQUAL(L"term=hi there", t->toString()); } BOOST_AUTO_TEST_CASE(testMixedStringArray) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); BOOST_CHECK_EQUAL(t->termLength(), 5); BOOST_CHECK_EQUAL(t->term(), L"hello"); t->setTermBuffer(L"hello2"); BOOST_CHECK_EQUAL(t->termLength(), 6); BOOST_CHECK_EQUAL(t->term(), L"hello2"); CharArray test = CharArray::newInstance(6); test[0] = L'h'; test[1] = L'e'; test[2] = L'l'; test[3] = L'l'; test[4] = L'o'; test[5] = L'3'; t->setTermBuffer(test.get(), 0, 6); BOOST_CHECK_EQUAL(t->term(), L"hello3"); // Make sure if we get the buffer and change a character that term() reflects the change CharArray buffer = t->termBuffer(); buffer[1] = L'o'; BOOST_CHECK_EQUAL(t->term(), L"hollo3"); } BOOST_AUTO_TEST_CASE(testClone) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); TermAttributePtr clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); BOOST_CHECK_EQUAL(t->term(), clone->term()); BOOST_CHECK(buf != clone->termBuffer()); } BOOST_AUTO_TEST_CASE(testEquals) { TermAttributePtr t1a = newLucene(); t1a->setTermBuffer(L"hello"); TermAttributePtr t1b = newLucene(); t1b->setTermBuffer(L"hello"); TermAttributePtr t2 = newLucene(); t2->setTermBuffer(L"hello2"); BOOST_CHECK(t1a->equals(t1b)); BOOST_CHECK(!t1a->equals(t2)); BOOST_CHECK(!t2->equals(t1b)); } BOOST_AUTO_TEST_CASE(testCopyTo) { TermAttributePtr t = newLucene(); TermAttributePtr copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); BOOST_CHECK_EQUAL(L"", t->term()); BOOST_CHECK_EQUAL(L"", copy->term()); t = newLucene(); 
t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); BOOST_CHECK_EQUAL(t->term(), copy->term()); BOOST_CHECK(buf != copy->termBuffer()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/000077500000000000000000000000001217574114600205215ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/000077500000000000000000000000001217574114600225315ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/000077500000000000000000000000001217574114600240215ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/000077500000000000000000000000001217574114600256445ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/ar/000077500000000000000000000000001217574114600262465ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/ar/ArabicAnalyzerTest.cpp000066400000000000000000000151621217574114600325060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ArabicAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(ArabicAnalyzerTest, BaseTokenStreamFixture) /// Some simple tests showing some features of the analyzer, how some regular forms will conflate BOOST_AUTO_TEST_CASE(testBasicFeatures1) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBasicFeatures2) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBasicFeatures3) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBasicFeatures4) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBasicFeatures5) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa3, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 
0x8a, 0xd9, 0x8a, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBasicFeatures6) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBasicFeatures7) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBasicFeatures8) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBasicFeatures9) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xa7, 0x20, 0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0x20, 0xd8, 0xa3, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; const uint8_t second[] = {0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa}; const uint8_t third[] = {0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second), UTF8_TO_STRING(third))); } BOOST_AUTO_TEST_CASE(testBasicFeatures10) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t 
first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0xd9, 0x86, 0x20, 0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0x20, 0xd8, 0xa3, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; const uint8_t second[] = {0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa}; const uint8_t third[] = {0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second), UTF8_TO_STRING(third))); } /// Simple tests to show things are getting reset correctly, etc. BOOST_AUTO_TEST_CASE(testReusableTokenStream1) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testReusableTokenStream2) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// Non-arabic text gets treated in a similar way as SimpleAnalyzer. BOOST_AUTO_TEST_CASE(testEnglishInput) { checkAnalyzesTo(newLucene(LuceneVersion::LUCENE_CURRENT), L"English text.", newCollection(L"english", L"text")); } /// Test that custom stopwords work, and are not case-sensitive. 
BOOST_AUTO_TEST_CASE(testCustomStopwords) { Collection stopWords = newCollection(L"the", L"and", L"a"); ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, HashSet::newInstance(stopWords.begin(), stopWords.end())); checkAnalyzesTo(a, L"The quick brown fox.", newCollection(L"quick", L"brown", L"fox")); } BOOST_AUTO_TEST_SUITE_END() ArabicNormalizationFilterTest.cpp000066400000000000000000000113431217574114600346330ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/ar///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ArabicLetterTokenizer.h" #include "ArabicNormalizationFilter.h" #include "StringReader.h" using namespace Lucene; class ArabicNormalizationFilterFixture : public BaseTokenStreamFixture { public: virtual ~ArabicNormalizationFilterFixture() { } public: void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); ArabicNormalizationFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; BOOST_FIXTURE_TEST_SUITE(ArabicNormalizationFilterTest, ArabicNormalizationFilterFixture) BOOST_AUTO_TEST_CASE(testAlifMadda) { const uint8_t first[] = {0xd8, 0xa2, 0xd8, 0xac, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xac, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testAlifHamzaAbove) { const uint8_t first[] = {0xd8, 0xa3, 0xd8, 0xad, 0xd9, 0x85, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xad, 0xd9, 0x85, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } 
BOOST_AUTO_TEST_CASE(testAlifHamzaBelow) { const uint8_t first[] = {0xd8, 0xa5, 0xd8, 0xb9, 0xd8, 0xa7, 0xd8, 0xb0}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xb9, 0xd8, 0xa7, 0xd8, 0xb0}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testAlifMaksura) { const uint8_t first[] = {0xd8, 0xa8, 0xd9, 0x86, 0xd9, 0x89}; const uint8_t second[] = {0xd8, 0xa8, 0xd9, 0x86, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testTehMarbuta) { const uint8_t first[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd8, 0xb7, 0xd9, 0x85, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd8, 0xb7, 0xd9, 0x85, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testTatweel) { const uint8_t first[] = {0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaa}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testFatha) { const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8e, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testKasra) { const uint8_t first[] = {0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x90, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testDamma) { const uint8_t first[] = {0xd8, 0xa8, 0xd9, 0x8f, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xaa}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testFathatan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x8b}; const uint8_t second[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd8, 0xa7}; 
check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testKasratan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x8d}; const uint8_t second[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testDammatan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x8c}; const uint8_t second[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testSukun) { const uint8_t first[] = {0xd9, 0x86, 0xd9, 0x84, 0xd9, 0x92, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd9, 0x86, 0xd9, 0x84, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testShaddah) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xaa, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x91}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xaa, 0xd9, 0x85, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/ar/ArabicStemFilterTest.cpp000066400000000000000000000146131217574114600327770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ArabicStemFilter.h" #include "ArabicLetterTokenizer.h" #include "StringReader.h" using namespace Lucene; class ArabicStemFilterFixture : public BaseTokenStreamFixture { public: virtual ~ArabicStemFilterFixture() { } public: void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); ArabicStemFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; BOOST_FIXTURE_TEST_SUITE(ArabicStemFilterTest, ArabicStemFilterFixture) BOOST_AUTO_TEST_CASE(testAlPrefix) { const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testWalPrefix) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testBalPrefix) { const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testKalPrefix) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testFalPrefix) { const uint8_t first[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testLlPrefix) { const uint8_t 
first[] = {0xd9, 0x84, 0xd9, 0x84, 0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xb1}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xb1}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testWaPrefix) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testAhSuffix) { const uint8_t first[] = {0xd8, 0xb2, 0xd9, 0x88, 0xd8, 0xac, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xb2, 0xd9, 0x88, 0xd8, 0xac}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testAnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testAtSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testWnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testYnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testYhSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x87}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } 
BOOST_AUTO_TEST_CASE(testYpSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xa9}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testHSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x87}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testPSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa9}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testYSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testComboPrefSuf) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testComboSuf) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x87, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testShouldntStem) { const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x88}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x88}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testNonArabic) { check(L"English", L"English"); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/br/000077500000000000000000000000001217574114600262475ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/br/BrazilianStemmerTest.cpp000066400000000000000000000211341217574114600330640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "BrazilianAnalyzer.h" using namespace Lucene; class BrazilianStemmerFixture : public BaseTokenStreamFixture { public: virtual ~BrazilianStemmerFixture() { } public: void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } void checkReuse(AnalyzerPtr a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; BOOST_FIXTURE_TEST_SUITE(BrazilianStemmerTest, BrazilianStemmerFixture) /// Test the Brazilian Stem Filter, which only modifies the term text. /// It is very similar to the snowball Portuguese algorithm but not exactly the same. 
BOOST_AUTO_TEST_CASE(testWithSnowballExamples) { check(L"boa", L"boa"); check(L"boainain", L"boainain"); check(L"boas", L"boas"); const uint8_t boas[] = {0x62, 0xc3, 0xb4, 0x61, 0x73}; check(UTF8_TO_STRING(boas), L"boas"); // removes diacritic: different from snowball Portuguese check(L"boassu", L"boassu"); check(L"boataria", L"boat"); check(L"boate", L"boat"); check(L"boates", L"boat"); check(L"boatos", L"boat"); check(L"bob", L"bob"); check(L"boba", L"bob"); check(L"bobagem", L"bobag"); check(L"bobagens", L"bobagens"); const uint8_t bobalho[] = {0x62, 0x6f, 0x62, 0x61, 0x6c, 0x68, 0xc3, 0xb5, 0x65, 0x73}; check(UTF8_TO_STRING(bobalho), L"bobalho"); // removes diacritic: different from snowball Portuguese check(L"bobear", L"bob"); check(L"bobeira", L"bobeir"); check(L"bobinho", L"bobinh"); check(L"bobinhos", L"bobinh"); check(L"bobo", L"bob"); check(L"bobs", L"bobs"); check(L"boca", L"boc"); check(L"bocadas", L"boc"); check(L"bocadinho", L"bocadinh"); check(L"bocado", L"boc"); const uint8_t bocaiuv[] = {0x62, 0x6f, 0x63, 0x61, 0x69, 0xc3, 0xba, 0x76, 0x61}; check(UTF8_TO_STRING(bocaiuv), L"bocaiuv"); // removes diacritic: different from snowball Portuguese const uint8_t bocal[] = {0x62, 0x6f, 0xc3, 0xa7, 0x61, 0x6c}; check(UTF8_TO_STRING(bocal), L"bocal"); // removes diacritic: different from snowball Portuguese check(L"bocarra", L"bocarr"); check(L"bocas", L"boc"); check(L"bode", L"bod"); check(L"bodoque", L"bodoqu"); check(L"body", L"body"); check(L"boeing", L"boeing"); check(L"boem", L"boem"); check(L"boemia", L"boem"); const uint8_t boemi[] = {0x62, 0x6f, 0xc3, 0xaa, 0x6d, 0x69, 0x6f}; check(UTF8_TO_STRING(boemi), L"boemi"); // removes diacritic: different from snowball Portuguese const uint8_t bogot[] = {0x62, 0x6f, 0x67, 0x6f, 0x74, 0xc3, 0xa1}; check(UTF8_TO_STRING(bogot), L"bogot"); // removes diacritic: different from snowball Portuguese check(L"boi", L"boi"); const uint8_t boi[] = {0x62, 0xc3, 0xb3, 0x69, 0x61}; check(UTF8_TO_STRING(boi), L"boi"); // 
removes diacritic: different from snowball Portuguese check(L"boiando", L"boi"); check(L"quiabo", L"quiab"); check(L"quicaram", L"quic"); check(L"quickly", L"quickly"); check(L"quieto", L"quiet"); check(L"quietos", L"quiet"); check(L"quilate", L"quilat"); check(L"quilates", L"quilat"); check(L"quilinhos", L"quilinh"); check(L"quilo", L"quil"); check(L"quilombo", L"quilomb"); const uint8_t quilometricas[] = {0x71, 0x75, 0x69, 0x6c, 0x6f, 0x6d, 0xc3, 0xa9, 0x74, 0x72, 0x69, 0x63, 0x61, 0x73}; check(UTF8_TO_STRING(quilometricas), L"quilometr"); // removes diacritic: different from snowball Portuguese const uint8_t quilometricos[] = {0x71, 0x75, 0x69, 0x6c, 0x6f, 0x6d, 0xc3, 0xa9, 0x74, 0x72, 0x69, 0x63, 0x6f, 0x73}; check(UTF8_TO_STRING(quilometricos), L"quilometr"); // removes diacritic: different from snowball Portuguese const uint8_t quilometro[] = {0x71, 0x75, 0x69, 0x6c, 0xc3, 0xb4, 0x6d, 0x65, 0x74, 0x72, 0x6f}; check(UTF8_TO_STRING(quilometro), L"quilometr"); // removes diacritic: different from snowball Portuguese const uint8_t quilometros[] = {0x71, 0x75, 0x69, 0x6c, 0xc3, 0xb4, 0x6d, 0x65, 0x74, 0x72, 0x6f, 0x73}; check(UTF8_TO_STRING(quilometros), L"quilometr"); // removes diacritic: different from snowball Portuguese check(L"quilos", L"quil"); check(L"quimica", L"quimic"); check(L"quilos", L"quil"); check(L"quimica", L"quimic"); check(L"quimicas", L"quimic"); check(L"quimico", L"quimic"); check(L"quimicos", L"quimic"); check(L"quimioterapia", L"quimioterap"); const uint8_t quimioterap[] = {0x71, 0x75, 0x69, 0x6d, 0x69, 0x6f, 0x74, 0x65, 0x72, 0xc3, 0xa1, 0x70, 0x69, 0x63, 0x6f, 0x73}; check(UTF8_TO_STRING(quimioterap), L"quimioterap"); // removes diacritic: different from snowball Portuguese check(L"quimono", L"quimon"); check(L"quincas", L"quinc"); const uint8_t quinha[] = {0x71, 0x75, 0x69, 0x6e, 0x68, 0xc3, 0xa3, 0x6f}; check(UTF8_TO_STRING(quinha), L"quinha"); // removes diacritic: different from snowball Portuguese check(L"quinhentos", L"quinhent"); 
check(L"quinn", L"quinn"); check(L"quino", L"quin"); check(L"quinta", L"quint"); check(L"quintal", L"quintal"); check(L"quintana", L"quintan"); check(L"quintanilha", L"quintanilh"); const uint8_t quinta[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0xc3, 0xa3, 0x6f}; check(UTF8_TO_STRING(quinta), L"quinta"); // removes diacritic: different from snowball Portuguese const uint8_t quintessente[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0x65, 0x73, 0x73, 0xc3, 0xaa, 0x6e, 0x63, 0x69, 0x61}; check(UTF8_TO_STRING(quintessente), L"quintessente"); // removes diacritic: different from snowball Portuguese check(L"quintino", L"quintin"); check(L"quinto", L"quint"); check(L"quintos", L"quint"); check(L"quintuplicou", L"quintuplic"); check(L"quinze", L"quinz"); check(L"quinzena", L"quinzen"); check(L"quiosque", L"quiosqu"); } BOOST_AUTO_TEST_CASE(testNormalization) { check(L"Brasil", L"brasil"); // lowercase by default const uint8_t brasil[] = {0x42, 0x72, 0x61, 0x73, 0xc3, 0xad, 0x6c, 0x69, 0x61}; check(UTF8_TO_STRING(brasil), L"brasil"); // remove diacritics const uint8_t quimio5terapicos[] = {0x71, 0x75, 0x69, 0x6d, 0x69, 0x6f, 0x35, 0x74, 0x65, 0x72, 0xc3, 0xa1, 0x70, 0x69, 0x63, 0x6f, 0x73}; check(UTF8_TO_STRING(quimio5terapicos), L"quimio5terapicos"); // contains non-letter, diacritic will still be removed const uint8_t aa[] = {0xc3, 0xa1, 0xc3, 0xa1}; check(UTF8_TO_STRING(aa), UTF8_TO_STRING(aa)); // token is too short: diacritics are not removed const uint8_t aaa[] = {0xc3, 0xa1, 0xc3, 0xa1, 0xc3, 0xa1}; check(UTF8_TO_STRING(aaa), L"aaa"); // normally, diacritics are removed } BOOST_AUTO_TEST_CASE(testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"boa", L"boa"); checkReuse(a, L"boainain", L"boainain"); checkReuse(a, L"boas", L"boas"); const uint8_t boas[] = {0x62, 0xc3, 0xb4, 0x61, 0x73}; checkReuse(a, UTF8_TO_STRING(boas), L"boas"); // removes diacritic: different from snowball Portuguese } BOOST_AUTO_TEST_CASE(testStemExclusionTable) { 
BrazilianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); HashSet exclusions = HashSet::newInstance(); const uint8_t quintessencia[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0x65, 0x73, 0x73, 0xc3, 0xaa, 0x6e, 0x63, 0x69, 0x61}; exclusions.add(UTF8_TO_STRING(quintessencia)); a->setStemExclusionTable(exclusions); checkReuse(a, UTF8_TO_STRING(quintessencia), UTF8_TO_STRING(quintessencia)); // excluded words will be completely unchanged } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. BOOST_AUTO_TEST_CASE(testExclusionTableReuse) { BrazilianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t quintessencia[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0x65, 0x73, 0x73, 0xc3, 0xaa, 0x6e, 0x63, 0x69, 0x61}; checkReuse(a, UTF8_TO_STRING(quintessencia), L"quintessente"); HashSet exclusions = HashSet::newInstance(); exclusions.add(UTF8_TO_STRING(quintessencia)); a->setStemExclusionTable(exclusions); checkReuse(a, UTF8_TO_STRING(quintessencia), UTF8_TO_STRING(quintessencia)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/cjk/000077500000000000000000000000001217574114600264135ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/cjk/CJKTokenizerTest.cpp000066400000000000000000000472001217574114600322640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "CJKTokenizer.h" #include "CJKAnalyzer.h" using namespace Lucene; class CJKTokenizerFixture : public BaseTokenStreamFixture { public: virtual ~CJKTokenizerFixture() { } public: struct TestToken { TestToken(const String& termText = L"", int32_t start = 0, int32_t end = 0, int32_t type = 0) { this->termText = termText; this->start = start; this->end = end; this->type = CJKTokenizer::TOKEN_TYPE_NAMES[type]; } String termText; int32_t start; int32_t end; String type; }; void checkCJKToken(const String& str, Collection out_tokens) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); Collection terms = Collection::newInstance(out_tokens.size()); Collection startOffsets = Collection::newInstance(out_tokens.size()); Collection endOffsets = Collection::newInstance(out_tokens.size()); Collection types = Collection::newInstance(out_tokens.size()); for (int32_t i = 0; i < out_tokens.size(); ++i) { terms[i] = out_tokens[i].termText; startOffsets[i] = out_tokens[i].start; endOffsets[i] = out_tokens[i].end; types[i] = out_tokens[i].type; } checkAnalyzesTo(analyzer, str, terms, startOffsets, endOffsets, types, Collection()); } void checkCJKTokenReusable(AnalyzerPtr analyzer, const String& str, Collection out_tokens) { Collection terms = Collection::newInstance(out_tokens.size()); Collection startOffsets = Collection::newInstance(out_tokens.size()); Collection endOffsets = Collection::newInstance(out_tokens.size()); Collection types = Collection::newInstance(out_tokens.size()); for (int32_t i = 0; i < out_tokens.size(); ++i) { terms[i] = out_tokens[i].termText; startOffsets[i] = out_tokens[i].start; endOffsets[i] = out_tokens[i].end; types[i] = out_tokens[i].type; } checkAnalyzesToReuse(analyzer, str, terms, startOffsets, endOffsets, types, Collection()); } }; BOOST_FIXTURE_TEST_SUITE(CJKTokenizerTest, CJKTokenizerFixture) 
BOOST_AUTO_TEST_CASE(testJa1) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b, 0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d, 0xe5, 0x8d, 0x81}; const uint8_t token1[] = {0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c}; const uint8_t token2[] = {0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89}; const uint8_t token3[] = {0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b}; const uint8_t token4[] = {0xe5, 0x9b, 0x9b, 0xe4, 0xba, 0x94}; const uint8_t token5[] = {0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad}; const uint8_t token6[] = {0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83}; const uint8_t token7[] = {0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab}; const uint8_t token8[] = {0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d}; const uint8_t token9[] = {0xe4, 0xb9, 0x9d, 0xe5, 0x8d, 0x81}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token5), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 7, 9, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token9), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } BOOST_AUTO_TEST_CASE(testJa2) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b, 0x20, 0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d, 0x20, 0xe5, 0x8d, 0x81}; const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89}; const uint8_t token3[] = {0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b}; const uint8_t 
token4[] = {0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad}; const uint8_t token5[] = {0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83}; const uint8_t token6[] = {0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab}; const uint8_t token7[] = {0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d}; const uint8_t token8[] = {0xe5, 0x8d, 0x81}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token5), 7, 9, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 12, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } BOOST_AUTO_TEST_CASE(testC) { String str = L"abc defgh ijklmn opqrstu vwxy z"; Collection out_tokens = newCollection( TestToken(L"abc", 0, 3, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"defgh", 4, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"ijklmn", 10, 16, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"opqrstu", 17, 24, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"vwxy", 25, 29, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"z", 30, 31, CJKTokenizer::SINGLE_TOKEN_TYPE) ); checkCJKToken(str, out_tokens); } BOOST_AUTO_TEST_CASE(testMix) { const uint8_t str[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t token2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; const uint8_t token3[] = {0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88}; const uint8_t token4[] = {0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a}; const 
uint8_t token6[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t token7[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t token8[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; const uint8_t token9[] = {0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"abc", 5, 8, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token9), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } BOOST_AUTO_TEST_CASE(testMix2) { const uint8_t str[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0xe3, 0x82, 0x93, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t token2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; const uint8_t token3[] = {0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88}; const uint8_t token4[] = {0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a}; const uint8_t token6[] = {0xe3, 0x82, 0x93}; const uint8_t token8[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t token9[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t token10[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), 
TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"ab", 5, 7, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 7, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"c", 8, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token9), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token10), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } BOOST_AUTO_TEST_CASE(testSingleChar) { const uint8_t str[] = {0xe4, 0xb8, 0x80}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(str), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } /// Full-width text is normalized to half-width BOOST_AUTO_TEST_CASE(testFullWidth) { const uint8_t str[] = {0xef, 0xbc, 0xb4, 0xef, 0xbd, 0x85, 0xef, 0xbd, 0x93, 0xef, 0xbd, 0x94, 0x20, 0xef, 0xbc, 0x91, 0xef, 0xbc, 0x92, 0xef, 0xbc, 0x93, 0xef, 0xbc, 0x94}; Collection out_tokens = newCollection( TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"1234", 5, 9, CJKTokenizer::SINGLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } /// Non-english text (not just CJK) is treated the same as CJK: C1C2 C2C3 BOOST_AUTO_TEST_CASE(testNonIdeographic) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb1}; const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xd8, 0xb1, 0xd9, 0x88}; const uint8_t token3[] = {0xd9, 0x88, 0xd8, 0xa8}; const uint8_t token4[] = {0xd8, 0xa8, 0xd8, 0xb1}; const uint8_t token5[] = {0xd8, 0xb1, 0xd8, 0xaa}; const uint8_t token6[] = {0xd9, 0x85, 0xd9, 0x88}; const uint8_t token7[] = {0xd9, 0x88, 0xd9, 0x8a}; const uint8_t token8[] = {0xd9, 0x8a, 0xd8, 0xb1}; Collection out_tokens = newCollection( 
TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token5), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } /// Non-english text with non-letters (non-spacing marks,etc) is treated as C1C2 C2C3, /// except for words are split around non-letters. BOOST_AUTO_TEST_CASE(testNonIdeographicNonLetter) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xd8, 0xb1, 0xd9, 0x8f, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb1}; const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xd8, 0xb1}; const uint8_t token3[] = {0xd9, 0x88, 0xd8, 0xa8}; const uint8_t token4[] = {0xd8, 0xa8, 0xd8, 0xb1}; const uint8_t token5[] = {0xd8, 0xb1, 0xd8, 0xaa}; const uint8_t token6[] = {0xd9, 0x85, 0xd9, 0x88}; const uint8_t token7[] = {0xd9, 0x88, 0xd9, 0x8a}; const uint8_t token8[] = {0xd9, 0x8a, 0xd8, 0xb1}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 2, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token5), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 11, 
13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } BOOST_AUTO_TEST_CASE(testTokenStream) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t token1[] = {0xe4, 0xb8, 0x80, 0xe4, 0xb8, 0x81, 0xe4, 0xb8, 0x82}; const uint8_t token2[] = {0xe4, 0xb8, 0x80, 0xe4, 0xb8, 0x81}; const uint8_t token3[] = {0xe4, 0xb8, 0x81, 0xe4, 0xb8, 0x82}; checkAnalyzesTo(analyzer, UTF8_TO_STRING(token1), newCollection(UTF8_TO_STRING(token2), UTF8_TO_STRING(token3))); } BOOST_AUTO_TEST_CASE(testReusableTokenStream) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; const uint8_t firstToken1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t firstToken2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; const uint8_t firstToken3[] = {0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88}; const uint8_t firstToken4[] = {0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a}; const uint8_t firstToken6[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t firstToken7[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t firstToken8[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; const uint8_t firstToken9[] = {0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(firstToken1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"abc", 5, 8, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), 
TestToken(UTF8_TO_STRING(firstToken8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken9), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKTokenReusable(analyzer, UTF8_TO_STRING(first), out_tokens); const uint8_t second[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0xe3, 0x82, 0x93, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; const uint8_t secondToken1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t secondToken2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; const uint8_t secondToken3[] = {0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88}; const uint8_t secondToken4[] = {0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a}; const uint8_t secondToken6[] = {0xe3, 0x82, 0x93}; const uint8_t secondToken8[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t secondToken9[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t secondToken10[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; Collection out_tokens2 = newCollection( TestToken(UTF8_TO_STRING(secondToken1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"ab", 5, 7, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken6), 7, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"c", 8, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken8), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken9), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken10), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKTokenReusable(analyzer, UTF8_TO_STRING(second), out_tokens2); } BOOST_AUTO_TEST_CASE(testFinalOffset) { const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; 
checkCJKToken(UTF8_TO_STRING(token1), newCollection(TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE))); const uint8_t token2[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x20, 0x20, 0x20}; checkCJKToken(UTF8_TO_STRING(token2), newCollection(TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE))); checkCJKToken(L"test", newCollection(TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE))); checkCJKToken(L"test ", newCollection(TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE))); const uint8_t token3[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x74, 0x65, 0x73, 0x74}; checkCJKToken(UTF8_TO_STRING(token3), newCollection( TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"test", 2, 6, CJKTokenizer::SINGLE_TOKEN_TYPE))); const uint8_t token4[] = {0x74, 0x65, 0x73, 0x74, 0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x20, 0x20, 0x20, 0x20}; checkCJKToken(UTF8_TO_STRING(token4), newCollection( TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token1), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE))); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/cn/000077500000000000000000000000001217574114600262445ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/cn/ChineseTokenizerTest.cpp000066400000000000000000000131411217574114600330610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ChineseTokenizer.h" #include "ChineseFilter.h" #include "ChineseAnalyzer.h" #include "StringReader.h" #include "OffsetAttribute.h" #include "WhitespaceTokenizer.h" using namespace Lucene; /// Analyzer that just uses ChineseTokenizer, not ChineseFilter. /// Convenience to show the behaviour of the tokenizer class JustChineseTokenizerAnalyzer : public Analyzer { public: virtual ~JustChineseTokenizerAnalyzer() { } public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(reader); } }; /// Analyzer that just uses ChineseFilter, not ChineseTokenizer. /// Convenience to show the behavior of the filter. class JustChineseFilterAnalyzer : public Analyzer { public: virtual ~JustChineseFilterAnalyzer() { } public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(newLucene(reader)); } }; BOOST_FIXTURE_TEST_SUITE(ChineseTokenizerTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testOtherLetterOffset) { const uint8_t token[] = {0x61, 0xe5, 0xa4, 0xa9, 0x62}; ChineseTokenizerPtr tokenizer = newLucene(newLucene(UTF8_TO_STRING(token))); int32_t correctStartOffset = 0; int32_t correctEndOffset = 1; OffsetAttributePtr offsetAtt = tokenizer->getAttribute(); while (tokenizer->incrementToken()) { BOOST_CHECK_EQUAL(correctStartOffset, offsetAtt->startOffset()); BOOST_CHECK_EQUAL(correctEndOffset, offsetAtt->endOffset()); ++correctStartOffset; ++correctEndOffset; } } BOOST_AUTO_TEST_CASE(testReusableTokenStream1) { AnalyzerPtr a = newLucene(); const uint8_t input[] = {0xe4, 0xb8, 0xad, 0xe5, 0x8d, 0x8e, 0xe4, 0xba, 0xba, 0xe6, 0xb0, 0x91, 0xe5, 0x85, 0xb1, 0xe5, 0x92, 0x8c, 0xe5, 0x9b, 0xbd}; const uint8_t token1[] = {0xe4, 0xb8, 0xad}; const uint8_t token2[] = {0xe5, 0x8d, 0x8e}; const uint8_t token3[] = {0xe4, 0xba, 0xba}; const uint8_t token4[] = 
{0xe6, 0xb0, 0x91}; const uint8_t token5[] = {0xe5, 0x85, 0xb1}; const uint8_t token6[] = {0xe5, 0x92, 0x8c}; const uint8_t token7[] = {0xe5, 0x9b, 0xbd}; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4), UTF8_TO_STRING(token5), UTF8_TO_STRING(token6), UTF8_TO_STRING(token7) ), newCollection(0, 1, 2, 3, 4, 5, 6), newCollection(1, 2, 3, 4, 5, 6, 7)); } BOOST_AUTO_TEST_CASE(testReusableTokenStream2) { AnalyzerPtr a = newLucene(); const uint8_t input[] = {0xe5, 0x8c, 0x97, 0xe4, 0xba, 0xac, 0xe5, 0xb8, 0x82}; const uint8_t token1[] = {0xe5, 0x8c, 0x97}; const uint8_t token2[] = {0xe4, 0xba, 0xac}; const uint8_t token3[] = {0xe5, 0xb8, 0x82}; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3) ), newCollection(0, 1, 2), newCollection(1, 2, 3)); } /// ChineseTokenizer tokenizes numbers as one token, but they are filtered by ChineseFilter BOOST_AUTO_TEST_CASE(testNumerics) { AnalyzerPtr justTokenizer = newLucene(); const uint8_t input[] = {0xe4, 0xb8, 0xad, 0x31, 0x32, 0x33, 0x34}; const uint8_t token1[] = {0xe4, 0xb8, 0xad}; checkAnalyzesTo(justTokenizer, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1), L"1234")); // in this case the ChineseAnalyzer (which applies ChineseFilter) will remove the numeric token. AnalyzerPtr a = newLucene(); checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1))); } /// ChineseTokenizer tokenizes english similar to SimpleAnalyzer. /// It will lowercase terms automatically. /// /// ChineseFilter has an english stopword list, it also removes any single character tokens. /// The stopword list is case-sensitive. BOOST_AUTO_TEST_CASE(testEnglish) { AnalyzerPtr chinese = newLucene(); checkAnalyzesTo(chinese, L"This is a Test. 
b c d", newCollection(L"test")); AnalyzerPtr justTokenizer = newLucene(); checkAnalyzesTo(justTokenizer, L"This is a Test. b c d", newCollection(L"this", L"is", L"a", L"test", L"b", L"c", L"d")); AnalyzerPtr justFilter = newLucene(); checkAnalyzesTo(justFilter, L"This is a Test. b c d", newCollection(L"This", L"Test.")); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/cz/000077500000000000000000000000001217574114600262605ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/cz/CzechAnalyzerTest.cpp000066400000000000000000000031231217574114600323650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "CzechAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CzechAnalyzerTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testStopWord) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(analyzer, L"Pokud mluvime o volnem", newCollection(L"mluvime", L"volnem")); } BOOST_AUTO_TEST_CASE(testReusableTokenStream1) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesToReuse(analyzer, L"Pokud mluvime o volnem", newCollection(L"mluvime", L"volnem")); } BOOST_AUTO_TEST_CASE(testReusableTokenStream2) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = {0xc4, 0x8c, 0x65, 0x73, 0x6b, 0xc3, 0xa1, 0x20, 0x52, 0x65, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x6b, 0x61}; const uint8_t token1[] = {0xc4, 0x8d, 0x65, 0x73, 0x6b, 0xc3, 0xa1}; const uint8_t token2[] = {0x72, 0x65, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x6b, 
0x61}; checkAnalyzesToReuse(analyzer, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1), UTF8_TO_STRING(token2))); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/de/000077500000000000000000000000001217574114600262345ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/de/GermanStemFilterTest.cpp000066400000000000000000000112371217574114600330140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "GermanAnalyzer.h" #include "WhitespaceTokenizer.h" using namespace Lucene; class GermanStemFilterFixture : public BaseTokenStreamFixture { public: virtual ~GermanStemFilterFixture() { } public: void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } void checkReuse(AnalyzerPtr a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; BOOST_FIXTURE_TEST_SUITE(GermanStemFilterTest, GermanStemFilterFixture) /// Test the German stemmer. The stemming algorithm is known to work less than perfect, as it doesn't /// use any word lists with exceptions. We also check some of the cases where the algorithm is wrong. 
BOOST_AUTO_TEST_CASE(testStemming) { const uint8_t haufig[] = {0x68, 0xc3, 0xa4, 0x75, 0x66, 0x69, 0x67}; check(UTF8_TO_STRING(haufig), L"haufig"); // German special characters are replaced const uint8_t abschliess1[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e}; check(UTF8_TO_STRING(abschliess1), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem const uint8_t abschliess2[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x72}; check(UTF8_TO_STRING(abschliess2), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem const uint8_t abschliess3[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x73}; check(UTF8_TO_STRING(abschliess3), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem const uint8_t abschliess4[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x6e}; check(UTF8_TO_STRING(abschliess4), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem check(L"Tisch", L"tisch"); check(L"Tische", L"tisch"); check(L"Tischen", L"tisch"); check(L"Haus", L"hau"); check(L"Hauses", L"hau"); const uint8_t hau1[] = {0x48, 0xc3, 0xa4, 0x75, 0x73, 0x65, 0x72}; check(UTF8_TO_STRING(hau1), L"hau"); const uint8_t hau2[] = {0x48, 0xc3, 0xa4, 0x75, 0x73, 0x65, 0x72, 0x6e}; check(UTF8_TO_STRING(hau2), L"hau"); // Here's a case where overstemming occurs, ie. a word is mapped to the same stem as unrelated words check(L"hauen", L"hau"); // Here's a case where understemming occurs, i.e. two related words are not mapped to the same stem. 
// This is the case with basically all irregular forms check(L"Drama", L"drama"); check(L"Dramen", L"dram"); const uint8_t ausmass[] = {0x41, 0x75, 0x73, 0x6d, 0x61, 0xc3, 0x9f}; check(UTF8_TO_STRING(ausmass), L"ausmass"); // Fake words to test if suffixes are cut off check(L"xxxxxe", L"xxxxx"); check(L"xxxxxs", L"xxxxx"); check(L"xxxxxn", L"xxxxx"); check(L"xxxxxt", L"xxxxx"); check(L"xxxxxem", L"xxxxx"); check(L"xxxxxet", L"xxxxx"); check(L"xxxxxnd", L"xxxxx"); // The suffixes are also removed when combined check(L"xxxxxetende", L"xxxxx"); // Words that are shorter than four charcters are not changed check(L"xxe", L"xxe"); // -em and -er are not removed from words shorter than five characters check(L"xxem", L"xxem"); check(L"xxer", L"xxer"); // -nd is not removed from words shorter than six characters check(L"xxxnd", L"xxxnd"); } BOOST_AUTO_TEST_CASE(testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"Tisch", L"tisch"); checkReuse(a, L"Tische", L"tisch"); checkReuse(a, L"Tischen", L"tisch"); } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. BOOST_AUTO_TEST_CASE(testExclusionTableReuse) { GermanAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"tischen", L"tisch"); HashSet exclusions = HashSet::newInstance(); exclusions.add(L"tischen"); a->setStemExclusionTable(exclusions); checkReuse(a, L"tischen", L"tischen"); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/el/000077500000000000000000000000001217574114600262445ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/el/GreekAnalyzerTest.cpp000066400000000000000000000273271217574114600323660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "GreekAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(GreekAnalyzerTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testAnalyzer1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20, 0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xac, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xae, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20, 0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd, 0x20, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x82, 0x20, 0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xae, 0xcf, 0x82, 0x20, 0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x82 }; const uint8_t token1[] = {0xce, 0xbc, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token2[] = {0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb1}; const uint8_t token3[] = {0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xb7}; const uint8_t token4[] = {0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token5[] = {0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb1}; const uint8_t token6[] = {0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd}; const uint8_t token7[] = {0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 
0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb7, 0xcf, 0x83}; const uint8_t token8[] = {0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x89, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x83}; // Verify the correct analysis of capitals and small accented letters checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4), UTF8_TO_STRING(token5), UTF8_TO_STRING(token6), UTF8_TO_STRING(token7), UTF8_TO_STRING(token8) )); } BOOST_AUTO_TEST_CASE(testAnalyzer2) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1, 0x20, 0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5b, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad, 0xcf, 0x82, 0x5d, 0x09, 0x2d, 0x09, 0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, 0x95, 0xce, 0xa3 }; const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1}; const uint8_t token2[] = {0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xb5, 0xcf, 0x83}; const uint8_t token3[] = {0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, 0xcf, 0x83}; // Verify the correct analysis of small letters with diaeresis and the elimination of punctuation marks checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3) )); } BOOST_AUTO_TEST_CASE(testAnalyzer3) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95, 0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3, 0x20, 0x20, 0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20, 0xce, 0xbf, 
0x20, 0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xce, 0xbf, 0xce, 0xb9, 0x20, 0xce, 0xac, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9 }; const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb8, 0xce, 0xb5, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83}; const uint8_t token2[] = {0xce, 0xb1, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token3[] = {0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token4[] = {0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9}; // Verify the correct analysis of capital accented letters and capital letters with diaeresis, // as well as the elimination of stop words checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4) )); } BOOST_AUTO_TEST_CASE(testReusableTokenStream1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20, 0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xac, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xae, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20, 0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd, 0x20, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x82, 0x20, 0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xae, 0xcf, 0x82, 0x20, 0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x82 }; const uint8_t token1[] = {0xce, 0xbc, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token2[] = {0xce, 0xb5, 0xce, 
0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb1}; const uint8_t token3[] = {0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xb7}; const uint8_t token4[] = {0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token5[] = {0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb1}; const uint8_t token6[] = {0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd}; const uint8_t token7[] = {0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb7, 0xcf, 0x83}; const uint8_t token8[] = {0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x89, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x83}; // Verify the correct analysis of capitals and small accented letters checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4), UTF8_TO_STRING(token5), UTF8_TO_STRING(token6), UTF8_TO_STRING(token7), UTF8_TO_STRING(token8) )); } BOOST_AUTO_TEST_CASE(testReusableTokenStream2) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1, 0x20, 0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5b, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad, 0xcf, 0x82, 0x5d, 0x09, 0x2d, 0x09, 0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, 0x95, 0xce, 0xa3 }; const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1}; const uint8_t token2[] = {0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xb5, 0xcf, 0x83}; const uint8_t token3[] = {0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, 0xcf, 
0x83}; // Verify the correct analysis of small letters with diaeresis and the elimination of punctuation marks checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3) )); } BOOST_AUTO_TEST_CASE(testReusableTokenStream3) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95, 0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3, 0x20, 0x20, 0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20, 0xce, 0xbf, 0x20, 0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xce, 0xbf, 0xce, 0xb9, 0x20, 0xce, 0xac, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9 }; const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb8, 0xce, 0xb5, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83}; const uint8_t token2[] = {0xce, 0xb1, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token3[] = {0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token4[] = {0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9}; // Verify the correct analysis of capital accented letters and capital letters with diaeresis, // as well as the elimination of stop words checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4) )); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/fa/000077500000000000000000000000001217574114600262325ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/fa/PersianAnalyzerTest.cpp000066400000000000000000001115511217574114600327110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "PersianAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PersianAnalyzerTest, BaseTokenStreamFixture) /// These tests show how the combination of tokenization (breaking on zero-width /// non-joiner), normalization (such as treating arabic YEH and farsi YEH the /// same), and stopwords creates a light-stemming effect for verbs. 
/// /// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar /// active present indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs3) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active future indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs4) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present progressive indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs5) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 
0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite progressive indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs6) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active perfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs7) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective perfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs8) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs9) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; 
checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs10) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs11) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs12) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs13) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, 
UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs14) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs15) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs16) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs17) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), 
newCollection(UTF8_TO_STRING(second))); } /// passive perfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs18) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective perfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs19) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs20) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs21) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 
0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive future indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs22) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present progressive indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs23) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite progressive indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbs24) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs25) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 
0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs26) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs27) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs28) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs29) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 
0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbs30) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// This test shows how the combination of tokenization and stopwords creates a /// light-stemming effect for verbs. /// /// In this case, these forms are presented with alternative orthography, using /// arabic yeh and whitespace. This yeh phenomenon is common for legacy text /// due to some previous bugs in Microsoft Windows. 
/// /// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar /// active present subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective3) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active future indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective4) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present progressive indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective5) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd9, 0x85, 0xd9, 
0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite progressive indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective6) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active perfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective7) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective perfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective8) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective9) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; 
checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective10) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective11) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective12) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective13) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, 
UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective14) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective15) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective16) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective17) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), 
newCollection(UTF8_TO_STRING(second))); } /// passive perfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective18) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective perfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective19) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective20) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective21) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 
0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive future indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective22) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present progressive indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective23) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite progressive indicative BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective24) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective25) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 
0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective26) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective27) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective28) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective29) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 
0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present subjunctive BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective30) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// These tests show how the combination of tokenization (breaking on zero-width /// non-joiner or space) and stopwords creates a light-stemming effect for /// nouns, removing the plural -ha. BOOST_AUTO_TEST_CASE(testBehaviorNouns1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0x20, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testBehaviorNouns2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0xe2, 0x80, 0x8c, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// Test showing that non-Persian text is treated very much like SimpleAnalyzer (lowercased, etc) BOOST_AUTO_TEST_CASE(testBehaviorNonPersian) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(a, L"English test.", newCollection(L"english", L"test")); } BOOST_AUTO_TEST_CASE(testReusableTokenStream1) { 
PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } BOOST_AUTO_TEST_CASE(testReusableTokenStream2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0xe2, 0x80, 0x8c, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// Test that custom stopwords work, and are not case-sensitive. BOOST_AUTO_TEST_CASE(testCustomStopwords) { Collection stopWords = newCollection(L"the", L"and", L"a"); PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, HashSet::newInstance(stopWords.begin(), stopWords.end())); checkAnalyzesTo(a, L"The quick brown fox.", newCollection(L"quick", L"brown", L"fox")); } BOOST_AUTO_TEST_SUITE_END() PersianNormalizationFilterTest.cpp000066400000000000000000000052761217574114600350470ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/fa///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ArabicLetterTokenizer.h" #include "PersianNormalizationFilter.h" #include "StringReader.h" using namespace Lucene; class PersianNormalizationFilterFixture : public BaseTokenStreamFixture { public: virtual ~PersianNormalizationFilterFixture() { } public: void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); PersianNormalizationFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; BOOST_FIXTURE_TEST_SUITE(PersianNormalizationFilterTest, PersianNormalizationFilterFixture) BOOST_AUTO_TEST_CASE(testFarsiYeh) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xa7, 0xdb, 0x8c}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testYehBarree) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xa7, 0xdb, 0x92}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testKeheh) { const uint8_t first[] = {0xda, 0xa9, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x86}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testHehYeh) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xdb, 0x80}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testHehHamzaAbove) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87, 0xd9, 0x94}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87}; check(UTF8_TO_STRING(first), 
UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_CASE(testHehGoal) { const uint8_t first[] = {0xd8, 0xb2, 0xd8, 0xa7, 0xd8, 0xaf, 0xdb, 0x81}; const uint8_t second[] = {0xd8, 0xb2, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/fr/000077500000000000000000000000001217574114600262535ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/fr/ElisionTest.cpp000066400000000000000000000031731217574114600312250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "StandardTokenizer.h" #include "StringReader.h" #include "ElisionFilter.h" #include "TermAttribute.h" using namespace Lucene; class ElisionFixture : public BaseTokenStreamFixture { public: virtual ~ElisionFixture() { } public: Collection addTerms(TokenFilterPtr filter) { Collection terms = Collection::newInstance(); TermAttributePtr termAtt = filter->getAttribute(); while (filter->incrementToken()) terms.add(termAtt->term()); return terms; } }; BOOST_FIXTURE_TEST_SUITE(ElisionTest, ElisionFixture) BOOST_AUTO_TEST_CASE(testElision) { String test = L"Plop, juste pour voir l'embrouille avec O'brian. 
M'enfin."; TokenizerPtr tokenizer = newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(test)); HashSet articles = HashSet::newInstance(); articles.add(L"l"); articles.add(L"M"); TokenFilterPtr filter = newLucene(tokenizer, articles); Collection terms = addTerms(filter); BOOST_CHECK_EQUAL(L"embrouille", terms[4]); BOOST_CHECK_EQUAL(L"O'brian", terms[6]); BOOST_CHECK_EQUAL(L"enfin", terms[7]); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/fr/FrenchAnalyzerTest.cpp000066400000000000000000000070641217574114600325410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "FrenchAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(FrenchAnalyzerTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testAnalyzer) { AnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(fa, L"", Collection::newInstance()); checkAnalyzesTo(fa, L"chien chat cheval", newCollection(L"chien", L"chat", L"cheval")); checkAnalyzesTo(fa, L"chien CHAT CHEVAL", newCollection(L"chien", L"chat", L"cheval")); checkAnalyzesTo(fa, L" chien ,? 
+ = - CHAT /: > CHEVAL", newCollection(L"chien", L"chat", L"cheval")); checkAnalyzesTo(fa, L"chien++", newCollection(L"chien")); checkAnalyzesTo(fa, L"mot \"entreguillemet\"", newCollection(L"mot", L"entreguillemet")); // let's do some French specific tests now /// I would expect this to stay one term as in French the minus sign is often used for composing words checkAnalyzesTo(fa, L"Jean-Fran\u00e7ois", newCollection(L"jean", L"fran\u00e7ois")); // stopwords checkAnalyzesTo(fa, L"le la chien les aux chat du des \u00e0 cheval", newCollection(L"chien", L"chat", L"cheval")); // some nouns and adjectives checkAnalyzesTo(fa, L"lances chismes habitable chiste \u00e9l\u00e9ments captifs", newCollection( L"lanc", L"chism", L"habit", L"chist", L"\u00e9l\u00e9ment", L"captif")); // some verbs checkAnalyzesTo(fa, L"finissions souffrirent rugissante", newCollection(L"fin", L"souffr", L"rug")); // aujourd'hui stays one term which is OK checkAnalyzesTo(fa, L"C3PO aujourd\'hui oeuf \u00ef\u00e2\u00f6\u00fb\u00e0\u00e4 anticonstitutionnellement Java++ ", newCollection(L"c3po", L"aujourd\'hui", L"oeuf", L"\u00ef\u00e2\u00f6\u00fb\u00e0\u00e4", L"anticonstitutionnel", L"jav")); // here 1940-1945 stays as one term, 1940:1945 not ? checkAnalyzesTo(fa, L"33Bis 1940-1945 1940:1945 (---i+++)*", newCollection(L"33bis", L"1940-1945", L"1940", L"1945", L"i")); } BOOST_AUTO_TEST_CASE(testReusableTokenStream) { AnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); // stopwords checkAnalyzesToReuse(fa, L"le la chien les aux chat du des \u00e0 cheval", newCollection(L"chien", L"chat", L"cheval")); // some nouns and adjectives checkAnalyzesToReuse(fa, L"lances chismes habitable chiste \u00e9l\u00e9ments captifs", newCollection(L"lanc", L"chism", L"habit", L"chist", L"\u00e9l\u00e9ment", L"captif")); } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. 
BOOST_AUTO_TEST_CASE(testExclusionTableReuse) { FrenchAnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesToReuse(fa, L"habitable", newCollection(L"habit")); HashSet exclusions = HashSet::newInstance(); exclusions.add(L"habitable"); fa->setStemExclusionTable(exclusions); checkAnalyzesToReuse(fa, L"habitable", newCollection(L"habitable")); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/nl/000077500000000000000000000000001217574114600262555ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/nl/DutchStemmerTest.cpp000066400000000000000000000121601217574114600322250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "DutchAnalyzer.h" #include "WhitespaceTokenizer.h" using namespace Lucene; class DutchStemmerFixture : public BaseTokenStreamFixture { public: virtual ~DutchStemmerFixture() { } public: void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } void checkReuse(AnalyzerPtr a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; /// Test the Dutch Stem Filter, which only modifies the term text. /// The code states that it uses the snowball algorithm, but tests reveal some differences. 
BOOST_FIXTURE_TEST_SUITE(DutchStemmerTest, DutchStemmerFixture) BOOST_AUTO_TEST_CASE(testWithSnowballExamples) { check(L"lichaamsziek", L"lichaamsziek"); check(L"lichamelijk", L"licham"); check(L"lichamelijke", L"licham"); check(L"lichamelijkheden", L"licham"); check(L"lichamen", L"licham"); check(L"lichere", L"licher"); check(L"licht", L"licht"); check(L"lichtbeeld", L"lichtbeeld"); check(L"lichtbruin", L"lichtbruin"); check(L"lichtdoorlatende", L"lichtdoorlat"); check(L"lichte", L"licht"); check(L"lichten", L"licht"); check(L"lichtende", L"lichtend"); check(L"lichtenvoorde", L"lichtenvoord"); check(L"lichter", L"lichter"); check(L"lichtere", L"lichter"); check(L"lichters", L"lichter"); check(L"lichtgevoeligheid", L"lichtgevoel"); check(L"lichtgewicht", L"lichtgewicht"); check(L"lichtgrijs", L"lichtgrijs"); check(L"lichthoeveelheid", L"lichthoevel"); check(L"lichtintensiteit", L"lichtintensiteit"); check(L"lichtje", L"lichtj"); check(L"lichtjes", L"lichtjes"); check(L"lichtkranten", L"lichtkrant"); check(L"lichtkring", L"lichtkring"); check(L"lichtkringen", L"lichtkring"); check(L"lichtregelsystemen", L"lichtregelsystem"); check(L"lichtste", L"lichtst"); check(L"lichtstromende", L"lichtstrom"); check(L"lichtte", L"licht"); check(L"lichtten", L"licht"); check(L"lichttoetreding", L"lichttoetred"); check(L"lichtverontreinigde", L"lichtverontreinigd"); check(L"lichtzinnige", L"lichtzinn"); check(L"lid", L"lid"); check(L"lidia", L"lidia"); check(L"lidmaatschap", L"lidmaatschap"); check(L"lidstaten", L"lidstat"); check(L"lidvereniging", L"lidveren"); check(L"opgingen", L"opging"); check(L"opglanzing", L"opglanz"); check(L"opglanzingen", L"opglanz"); check(L"opglimlachten", L"opglimlacht"); check(L"opglimpen", L"opglimp"); check(L"opglimpende", L"opglimp"); check(L"opglimping", L"opglimp"); check(L"opglimpingen", L"opglimp"); check(L"opgraven", L"opgrav"); check(L"opgrijnzen", L"opgrijnz"); check(L"opgrijzende", L"opgrijz"); check(L"opgroeien", L"opgroei"); 
check(L"opgroeiende", L"opgroei"); check(L"opgroeiplaats", L"opgroeiplat"); check(L"ophaal", L"ophal"); check(L"ophaaldienst", L"ophaaldienst"); check(L"ophaalkosten", L"ophaalkost"); check(L"ophaalsystemen", L"ophaalsystem"); check(L"ophaalt", L"ophaalt"); check(L"ophaaltruck", L"ophaaltruck"); check(L"ophalen", L"ophal"); check(L"ophalend", L"ophal"); check(L"ophalers", L"ophaler"); check(L"ophef", L"ophef"); check(L"opheffen", L"ophef"); // versus snowball 'opheff' check(L"opheffende", L"ophef"); // versus snowball 'opheff' check(L"opheffing", L"ophef"); // versus snowball 'opheff' check(L"opheldering", L"ophelder"); check(L"ophemelde", L"ophemeld"); check(L"ophemelen", L"ophemel"); check(L"opheusden", L"opheusd"); check(L"ophief", L"ophief"); check(L"ophield", L"ophield"); check(L"ophieven", L"ophiev"); check(L"ophoepelt", L"ophoepelt"); check(L"ophoog", L"ophog"); check(L"ophoogzand", L"ophoogzand"); check(L"ophopen", L"ophop"); check(L"ophoping", L"ophop"); check(L"ophouden", L"ophoud"); } BOOST_AUTO_TEST_CASE(testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"lichaamsziek", L"lichaamsziek"); checkReuse(a, L"lichamelijk", L"licham"); checkReuse(a, L"lichamelijke", L"licham"); checkReuse(a, L"lichamelijkheden", L"licham"); } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. 
BOOST_AUTO_TEST_CASE(testExclusionTableReuse) { DutchAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"lichamelijk", L"licham"); HashSet exclusions = HashSet::newInstance(); exclusions.add(L"lichamelijk"); a->setStemExclusionTable(exclusions); checkReuse(a, L"lichamelijk", L"lichamelijk"); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/reverse/000077500000000000000000000000001217574114600273175ustar00rootroot00000000000000ReverseStringFilterTest.cpp000066400000000000000000000045401217574114600345570ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/reverse///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ReverseStringFilter.h" #include "WhitespaceTokenizer.h" #include "StringReader.h" #include "TermAttribute.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(ReverseStringFilterTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testFilter) { TokenStreamPtr stream = newLucene(newLucene(L"Do have a nice day")); // 1-4 length string ReverseStringFilterPtr filter = newLucene(stream); TermAttributePtr text = filter->getAttribute(); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(L"oD", text->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(L"evah", text->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(L"a", text->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(L"ecin", text->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(L"yad", text->term()); BOOST_CHECK(!filter->incrementToken()); } 
BOOST_AUTO_TEST_CASE(testFilterWithMark) { TokenStreamPtr stream = newLucene(newLucene(L"Do have a nice day")); // 1-4 length string ReverseStringFilterPtr filter = newLucene(stream, (wchar_t)0x0001); TermAttributePtr text = filter->getAttribute(); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"oD", text->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"evah", text->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"a", text->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"ecin", text->term()); BOOST_CHECK(filter->incrementToken()); BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"yad", text->term()); BOOST_CHECK(!filter->incrementToken()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/ru/000077500000000000000000000000001217574114600262725ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/ru/RussianAnalyzerTest.cpp000066400000000000000000000130121217574114600327650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "TestUtils.h" #include "BaseTokenStreamFixture.h" #include "RussianAnalyzer.h" #include "RussianLetterTokenizer.h" #include "InputStreamReader.h" #include "FileReader.h" #include "TermAttribute.h" #include "StringReader.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(RussianAnalyzerTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testUnicode) { RussianAnalyzerPtr ra = newLucene(LuceneVersion::LUCENE_CURRENT); String testFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"testUTF8.txt")); InputStreamReaderPtr inWords = newLucene(newLucene(testFile)); String sampleFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"resUTF8.htm")); InputStreamReaderPtr sampleUnicode = newLucene(newLucene(sampleFile)); TokenStreamPtr in = ra->tokenStream(L"all", inWords); RussianLetterTokenizerPtr sample = newLucene(sampleUnicode); TermAttributePtr text = in->getAttribute(); TermAttributePtr sampleText = sample->getAttribute(); while (true) { if (!in->incrementToken()) break; sample->incrementToken(); BOOST_CHECK_EQUAL(text->term(), sampleText->term()); } inWords->close(); sampleUnicode->close(); } BOOST_AUTO_TEST_CASE(testDigitsInRussianCharset) { ReaderPtr reader = newLucene(L"text 1000"); RussianAnalyzerPtr ra = newLucene(LuceneVersion::LUCENE_CURRENT); TokenStreamPtr stream = ra->tokenStream(L"", reader); TermAttributePtr termText = stream->getAttribute(); BOOST_CHECK(stream->incrementToken()); BOOST_CHECK_EQUAL(L"text", termText->term()); BOOST_CHECK(stream->incrementToken()); BOOST_CHECK_EQUAL(L"1000", termText->term()); BOOST_CHECK(!stream->incrementToken()); } BOOST_AUTO_TEST_CASE(testReusableTokenStream1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xd0, 0x92, 0xd0, 0xbc, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0x20, 0xd1, 0x82, 
0xd0, 0xb5, 0xd0, 0xbc, 0x20, 0xd0, 0xbe, 0x20, 0xd1, 0x81, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xb5, 0x20, 0xd1, 0x8d, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xba, 0xd1, 0x82, 0xd1, 0x80, 0xd0, 0xbe, 0xd0, 0xbc, 0xd0, 0xb0, 0xd0, 0xb3, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x82, 0xd0, 0xbd, 0xd0, 0xbe, 0xd0, 0xb9, 0x20, 0xd1, 0x8d, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x80, 0xd0, 0xb3, 0xd0, 0xb8, 0xd0, 0xb8, 0x20, 0xd0, 0xb8, 0xd0, 0xbc, 0xd0, 0xb5, 0xd0, 0xbb, 0xd0, 0xb8, 0x20, 0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb5, 0xd0, 0xb4, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb2, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xb5, 0x20, 0xd0, 0xb5, 0xd1, 0x89, 0xd0, 0xb5 }; const uint8_t token1[] = {0xd0, 0xb2, 0xd0, 0xbc, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82}; const uint8_t token2[] = {0xd1, 0x81, 0xd0, 0xb8, 0xd0, 0xbb}; const uint8_t token3[] = {0xd1, 0x8d, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xba, 0xd1, 0x82, 0xd1, 0x80, 0xd0, 0xbe, 0xd0, 0xbc, 0xd0, 0xb0, 0xd0, 0xb3, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x82, 0xd0, 0xbd}; const uint8_t token4[] = {0xd1, 0x8d, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x80, 0xd0, 0xb3}; const uint8_t token5[] = {0xd0, 0xb8, 0xd0, 0xbc, 0xd0, 0xb5, 0xd0, 0xbb}; const uint8_t token6[] = {0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb5, 0xd0, 0xb4, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb2, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xbd}; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4), UTF8_TO_STRING(token5), UTF8_TO_STRING(token6) )); } BOOST_AUTO_TEST_CASE(testReusableTokenStream2) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xd0, 0x9d, 0xd0, 0xbe, 0x20, 0xd0, 0xb7, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xb5, 0x20, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xbe, 0x20, 0xd1, 0x85, 0xd1, 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xbe, 0xd1, 0x81, 0xd1, 0x8c, 0x20, 0xd0, 0xb2, 0x20, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb9, 0xd0, 0xbd, 
0xd0, 0xb5 }; const uint8_t token1[] = {0xd0, 0xb7, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xbd}; const uint8_t token2[] = {0xd1, 0x85, 0xd1, 0x80, 0xd0, 0xb0, 0xd0, 0xbd}; const uint8_t token3[] = {0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb9, 0xd0, 0xbd}; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3) )); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/analyzers/common/analysis/ru/RussianStemTest.cpp000066400000000000000000000037271217574114600321240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "TestUtils.h" #include "BaseTokenStreamFixture.h" #include "RussianStemmer.h" #include "FileReader.h" #include "BufferedReader.h" #include "InputStreamReader.h" #include "FileUtils.h" using namespace Lucene; class RussianStemmerFixture : public BaseTokenStreamFixture { public: RussianStemmerFixture() { words = Collection::newInstance(); stems = Collection::newInstance(); String wordsFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"wordsUTF8.txt")); String stemsFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"stemsUTF8.txt")); BufferedReaderPtr inWords = newLucene(newLucene(newLucene(wordsFile))); String word; while (inWords->readLine(word)) words.add(word); inWords->close(); BufferedReaderPtr inStems = newLucene(newLucene(newLucene(stemsFile))); String stem; while (inStems->readLine(stem)) stems.add(stem); inStems->close(); } virtual ~RussianStemmerFixture() { } protected: Collection words; Collection stems; }; BOOST_FIXTURE_TEST_SUITE(RussianStemTest, RussianStemmerFixture) 
BOOST_AUTO_TEST_CASE(testStem) { BOOST_CHECK_EQUAL(words.size(), stems.size()); for (int32_t i = 0; i < words.size(); ++i) { String realStem = RussianStemmer::stemWord(words[i]); BOOST_CHECK_EQUAL(stems[i], realStem); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/highlighter/000077500000000000000000000000001217574114600230175ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/highlighter/HighlighterTest.cpp000066400000000000000000002751321217574114600266330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "BaseTokenStreamFixture.h" #include "Highlighter.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "StandardAnalyzer.h" #include "Document.h" #include "NumericField.h" #include "SimpleAnalyzer.h" #include "QueryParser.h" #include "TopDocs.h" #include "QueryScorer.h" #include "TokenStream.h" #include "SimpleFragmenter.h" #include "SimpleSpanFragmenter.h" #include "SimpleHTMLFormatter.h" #include "StringReader.h" #include "TokenSources.h" #include "MultiTermQuery.h" #include "WhitespaceAnalyzer.h" #include "TokenGroup.h" #include "NumericRangeQuery.h" #include "PhraseQuery.h" #include "MultiPhraseQuery.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" #include "SpanTermQuery.h" #include "QueryTermScorer.h" #include "WeightedSpanTerm.h" #include "WeightedTerm.h" #include "BooleanQuery.h" #include "WildcardQuery.h" #include "NullFragmenter.h" #include "TermRangeFilter.h" #include "LowerCaseTokenizer.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" 
#include "TextFragment.h" #include "SimpleHTMLEncoder.h" #include "MultiSearcher.h" #include "ScoreDoc.h" #include "Term.h" #include "FilteredQuery.h" #include "Token.h" #include "TermQuery.h" using namespace Lucene; class HighlighterTestFixture; namespace HighlighterTest { class TestFormatter : public Formatter, public LuceneObject { public: TestFormatter(HighlighterTestFixture* fixture); virtual ~TestFormatter(); LUCENE_CLASS(TestFormatter); protected: HighlighterTestFixture* fixture; public: virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); }; } class HighlighterTestFixture : public BaseTokenStreamFixture { public: HighlighterTestFixture() { numHighlights = 0; analyzer = newLucene(TEST_VERSION); texts = newCollection( L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", L"JFK has been shot", L"John Kennedy has been shot", L"This text has a typo in referring to Keneddy", L"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", L"y z x y z a b", L"lets is a the lets is a the lets is a the lets" ); ramDir = newLucene(); IndexWriterPtr writer = newLucene(ramDir, newLucene(TEST_VERSION), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < texts.size(); ++i) addDoc(writer, texts[i]); DocumentPtr doc = newLucene(); NumericFieldPtr nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(1); doc->add(nfield); writer->addDocument(doc, analyzer); nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(3); doc = newLucene(); doc->add(nfield); writer->addDocument(doc, analyzer); nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(5); doc = newLucene(); doc->add(nfield); 
writer->addDocument(doc, analyzer); nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(7); doc = newLucene(); doc->add(nfield); writer->addDocument(doc, analyzer); writer->optimize(); writer->close(); reader = IndexReader::open(ramDir, true); dir = newLucene(); a = newLucene(); } virtual ~HighlighterTestFixture() { } public: IndexReaderPtr reader; QueryPtr query; RAMDirectoryPtr ramDir; IndexSearcherPtr searcher; public: int32_t numHighlights; AnalyzerPtr analyzer; TopDocsPtr hits; Collection texts; DirectoryPtr dir; AnalyzerPtr a; static const LuceneVersion::Version TEST_VERSION; static const String FIELD_NAME; static const String NUMERIC_FIELD_NAME; public: void addDoc(IndexWriterPtr writer, const String& text) { DocumentPtr doc = newLucene(); FieldPtr field = newLucene(FIELD_NAME, text, Field::STORE_YES, Field::INDEX_ANALYZED); doc->add(field); writer->addDocument(doc); } String highlightField(QueryPtr query, const String& fieldName, const String& text) { TokenStreamPtr tokenStream = newLucene(TEST_VERSION)->tokenStream(fieldName, newLucene(text)); // Assuming "", "" used to highlight SimpleHTMLFormatterPtr formatter = newLucene(); QueryScorerPtr scorer = newLucene(query, fieldName, FIELD_NAME); HighlighterPtr highlighter = newLucene(formatter, scorer); highlighter->setTextFragmenter(newLucene(INT_MAX)); String rv = highlighter->getBestFragments(tokenStream, text, 1, L"(FIELD TEXT TRUNCATED)"); return rv.empty() ? 
text : rv; } void doSearching(const String& queryString) { QueryParserPtr parser = newLucene(TEST_VERSION, FIELD_NAME, analyzer); parser->setEnablePositionIncrements(true); parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); query = parser->parse(queryString); doSearching(query); } void doSearching(QueryPtr unReWrittenQuery) { searcher = newLucene(ramDir, true); // for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query query = unReWrittenQuery->rewrite(reader); hits = searcher->search(query, FilterPtr(), 1000); } void checkExpectedHighlightCount(int32_t maxNumFragmentsRequired, int32_t expectedHighlights, Collection expected) { Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); BOOST_CHECK_EQUAL(numHighlights, expectedHighlights); } BOOST_CHECK_EQUAL(results.size(), expected.size()); for (int32_t i = 0; i < results.size(); ++i) BOOST_CHECK_EQUAL(results[i], expected[i]); } void makeIndex() { IndexWriterPtr writer = newLucene(dir, a, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc(L"t_text1", L"random words for highlighting tests del")); writer->addDocument(doc(L"t_text1", L"more random words for second field del")); writer->addDocument(doc(L"t_text1", L"random words for highlighting tests del")); writer->addDocument(doc(L"t_text1", L"more random words for second field")); writer->optimize(); writer->close(); } DocumentPtr doc(const String& f, const String& v) { DocumentPtr doc = newLucene(); doc->add(newLucene(f, v, 
Field::STORE_YES, Field::INDEX_ANALYZED)); return doc; } void deleteDocument() { IndexWriterPtr writer = newLucene(dir, a, false, IndexWriter::MaxFieldLengthLIMITED); writer->deleteDocuments(newLucene(L"t_text1", L"del")); writer->close(); } void searchIndex() { String q = L"t_text1:random"; QueryParserPtr parser = newLucene(TEST_VERSION, L"t_text1", a ); QueryPtr query = parser->parse(q); IndexSearcherPtr searcher = newLucene(dir, true); // This scorer can return negative idf -> null fragment HighlighterScorerPtr scorer = newLucene(query, searcher->getIndexReader(), L"t_text1"); HighlighterPtr h = newLucene(scorer); TopDocsPtr hits = searcher->search(query, FilterPtr(), 10); for (int32_t i = 0; i < hits->totalHits; ++i) { DocumentPtr doc = searcher->doc(hits->scoreDocs[i]->doc); String result = h->getBestFragment(a, L"t_text1", doc->get(L"t_text1")); BOOST_CHECK_EQUAL(L"more random words for second field", result); } searcher->close(); } }; const LuceneVersion::Version HighlighterTestFixture::TEST_VERSION = LuceneVersion::LUCENE_CURRENT; const String HighlighterTestFixture::FIELD_NAME = L"contents"; const String HighlighterTestFixture::NUMERIC_FIELD_NAME = L"nfield"; namespace HighlighterTest { TestFormatter::TestFormatter(HighlighterTestFixture* fixture) { this->fixture = fixture; } TestFormatter::~TestFormatter() { } String TestFormatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) { if (tokenGroup->getTotalScore() <= 0) return originalText; ++fixture->numHighlights; // update stats used in assertions return L"" + originalText + L""; } DECLARE_SHARED_PTR(TestHighlightRunner) class TestHighlightRunner : public LuceneObject { public: TestHighlightRunner(HighlighterTestFixture* fixture) { this->fixture = fixture; mode = QUERY; frag = newLucene(20); } virtual ~TestHighlightRunner() { } LUCENE_CLASS(TestHighlightRunner); protected: HighlighterTestFixture* fixture; static const int32_t QUERY; static const int32_t QUERY_TERM; public: int32_t 
mode; FragmenterPtr frag; public: virtual HighlighterPtr getHighlighter(QueryPtr query, const String& fieldName, TokenStreamPtr stream, FormatterPtr formatter) { return getHighlighter(query, fieldName, stream, formatter, true); } virtual HighlighterPtr getHighlighter(QueryPtr query, const String& fieldName, TokenStreamPtr stream, FormatterPtr formatter, bool expanMultiTerm) { HighlighterScorerPtr scorer; if (mode == QUERY) { scorer = newLucene(query, fieldName); if (!expanMultiTerm) boost::dynamic_pointer_cast(scorer)->setExpandMultiTermQuery(false); } else if (mode == QUERY_TERM) scorer = newLucene(query); else BOOST_FAIL("Unknown highlight mode"); return newLucene(formatter, scorer); } virtual HighlighterPtr getHighlighter(Collection weightedTerms, FormatterPtr formatter) { if (mode == QUERY) { Collection weightedSpanTerms = Collection::newInstance(weightedTerms.size()); for (int32_t i = 0; i < weightedTerms.size(); ++i) weightedSpanTerms[i] = boost::dynamic_pointer_cast(weightedTerms[i]); return newLucene(formatter, newLucene(weightedSpanTerms)); } else if (mode == QUERY_TERM) return newLucene(formatter, newLucene(weightedTerms)); else BOOST_FAIL("Unknown highlight mode"); return HighlighterPtr(); } virtual void doStandardHighlights(AnalyzerPtr analyzer, IndexSearcherPtr searcher, TopDocsPtr hits, QueryPtr query, FormatterPtr formatter, Collection expected, bool expandMT = false) { Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; HighlighterScorerPtr scorer; TokenStreamPtr tokenStream = analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); if (mode == QUERY) scorer = newLucene(query); else if (mode == QUERY_TERM) scorer = newLucene(query); HighlighterPtr highlighter = newLucene(formatter, scorer); 
highlighter->setTextFragmenter(frag); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } BOOST_CHECK_EQUAL(results.size(), expected.size()); for (int32_t i = 0; i < results.size(); ++i) BOOST_CHECK_EQUAL(results[i], expected[i]); } virtual void run(Collection expected) = 0; virtual void start(Collection expected = Collection()) { run(expected); mode = QUERY_TERM; run(expected); } }; const int32_t TestHighlightRunner::QUERY = 0; const int32_t TestHighlightRunner::QUERY_TERM = 1; } BOOST_FIXTURE_TEST_SUITE(HighlighterTest, HighlighterTestFixture) BOOST_AUTO_TEST_CASE(testQueryScorerHits) { AnalyzerPtr analyzer = newLucene(); QueryParserPtr qp = newLucene(TEST_VERSION, FIELD_NAME, analyzer); query = qp->parse(L"\"very long\""); searcher = newLucene(ramDir, true); TopDocsPtr hits = searcher->search(query, 10); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->scoreDocs.size(); ++i) { DocumentPtr doc = searcher->doc(hits->scoreDocs[i]->doc); String storedField = doc->get(FIELD_NAME); TokenStreamPtr stream = TokenSources::getAnyTokenStream(searcher->getIndexReader(), hits->scoreDocs[i]->doc, FIELD_NAME, doc, analyzer); FragmenterPtr fragmenter = newLucene(scorer); highlighter->setTextFragmenter(fragmenter); results.add(highlighter->getBestFragment(stream, storedField)); } BOOST_CHECK_EQUAL(results.size(), 2); BOOST_CHECK_EQUAL(results[0], L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); } BOOST_AUTO_TEST_CASE(testHighlightingWithDefaultField) { String s1 = L"I call our world Flatland, not because we call it so,"; QueryParserPtr parser = newLucene(TEST_VERSION, 
FIELD_NAME, newLucene(TEST_VERSION)); // Verify that a query against the default field results in text being highlighted regardless of the field name. QueryPtr q = parser->parse(L"\"world Flatland\"~3"); String expected = L"I call our world Flatland, not because we call it so,"; String observed = highlightField(q, L"SOME_FIELD_NAME", s1); BOOST_CHECK_EQUAL(expected, observed); // Verify that a query against a named field does not result in any ighlighting when the query field name differs // from the name of the field being highlighted, which in this example happens to be the default field name. q = parser->parse(L"text:\"world Flatland\"~3"); expected = s1; observed = highlightField(q, FIELD_NAME, s1); BOOST_CHECK_EQUAL(s1, highlightField(q, FIELD_NAME, s1)); } BOOST_AUTO_TEST_CASE(testSimpleSpanHighlighter) { doSearching(L"Kennedy"); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(results.size(), 3); BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy... 
to Kennedy"); BOOST_CHECK_EQUAL(results[2], L" kennedy has been shot"); } BOOST_AUTO_TEST_CASE(testRepeatingTermsInMultBooleans) { String content = L"x y z a b c d e f g b c g"; String ph1 = L"\"a b c d\""; String ph2 = L"\"b c g\""; String f1 = L"f1"; String f2 = L"f2"; String f1c = f1 + L":"; String f2c = f2 + L":"; String q = L"(" + f1c + ph1 + L" OR " + f2c + ph1 + L") AND (" + f1c + ph2 + L" OR " + f2c + ph2 + L")"; AnalyzerPtr analyzer = newLucene(); QueryParserPtr qp = newLucene(TEST_VERSION, f1, analyzer); QueryPtr query = qp->parse(q); QueryScorerPtr scorer = newLucene(query, f1); scorer->setExpandMultiTermQuery(false); HighlighterPtr h = newLucene(newLucene(this), scorer); h->getBestFragment(analyzer, f1, content); BOOST_CHECK_EQUAL(numHighlights, 7); } BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting) { doSearching(L"\"very long and contains\""); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(results.size(), 1); BOOST_CHECK_EQUAL(results[0], L"Hello this is a piece of text that is very long and contains too much preamble"); BOOST_CHECK_EQUAL(numHighlights, 3); numHighlights = 0; doSearching(L"\"This piece of text refers to Kennedy\""); maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = 
analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(results.size(), 1); BOOST_CHECK_EQUAL(results[0], L"This piece of text refers to Kennedy at the beginning then has a longer piece"); BOOST_CHECK_EQUAL(numHighlights, 4); numHighlights = 0; doSearching(L"\"lets is a the lets is a the lets is a the lets\""); maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(results.size(), 1); BOOST_CHECK_EQUAL(results[0], L"lets is a the lets is a the lets is a the lets"); BOOST_CHECK_EQUAL(numHighlights, 4); } BOOST_AUTO_TEST_CASE(testSpanRegexQuery) { // todo } BOOST_AUTO_TEST_CASE(testRegexQuery) { // todo } BOOST_AUTO_TEST_CASE(testNumericRangeQuery) { // doesn't currently highlight, but make sure it doesn't cause exception either query = NumericRangeQuery::newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true); searcher = newLucene(ramDir, true); hits = searcher->search(query, 100); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(NUMERIC_FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); 
results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(results.size(), 2); BOOST_CHECK_EQUAL(results[0], L""); BOOST_CHECK_EQUAL(results[1], L""); BOOST_CHECK_EQUAL(numHighlights, 0); } BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting2) { doSearching(L"\"text piece long\"~5"); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(40)); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(results.size(), 2); BOOST_CHECK_EQUAL(results[0], L"Hello this is a piece of text that is very long and contains too much preamble"); BOOST_CHECK_EQUAL(results[1], L" at the beginning then has a longer piece of text that is very long in the middle"); BOOST_CHECK_EQUAL(numHighlights, 6); } BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting3) { doSearching(L"\"x y z\""); int32_t maxNumFragmentsRequired = 2; Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); BOOST_CHECK_EQUAL(numHighlights, 3); } BOOST_CHECK_EQUAL(results.size(), 1); BOOST_CHECK_EQUAL(results[0], L"y z x y z a b"); } 
BOOST_AUTO_TEST_CASE(testSimpleSpanFragmenter) { doSearching(L"\"piece of text that is very long\""); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(scorer, 5)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(results.size(), 2); BOOST_CHECK_EQUAL(results[0], L" this is a piece of text"); BOOST_CHECK_EQUAL(results[1], L" piece of text that is very long"); doSearching(L"\"been shot\""); maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(scorer, 20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(numHighlights, 14); BOOST_CHECK_EQUAL(results.size(), 3); BOOST_CHECK_EQUAL(results[0], L"JFK has been shot"); BOOST_CHECK_EQUAL(results[1], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[2], L" kennedy has been shot"); } /// position sensitive query added after position insensitive query BOOST_AUTO_TEST_CASE(testPosTermStdTerm) { doSearching(L"y \"x y z\""); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; 
++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); BOOST_CHECK_EQUAL(numHighlights, 4); } BOOST_CHECK_EQUAL(results.size(), 1); BOOST_CHECK_EQUAL(results[0], L"y z x y z a b"); } BOOST_AUTO_TEST_CASE(testQueryScorerMultiPhraseQueryHighlighting) { MultiPhraseQueryPtr mpq = newLucene(); mpq->add(newCollection(newLucene(FIELD_NAME, L"wordx"), newLucene(FIELD_NAME, L"wordb"))); mpq->add(newLucene(FIELD_NAME, L"wordy")); doSearching(mpq); int32_t maxNumFragmentsRequired = 2; Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc"); checkExpectedHighlightCount(maxNumFragmentsRequired, 6, expected); } BOOST_AUTO_TEST_CASE(testQueryScorerMultiPhraseQueryHighlightingWithGap) { MultiPhraseQueryPtr mpq = newLucene(); // The toString of MultiPhraseQuery doesn't work so well with these out-of-order additions, but the Query itself seems to match accurately. 
mpq->add(newCollection(newLucene(FIELD_NAME, L"wordz")), 2); mpq->add(newCollection(newLucene(FIELD_NAME, L"wordx")), 0); doSearching(mpq); int32_t maxNumFragmentsRequired = 1; int32_t expectedHighlights = 2; Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx"); checkExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights, expected); } namespace TestNearSpanSimpleQuery { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { mode = QUERY; doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); } }; } BOOST_AUTO_TEST_CASE(testNearSpanSimpleQuery) { doSearching(newLucene(newCollection( newLucene(newLucene(FIELD_NAME, L"beginning")), newLucene(newLucene(FIELD_NAME, L"kennedy"))), 3, false)); TestHighlightRunnerPtr helper = newLucene(this); Collection expected = newCollection(L" refers to Kennedy at the beginning"); helper->run(expected); BOOST_CHECK_EQUAL(numHighlights, 2); } BOOST_AUTO_TEST_CASE(testSimpleQueryTermScorerHighlighter) { doSearching(L"Kennedy"); HighlighterPtr highlighter = newLucene(newLucene(query)); highlighter->setTextFragmenter(newLucene(40)); int32_t maxNumFragmentsRequired = 2; Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } BOOST_CHECK_EQUAL(results.size(), 3); BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy... 
to Kennedy"); BOOST_CHECK_EQUAL(results[2], L" kennedy has been shot"); } namespace TestSpanHighlighting { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { mode = QUERY; doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); } }; } BOOST_AUTO_TEST_CASE(testSpanHighlighting) { QueryPtr query1 = newLucene(newCollection( newLucene(newLucene(FIELD_NAME, L"wordx")), newLucene(newLucene(FIELD_NAME, L"wordy"))), 1, false); QueryPtr query2 = newLucene(newCollection( newLucene(newLucene(FIELD_NAME, L"wordy")), newLucene(newLucene(FIELD_NAME, L"wordc"))), 1, false); BooleanQueryPtr bquery = newLucene(); bquery->add(query1, BooleanClause::SHOULD); bquery->add(query2, BooleanClause::SHOULD); doSearching(bquery); TestHighlightRunnerPtr helper = newLucene(this); Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx"); helper->run(expected); BOOST_CHECK_EQUAL(numHighlights, 7); } namespace TestNotSpanSimpleQuery { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { mode = QUERY; doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); } }; } BOOST_AUTO_TEST_CASE(testNotSpanSimpleQuery) { doSearching(newLucene(newLucene(newCollection( newLucene(newLucene(FIELD_NAME, L"shot")), newLucene(newLucene(FIELD_NAME, L"kennedy"))), 3, false), newLucene(newLucene(FIELD_NAME, L"john")))); TestHighlightRunnerPtr helper = newLucene(this); Collection expected = newCollection( L"John Kennedy has been 
shot", L" kennedy has been shot" ); helper->run(expected); BOOST_CHECK_EQUAL(numHighlights, 4); } namespace TestGetBestFragmentsSimpleQuery { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"Kennedy"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); BOOST_CHECK_EQUAL(fixture->numHighlights, 4); } }; } BOOST_AUTO_TEST_CASE(testGetBestFragmentsSimpleQuery) { TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"John Kennedy has been shot", L" refers to Kennedy... to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetFuzzyFragments { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"Kinnedy~"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected, true); BOOST_CHECK_EQUAL(fixture->numHighlights, 5); } }; } BOOST_AUTO_TEST_CASE(testGetFuzzyFragments) { TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"John Kennedy has been shot", L" refers to Kennedy... 
to Kennedy", L" kennedy has been shot", L" to Keneddy" ) ); } namespace TestGetWildCardFragments { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"K?nnedy"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); BOOST_CHECK_EQUAL(fixture->numHighlights, 4); } }; } BOOST_AUTO_TEST_CASE(testGetWildCardFragments) { TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"John Kennedy has been shot", L" refers to Kennedy... to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetMidWildCardFragments { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"K*dy"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); BOOST_CHECK_EQUAL(fixture->numHighlights, 5); } }; } BOOST_AUTO_TEST_CASE(testGetMidWildCardFragments) { TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L" to Keneddy", L"John Kennedy has been shot", L" refers to Kennedy... 
to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetRangeFragments { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; String queryString = HighlighterTestFixture::FIELD_NAME + L":[kannedy TO kznnedy]"; // Need to explicitly set the QueryParser property to use TermRangeQuery rather than RangeFilters QueryParserPtr parser = newLucene(HighlighterTestFixture::TEST_VERSION, HighlighterTestFixture::FIELD_NAME, fixture->analyzer); parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); fixture->query = parser->parse(queryString); fixture->doSearching(fixture->query); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); BOOST_CHECK_EQUAL(fixture->numHighlights, 5); } }; } BOOST_AUTO_TEST_CASE(testGetRangeFragments) { TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L" to Keneddy", L"John Kennedy has been shot", L" refers to Kennedy... 
to Kennedy", L" kennedy has been shot" ) ); } BOOST_AUTO_TEST_CASE(testConstantScoreMultiTermQuery) { numHighlights = 0; query = newLucene(newLucene(FIELD_NAME, L"ken*")); boost::dynamic_pointer_cast(query)->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); searcher = newLucene(ramDir, true); // can't rewrite ConstantScore if you want to highlight it - it rewrites to ConstantScoreQuery which cannot be highlighted // query = unReWrittenQuery.rewrite(reader); hits = searcher->search(query, FilterPtr(), 1000); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } BOOST_CHECK_EQUAL(numHighlights, 5); BOOST_CHECK_EQUAL(results.size(), 4); BOOST_CHECK_EQUAL(results[0], L" kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L" refers to Kennedy... 
to Kennedy"); BOOST_CHECK_EQUAL(results[2], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[3], L" to Keneddy"); // try null field hits = searcher->search(query, FilterPtr(), 1000); numHighlights = 0; results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, L""); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } BOOST_CHECK_EQUAL(numHighlights, 5); BOOST_CHECK_EQUAL(results.size(), 4); BOOST_CHECK_EQUAL(results[0], L" kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L" refers to Kennedy... to Kennedy"); BOOST_CHECK_EQUAL(results[2], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[3], L" to Keneddy"); // try default field hits = searcher->search(query, FilterPtr(), 1000); numHighlights = 0; results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, L"random_field", FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } BOOST_CHECK_EQUAL(numHighlights, 5); BOOST_CHECK_EQUAL(results.size(), 4); BOOST_CHECK_EQUAL(results[0], L" kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L" refers to Kennedy... 
to Kennedy"); BOOST_CHECK_EQUAL(results[2], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[3], L" to Keneddy"); } namespace TestGetBestFragmentsPhrase { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"\"John Kennedy\""); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); // Currently highlights "John" and "Kennedy" separately BOOST_CHECK_EQUAL(fixture->numHighlights, 2); } }; } BOOST_AUTO_TEST_CASE(testGetBestFragmentsPhrase) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } namespace TestGetBestFragmentsQueryScorer { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; Collection clauses = newCollection( newLucene(newLucene(L"contents", L"john")), newLucene(newLucene(L"contents", L"kennedy")) ); SpanNearQueryPtr snq = newLucene(clauses, 1, true); fixture->doSearching(snq); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); // Currently highlights "John" and "Kennedy" separately BOOST_CHECK_EQUAL(fixture->numHighlights, 2); } }; } BOOST_AUTO_TEST_CASE(testGetBestFragmentsQueryScorer) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } namespace TestOffByOne { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: 
HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { TermQueryPtr query = newLucene(newLucene(L"data", L"help")); HighlighterPtr hg = newLucene(newLucene(), newLucene(query)); hg->setTextFragmenter(newLucene()); String match = hg->getBestFragment(fixture->analyzer, L"data", L"help me [54-65]"); BOOST_CHECK_EQUAL(L"help me [54-65]", match); } }; } BOOST_AUTO_TEST_CASE(testOffByOne) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestGetBestFragmentsFilteredQuery { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; TermRangeFilterPtr rf = newLucene(L"contents", L"john", L"john", true, true); Collection clauses = newCollection( newLucene(newLucene(L"contents", L"john")), newLucene(newLucene(L"contents", L"kennedy")) ); SpanNearQueryPtr snq = newLucene(clauses, 1, true); FilteredQueryPtr fq = newLucene(snq, rf); fixture->doSearching(fq); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); // Currently highlights "John" and "Kennedy" separately BOOST_CHECK_EQUAL(fixture->numHighlights, 2); } }; } BOOST_AUTO_TEST_CASE(testGetBestFragmentsFilteredQuery) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } namespace TestGetBestFragmentsFilteredPhraseQuery { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { 
fixture->numHighlights = 0; TermRangeFilterPtr rf = newLucene(L"contents", L"john", L"john", true, true); PhraseQueryPtr pq = newLucene(); pq->add(newLucene(L"contents", L"john")); pq->add(newLucene(L"contents", L"kennedy")); FilteredQueryPtr fq = newLucene(pq, rf); fixture->doSearching(fq); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); // Currently highlights "John" and "Kennedy" separately BOOST_CHECK_EQUAL(fixture->numHighlights, 2); } }; } BOOST_AUTO_TEST_CASE(testGetBestFragmentsFilteredPhraseQuery) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } namespace TestGetBestFragmentsMultiTerm { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"John Kenn*"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); BOOST_CHECK_EQUAL(fixture->numHighlights, 5); } }; } BOOST_AUTO_TEST_CASE(testGetBestFragmentsMultiTerm) { TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"John Kennedy has been shot", L" refers to Kennedy... 
to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetBestFragmentsWithOr { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"JFK OR Kennedy"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); BOOST_CHECK_EQUAL(fixture->numHighlights, 5); } }; } BOOST_AUTO_TEST_CASE(testGetBestFragmentsWithOr) { TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"JFK has been shot", L"John Kennedy has been shot", L" refers to Kennedy... to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetBestSingleFragment { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->doSearching(L"Kennedy"); fixture->numHighlights = 0; Collection results = Collection::newInstance(); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragment(tokenStream, text)); } BOOST_CHECK_EQUAL(fixture->numHighlights, 4); BOOST_CHECK_EQUAL(results.size(), 3); BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L"This piece of 
text refers to Kennedy"); BOOST_CHECK_EQUAL(results[2], L" kennedy has been shot"); fixture->numHighlights = 0; results = Collection::newInstance(); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); results.add(highlighter->getBestFragment(fixture->analyzer, HighlighterTestFixture::FIELD_NAME, text)); } BOOST_CHECK_EQUAL(fixture->numHighlights, 4); BOOST_CHECK_EQUAL(results.size(), 3); BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); BOOST_CHECK_EQUAL(results[2], L" is really here which says kennedy has been shot"); fixture->numHighlights = 0; results = Collection::newInstance(); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); highlighter->setTextFragmenter(newLucene(40)); Collection result = highlighter->getBestFragments(fixture->analyzer, HighlighterTestFixture::FIELD_NAME, text, 10); results.addAll(result.begin(), result.end()); } BOOST_CHECK_EQUAL(fixture->numHighlights, 4); BOOST_CHECK_EQUAL(results.size(), 3); BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very 
long in the middle and finally ends with another reference to Kennedy"); BOOST_CHECK_EQUAL(results[2], L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"); } }; } BOOST_AUTO_TEST_CASE(testGetBestSingleFragment) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestGetBestSingleFragmentWithWeights { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { Collection wTerms = Collection::newInstance(2); wTerms[0] = newLucene(10.0, L"hello"); Collection positionSpans = newCollection(newLucene(0, 0)); boost::dynamic_pointer_cast(wTerms[0])->addPositionSpans(positionSpans); wTerms[1] = newLucene(1.0, L"kennedy"); positionSpans = newCollection(newLucene(14, 14)); boost::dynamic_pointer_cast(wTerms[1])->addPositionSpans(positionSpans); HighlighterPtr highlighter = getHighlighter(wTerms, newLucene(fixture)); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(fixture->texts[0])); highlighter->setTextFragmenter(newLucene(2)); String result = highlighter->getBestFragment(tokenStream, fixture->texts[0]); boost::trim(result); BOOST_CHECK_EQUAL(L"Hello", result); wTerms[1]->setWeight(50.0); tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(fixture->texts[0])); highlighter = getHighlighter(wTerms, newLucene(fixture)); highlighter->setTextFragmenter(newLucene(2)); result = highlighter->getBestFragment(tokenStream, fixture->texts[0]); boost::trim(result); BOOST_CHECK_EQUAL(L"kennedy", result); } }; } BOOST_AUTO_TEST_CASE(testGetBestSingleFragmentWithWeights) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace 
TestOverlapAnalyzer { class SynonymTokenizer : public TokenStream { public: SynonymTokenizer(TokenStreamPtr realStream, MapStringString synonyms) { this->realStream = realStream; this->synonyms = synonyms; this->synonymToken = 0; this->realTermAtt = realStream->addAttribute(); this->realPosIncrAtt = realStream->addAttribute(); this->realOffsetAtt = realStream->addAttribute(); this->termAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->offsetAtt = addAttribute(); } virtual ~SynonymTokenizer() { } protected: TokenStreamPtr realStream; TokenPtr currentRealToken; TokenPtr cRealToken; MapStringString synonyms; Collection synonymTokens; int32_t synonymToken; TermAttributePtr realTermAtt; PositionIncrementAttributePtr realPosIncrAtt; OffsetAttributePtr realOffsetAtt; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (!currentRealToken) { bool next = realStream->incrementToken(); if (!next) return false; clearAttributes(); termAtt->setTermBuffer(realTermAtt->term()); offsetAtt->setOffset(realOffsetAtt->startOffset(), realOffsetAtt->endOffset()); posIncrAtt->setPositionIncrement(realPosIncrAtt->getPositionIncrement()); if (!synonyms.contains(realTermAtt->term())) return true; String expansions = synonyms.get(realTermAtt->term()); synonymTokens = StringUtils::split(expansions, L","); synonymToken = 0; if (!synonymTokens.empty()) { currentRealToken = newLucene(realOffsetAtt->startOffset(), realOffsetAtt->endOffset()); currentRealToken->setTermBuffer(realTermAtt->term()); } return true; } else { String tok = synonymTokens[synonymToken++]; clearAttributes(); termAtt->setTermBuffer(tok); offsetAtt->setOffset(currentRealToken->startOffset(), currentRealToken->endOffset()); posIncrAtt->setPositionIncrement(0); if (synonymToken == synonymTokens.size()) { currentRealToken.reset(); synonymTokens.reset(); synonymToken = 0; } return true; } } }; class SynonymAnalyzer : public 
Analyzer { public: SynonymAnalyzer(MapStringString synonyms) { this->synonyms = synonyms; } virtual ~SynonymAnalyzer() { } protected: MapStringString synonyms; public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { LowerCaseTokenizerPtr stream = newLucene(reader); stream->addAttribute(); stream->addAttribute(); stream->addAttribute(); return newLucene(stream, synonyms); } }; class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { MapStringString synonyms = MapStringString::newInstance(); synonyms.put(L"football", L"soccer,footie"); AnalyzerPtr analyzer = newLucene(synonyms); String srchkey = L"football"; String s = L"football-soccer in the euro 2004 footie competition"; QueryParserPtr parser = newLucene(HighlighterTestFixture::TEST_VERSION, L"bookid", analyzer); QueryPtr query = parser->parse(srchkey); TokenStreamPtr tokenStream = analyzer->tokenStream(L"", newLucene(s)); HighlighterPtr highlighter = getHighlighter(query, L"", tokenStream, newLucene(fixture)); // Get 3 best fragments and separate with a "..." 
tokenStream = analyzer->tokenStream(L"", newLucene(s)); String result = highlighter->getBestFragments(tokenStream, s, 3, L"..."); String expectedResult = L"football-soccer in the euro 2004 footie competition"; BOOST_CHECK_EQUAL(expectedResult, result); } }; } /// tests a "complex" analyzer that produces multiple overlapping tokens BOOST_AUTO_TEST_CASE(testOverlapAnalyzer) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestGetSimpleHighlight { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"Kennedy"); Collection results = Collection::newInstance(); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); results.add(highlighter->getBestFragment(tokenStream, text)); } BOOST_CHECK_EQUAL(fixture->numHighlights, 4); BOOST_CHECK_EQUAL(results.size(), 3); BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); BOOST_CHECK_EQUAL(results[2], L" is really here which says kennedy has been shot"); } }; } BOOST_AUTO_TEST_CASE(testGetSimpleHighlight) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestGetTextFragments { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* 
fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->doSearching(L"Kennedy"); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); highlighter->setTextFragmenter(newLucene(20)); Collection stringResults = highlighter->getBestFragments(tokenStream, text, 10); tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); Collection fragmentResults = highlighter->getBestTextFragments(tokenStream, text, true, 10); BOOST_CHECK_EQUAL(fragmentResults.size(), stringResults.size()); for (int32_t j = 0; j < stringResults.size(); ++j) BOOST_CHECK_EQUAL(fragmentResults[j]->toString(), stringResults[j]); } } }; } BOOST_AUTO_TEST_CASE(testGetTextFragments) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestMaxSizeHighlight { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"meat"); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(fixture->texts[0])); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); highlighter->setMaxDocCharsToAnalyze(30); highlighter->getBestFragment(tokenStream, fixture->texts[0]); BOOST_CHECK_EQUAL(fixture->numHighlights, 0); } 
}; } BOOST_AUTO_TEST_CASE(testMaxSizeHighlight) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestMaxSizeHighlightTruncates { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { String goodWord = L"goodtoken"; HashSet stopWords = HashSet::newInstance(); stopWords.add(L"stoppedtoken"); TermQueryPtr query = newLucene(newLucene(L"data", goodWord)); StringStream buffer; buffer << goodWord; for (int32_t i = 0; i < 10000; ++i) { // only one stopword buffer << L" " << *stopWords.begin(); } SimpleHTMLFormatterPtr fm = newLucene(); HighlighterPtr hg = getHighlighter(query, L"data", newLucene(HighlighterTestFixture::TEST_VERSION, stopWords)->tokenStream(L"data", newLucene(buffer.str())), fm); hg->setTextFragmenter(newLucene()); hg->setMaxDocCharsToAnalyze(100); String match = hg->getBestFragment(newLucene(HighlighterTestFixture::TEST_VERSION, stopWords), L"data", buffer.str()); BOOST_CHECK((int32_t)match.length() < hg->getMaxDocCharsToAnalyze()); // add another tokenized word to the overall length - but set way beyond the length of text under consideration // (after a large slug of stop words + whitespace) buffer << L" " << goodWord; match = hg->getBestFragment(newLucene(HighlighterTestFixture::TEST_VERSION, stopWords), L"data", buffer.str()); BOOST_CHECK((int32_t)match.length() < hg->getMaxDocCharsToAnalyze()); } }; } BOOST_AUTO_TEST_CASE(testMaxSizeHighlightTruncates) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestMaxSizeEndHighlight { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void 
run(Collection expected) { HashSet stopWords = HashSet::newInstance(); stopWords.add(L"in"); stopWords.add(L"it"); TermQueryPtr query = newLucene(newLucene(L"text", L"searchterm")); String text = L"this is a text with searchterm in it"; SimpleHTMLFormatterPtr fm = newLucene(); HighlighterPtr hg = getHighlighter(query, L"text", newLucene(HighlighterTestFixture::TEST_VERSION, stopWords)->tokenStream(L"text", newLucene(text)), fm); hg->setTextFragmenter(newLucene()); hg->setMaxDocCharsToAnalyze(36); String match = hg->getBestFragment(newLucene(HighlighterTestFixture::TEST_VERSION, stopWords), L"text", text); BOOST_CHECK(boost::ends_with(match, L"in it")); } }; } BOOST_AUTO_TEST_CASE(testMaxSizeEndHighlight) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestUnRewrittenQuery { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; // test to show how rewritten query can still be used fixture->searcher = newLucene(fixture->ramDir, true); AnalyzerPtr analyzer = newLucene(HighlighterTestFixture::TEST_VERSION); QueryParserPtr parser = newLucene(HighlighterTestFixture::TEST_VERSION, HighlighterTestFixture::FIELD_NAME, analyzer); QueryPtr query = parser->parse(L"JF? 
or Kenned*"); TopDocsPtr hits = fixture->searcher->search(query, FilterPtr(), 1000); int32_t maxNumFragmentsRequired = 3; for (int32_t i = 0; i < hits->totalHits; ++i) { String text = fixture->searcher->doc(hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture), false); highlighter->setTextFragmenter(newLucene(40)); highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"..."); } // We expect to have zero highlights if the query is multi-terms and is not rewritten BOOST_CHECK_EQUAL(fixture->numHighlights, 0); } }; } BOOST_AUTO_TEST_CASE(testUnRewrittenQuery) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestNoFragments { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->doSearching(L"AnInvalidQueryWhichShouldYieldNoResults"); for (int32_t i = 0; i < fixture->texts.size(); ++i) { String text = fixture->texts[i]; TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); String result = highlighter->getBestFragment(tokenStream, text); BOOST_CHECK(result.empty()); } } }; } BOOST_AUTO_TEST_CASE(testNoFragments) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestEncoding { class NullScorer : public HighlighterScorer, public LuceneObject { public: virtual ~NullScorer() { } public: virtual void startFragment(TextFragmentPtr 
newFragment) { } virtual double getTokenScore() { return 0.0; } virtual double getFragmentScore() { return 1.0; } virtual TokenStreamPtr init(TokenStreamPtr tokenStream) { return TokenStreamPtr(); } }; } /// Demonstrates creation of an XHTML compliant doc using new encoding facilities. BOOST_AUTO_TEST_CASE(testEncoding) { String rawDocContent = L"\"Smith & sons' prices < 3 and >4\" claims article"; // run the highlighter on the raw content (scorer does not score any tokens for // highlighting but scores a single fragment for selection HighlighterPtr highlighter = newLucene(newLucene(this), newLucene(), newLucene()); highlighter->setTextFragmenter(newLucene(2000)); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(rawDocContent)); String encodedSnippet = highlighter->getBestFragments(tokenStream, rawDocContent, 1, L""); BOOST_CHECK_EQUAL(encodedSnippet, L""Smith & sons' prices < 3 and >4" claims article"); } BOOST_AUTO_TEST_CASE(testMultiSearcher) { // setup index 1 RAMDirectoryPtr ramDir1 = newLucene(); IndexWriterPtr writer1 = newLucene(ramDir1, newLucene(TEST_VERSION), true, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr d = newLucene(); FieldPtr f = newLucene(FIELD_NAME, L"multiOne", Field::STORE_YES, Field::INDEX_ANALYZED); d->add(f); writer1->addDocument(d); writer1->optimize(); writer1->close(); IndexReaderPtr reader1 = IndexReader::open(ramDir1, true); // setup index 2 RAMDirectoryPtr ramDir2 = newLucene(); IndexWriterPtr writer2 = newLucene(ramDir2, newLucene(TEST_VERSION), true, IndexWriter::MaxFieldLengthUNLIMITED); d = newLucene(); f = newLucene(FIELD_NAME, L"multiTwo", Field::STORE_YES, Field::INDEX_ANALYZED); d->add(f); writer2->addDocument(d); writer2->optimize(); writer2->close(); IndexReaderPtr reader2 = IndexReader::open(ramDir2, true); Collection searchers = newCollection( newLucene(ramDir1, true), newLucene(ramDir2, true) ); MultiSearcherPtr multiSearcher = newLucene(searchers); QueryParserPtr parser = 
newLucene(TEST_VERSION, FIELD_NAME, newLucene(TEST_VERSION)); parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); query = parser->parse(L"multi*"); // at this point the multisearcher calls combine(query[]) hits = multiSearcher->search(query, FilterPtr(), 1000); Collection expandedQueries = newCollection( query->rewrite(reader1), query->rewrite(reader2) ); query = query->combine(expandedQueries); // create an instance of the highlighter with the tags used to surround highlighted text HighlighterPtr highlighter = newLucene(newLucene(this), newLucene(query)); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = multiSearcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); String highlightedText = highlighter->getBestFragment(tokenStream, text); results.add(highlightedText); } BOOST_CHECK_EQUAL(results.size(), 2); BOOST_CHECK_EQUAL(results[0], L"multiOne"); BOOST_CHECK_EQUAL(results[1], L"multiTwo"); BOOST_CHECK_EQUAL(numHighlights, 2); } namespace TestFieldSpecificHighlighting { class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { String docMainText = L"fred is one of the people"; QueryParserPtr parser = newLucene(HighlighterTestFixture::TEST_VERSION, HighlighterTestFixture::FIELD_NAME, fixture->analyzer); QueryPtr query = parser->parse(L"fred category:people"); // highlighting respects fieldnames used in query HighlighterScorerPtr fieldSpecificScorer; if (mode == QUERY) fieldSpecificScorer = newLucene(query, HighlighterTestFixture::FIELD_NAME); else if (mode == QUERY_TERM) fieldSpecificScorer = newLucene(query, L"contents"); HighlighterPtr fieldSpecificHighlighter = 
newLucene(newLucene(), fieldSpecificScorer); fieldSpecificHighlighter->setTextFragmenter(newLucene()); String result = fieldSpecificHighlighter->getBestFragment(fixture->analyzer, HighlighterTestFixture::FIELD_NAME, docMainText); BOOST_CHECK_EQUAL(result, L"fred is one of the people"); // highlighting does not respect fieldnames used in query HighlighterScorerPtr fieldInSpecificScorer; if (mode == QUERY) fieldInSpecificScorer = newLucene(query, L""); else if (mode == QUERY_TERM) fieldInSpecificScorer = newLucene(query); HighlighterPtr fieldInSpecificHighlighter = newLucene(newLucene(), fieldInSpecificScorer); fieldInSpecificHighlighter->setTextFragmenter(newLucene()); result = fieldInSpecificHighlighter->getBestFragment(fixture->analyzer, HighlighterTestFixture::FIELD_NAME, docMainText); BOOST_CHECK_EQUAL(result, L"fred is one of the people"); fixture->reader->close(); } }; } BOOST_AUTO_TEST_CASE(testFieldSpecificHighlighting) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestOverlapAnalyzer2 { class TS2 : public TokenStream { public: TS2() { termAtt = addAttribute(); posIncrAtt = addAttribute(); offsetAtt = addAttribute(); lst = Collection::newInstance(); TokenPtr t = createToken(L"hi", 0, 2); t->setPositionIncrement(1); lst.add(t); t = createToken(L"hispeed", 0, 8); t->setPositionIncrement(1); lst.add(t); t = createToken(L"speed", 3, 8); t->setPositionIncrement(0); lst.add(t); t = createToken(L"10", 8, 10); t->setPositionIncrement(1); lst.add(t); t = createToken(L"foo", 11, 14); t->setPositionIncrement(1); lst.add(t); tokenPos = 0; } virtual ~TS2() { } protected: Collection lst; int32_t tokenPos; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (tokenPos < (int32_t)lst.size()) { TokenPtr token = lst[tokenPos++]; clearAttributes(); termAtt->setTermBuffer(token->term()); posIncrAtt->setPositionIncrement(token->getPositionIncrement()); 
offsetAtt->setOffset(token->startOffset(), token->endOffset()); return true; } return false; } protected: TokenPtr createToken(const String& term, int32_t start, int32_t offset) { TokenPtr token = newLucene(start, offset); token->setTermBuffer(term); return token; } }; /// same token-stream as above, but the bigger token comes first this time class TS2a : public TokenStream { public: TS2a() { termAtt = addAttribute(); posIncrAtt = addAttribute(); offsetAtt = addAttribute(); lst = Collection::newInstance(); TokenPtr t = createToken(L"hispeed", 0, 8); t->setPositionIncrement(1); lst.add(t); t = createToken(L"hi", 0, 2); t->setPositionIncrement(0); lst.add(t); t = createToken(L"speed", 3, 8); t->setPositionIncrement(1); lst.add(t); t = createToken(L"10", 8, 10); t->setPositionIncrement(1); lst.add(t); t = createToken(L"foo", 11, 14); t->setPositionIncrement(1); lst.add(t); tokenPos = 0; } virtual ~TS2a() { } protected: Collection lst; int32_t tokenPos; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (tokenPos < (int32_t)lst.size()) { TokenPtr token = lst[tokenPos++]; clearAttributes(); termAtt->setTermBuffer(token->term()); posIncrAtt->setPositionIncrement(token->getPositionIncrement()); offsetAtt->setOffset(token->startOffset(), token->endOffset()); return true; } return false; } protected: TokenPtr createToken(const String& term, int32_t start, int32_t offset) { TokenPtr token = newLucene(start, offset); token->setTermBuffer(term); return token; } }; class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { String s = L"Hi-Speed10 foo"; QueryPtr query; HighlighterPtr highlighter; String result; query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", 
newLucene())->parse(L"foo"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"10"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hi"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"speed"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hispeed"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hi speed"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); // same tests, just put the bigger overlapping token first query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"foo"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = 
newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"10"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hi"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"speed"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hispeed"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hi speed"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); } TokenStreamPtr getTS2() { return newLucene(); } TokenStreamPtr getTS2a() { return newLucene(); } }; } BOOST_AUTO_TEST_CASE(testOverlapAnalyzer2) { TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } BOOST_AUTO_TEST_CASE(testWeightedTermsWithDeletes) { makeIndex(); deleteDocument(); searchIndex(); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/contrib/memory/000077500000000000000000000000001217574114600220315ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/memory/MemoryIndexTest.cpp000066400000000000000000000175651217574114600256530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "TestUtils.h" #include "BaseTokenStreamFixture.h" #include "BufferedReader.h" #include "FileReader.h" #include "StopAnalyzer.h" #include "SimpleAnalyzer.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "MemoryIndex.h" #include "IndexSearcher.h" #include "TermDocs.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "QueryParser.h" #include "TopDocs.h" #include "Random.h" #include "FileUtils.h" using namespace Lucene; /// Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour, /// returning the same results for queries on some randomish indexes. class MemoryIndexTestFixture : public BaseTokenStreamFixture { public: MemoryIndexTestFixture() { fileDir = FileUtils::joinPath(getTestDir(), L"memory"); queries = HashSet::newInstance(); HashSet test1 = readQueries(L"testqueries.txt"); queries.addAll(test1.begin(), test1.end()); HashSet test2 = readQueries(L"testqueries2.txt"); queries.addAll(test2.begin(), test2.end()); random = newLucene(123); buffer = CharArray::newInstance(20); /// Some terms to be indexed, in addition to random words. /// These terms are commonly used in the queries. 
TEST_TERMS = Collection::newInstance(); TEST_TERMS.add(L"term"); TEST_TERMS.add(L"tErm"); TEST_TERMS.add(L"TERM"); TEST_TERMS.add(L"telm"); TEST_TERMS.add(L"stop"); TEST_TERMS.add(L"drop"); TEST_TERMS.add(L"roll"); TEST_TERMS.add(L"phrase"); TEST_TERMS.add(L"a"); TEST_TERMS.add(L"c"); TEST_TERMS.add(L"bar"); TEST_TERMS.add(L"blar"); TEST_TERMS.add(L"gack"); TEST_TERMS.add(L"weltbank"); TEST_TERMS.add(L"worlbank"); TEST_TERMS.add(L"hello"); TEST_TERMS.add(L"on"); TEST_TERMS.add(L"the"); TEST_TERMS.add(L"apache"); TEST_TERMS.add(L"Apache"); TEST_TERMS.add(L"copyright"); TEST_TERMS.add(L"Copyright"); } virtual ~MemoryIndexTestFixture() { } protected: String fileDir; HashSet queries; RandomPtr random; CharArray buffer; static const int32_t ITERATIONS; Collection TEST_TERMS; public: /// read a set of queries from a resource file HashSet readQueries(const String& resource) { HashSet queries = HashSet::newInstance(); BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(fileDir, resource))); String line; while (reader->readLine(line)) { boost::trim(line); if (!line.empty() && !boost::starts_with(line, L"#") && !boost::starts_with(line, L"//")) queries.add(line); } reader->close(); return queries; } /// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against it. 
void checkAgainstRAMDirectory() { StringStream fooField; StringStream termField; // add up to 250 terms to field "foo" int32_t fieldCount = random->nextInt(250) + 1; for (int32_t i = 0; i < fieldCount; ++i) fooField << L" " << randomTerm(); // add up to 250 terms to field "foo" int32_t termCount = random->nextInt(250) + 1; for (int32_t i = 0; i < termCount; ++i) termField << L" " << randomTerm(); RAMDirectoryPtr ramdir = newLucene(); AnalyzerPtr analyzer = randomAnalyzer(); IndexWriterPtr writer = newLucene(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); FieldPtr field1 = newLucene(L"foo", fooField.str(), Field::STORE_NO, Field::INDEX_ANALYZED); FieldPtr field2 = newLucene(L"term", termField.str(), Field::STORE_NO, Field::INDEX_ANALYZED); doc->add(field1); doc->add(field2); writer->addDocument(doc); writer->close(); MemoryIndexPtr memory = newLucene(); memory->addField(L"foo", fooField.str(), analyzer); memory->addField(L"term", termField.str(), analyzer); checkAllQueries(memory, ramdir, analyzer); } void checkAllQueries(MemoryIndexPtr memory, RAMDirectoryPtr ramdir, AnalyzerPtr analyzer) { IndexSearcherPtr ram = newLucene(ramdir); IndexSearcherPtr mem = memory->createSearcher(); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"foo", analyzer); for (HashSet::iterator query = queries.begin(); query != queries.end(); ++query) { TopDocsPtr ramDocs = ram->search(qp->parse(*query), 1); TopDocsPtr memDocs = mem->search(qp->parse(*query), 1); BOOST_CHECK_EQUAL(ramDocs->totalHits, memDocs->totalHits); } } AnalyzerPtr randomAnalyzer() { switch (random->nextInt(3)) { case 0: return newLucene(); case 1: return newLucene(LuceneVersion::LUCENE_CURRENT); default: return newLucene(LuceneVersion::LUCENE_CURRENT); } } /// half of the time, returns a random term from TEST_TERMS. /// the other half of the time, returns a random unicode string. 
String randomTerm() { if (random->nextInt() % 2 == 1) { // return a random TEST_TERM return TEST_TERMS[random->nextInt(TEST_TERMS.size())]; } else { // return a random unicode term return randomString(); } } /// Return a random unicode term, like StressIndexingTest. String randomString() { int32_t end = random->nextInt(20); if (buffer.size() < 1 + end) buffer.resize((int32_t)((double)(1 + end) * 1.25)); for (int32_t i = 0; i < end; ++i) { int32_t t = random->nextInt(5); if (t == 0 && i < end - 1) { #ifdef LPP_UNICODE_CHAR_SIZE_2 // Make a surrogate pair // High surrogate buffer[i++] = (wchar_t)nextInt(0xd800, 0xdc00); // Low surrogate buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); #else buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); #endif } else if (t <= 1) buffer[i] = (wchar_t)nextInt(0x01, 0x80); else if (t == 2) buffer[i] = (wchar_t)nextInt(0x80, 0x800); else if (t == 3) buffer[i] = (wchar_t)nextInt(0x800, 0xd800); else if (t == 4) buffer[i] = (wchar_t)nextInt(0xe000, 0xfff0); } return String(buffer.get(), end); } /// start is inclusive and end is exclusive int32_t nextInt(int32_t start, int32_t end) { return start + random->nextInt(end - start); } }; const int32_t MemoryIndexTestFixture::ITERATIONS = 100; BOOST_FIXTURE_TEST_SUITE(MemoryIndexTest, MemoryIndexTestFixture) /// runs random tests, up to ITERATIONS times. BOOST_AUTO_TEST_CASE(testRandomQueries) { for (int32_t i = 0; i < ITERATIONS; ++i) checkAgainstRAMDirectory(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/contrib/snowball/000077500000000000000000000000001217574114600223425ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/contrib/snowball/SnowballTest.cpp000066400000000000000000000026371217574114600254770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "SnowballAnalyzer.h" #include "StopAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SnowballTest, BaseTokenStreamFixture) BOOST_AUTO_TEST_CASE(testEnglish) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english"); checkAnalyzesTo(a, L"he abhorred accents", newCollection(L"he", L"abhor", L"accent")); } BOOST_AUTO_TEST_CASE(testStopwords) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english", StopAnalyzer::ENGLISH_STOP_WORDS_SET()); checkAnalyzesTo(a, L"the quick brown fox jumped", newCollection(L"quick", L"brown", L"fox", L"jump")); } BOOST_AUTO_TEST_CASE(testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english"); checkAnalyzesToReuse(a, L"he abhorred accents", newCollection(L"he", L"abhor", L"accent")); checkAnalyzesToReuse(a, L"she abhorred him", newCollection(L"she", L"abhor", L"him")); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/document/000077500000000000000000000000001217574114600206775ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/document/BinaryDocumentTest.cpp000066400000000000000000000111441217574114600251670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Field.h" #include "Document.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "IndexReader.h" #include "CompressionTools.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(BinaryDocumentTest, LuceneTestFixture) static String binaryValStored = L"this text will be stored as a byte array in the index"; static String binaryValCompressed = L"this text will be also stored and compressed as a byte array in the index"; BOOST_AUTO_TEST_CASE(testBinaryFieldInIndex) { ByteArray binaryStored = ByteArray::newInstance(binaryValStored.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryStored.get(), binaryValStored.c_str(), binaryValStored.length()); FieldablePtr binaryFldStored = newLucene(L"binaryStored", binaryStored, Field::STORE_YES); FieldablePtr stringFldStored = newLucene(L"stringStored", binaryValStored, Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_NO); // binary fields with store off are not allowed BOOST_CHECK_EXCEPTION(newLucene(L"fail", binaryStored, Field::STORE_NO), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); DocumentPtr doc = newLucene(); doc->add(binaryFldStored); doc->add(stringFldStored); // test for field count BOOST_CHECK_EQUAL(2, doc->getFields().size()); // add the doc to a ram index MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc); writer->close(); // open a reader and fetch the document IndexReaderPtr reader = IndexReader::open(dir, false); DocumentPtr docFromReader = reader->document(0); BOOST_CHECK(docFromReader); // fetch the binary stored field and compare it's content with the original one ByteArray storedTest = docFromReader->getBinaryValue(L"binaryStored"); String 
binaryFldStoredTest((wchar_t*)storedTest.get(), storedTest.size() / sizeof(wchar_t)); BOOST_CHECK_EQUAL(binaryFldStoredTest, binaryValStored); // fetch the string field and compare it's content with the original one String stringFldStoredTest = docFromReader->get(L"stringStored"); BOOST_CHECK_EQUAL(stringFldStoredTest, binaryValStored); // delete the document from index reader->deleteDocument(0); BOOST_CHECK_EQUAL(0, reader->numDocs()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testCompressionTools) { ByteArray binaryCompressed = ByteArray::newInstance(binaryValCompressed.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryCompressed.get(), binaryValCompressed.c_str(), binaryValCompressed.length()); FieldablePtr binaryFldCompressed = newLucene(L"binaryCompressed", CompressionTools::compress(binaryCompressed), Field::STORE_YES); FieldablePtr stringFldCompressed = newLucene(L"stringCompressed", CompressionTools::compressString(binaryValCompressed), Field::STORE_YES); DocumentPtr doc = newLucene(); doc->add(binaryFldCompressed); doc->add(stringFldCompressed); // add the doc to a ram index MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc); writer->close(); // open a reader and fetch the document IndexReaderPtr reader = IndexReader::open(dir, false); DocumentPtr docFromReader = reader->document(0); BOOST_CHECK(docFromReader); // fetch the binary compressed field and compare it's content with the original one ByteArray compressTest = CompressionTools::decompress(docFromReader->getBinaryValue(L"binaryCompressed")); String binaryFldCompressedTest((wchar_t*)compressTest.get(), compressTest.size() / sizeof(wchar_t)); BOOST_CHECK_EQUAL(binaryFldCompressedTest, binaryValCompressed); BOOST_CHECK_EQUAL(CompressionTools::decompressString(docFromReader->getBinaryValue(L"stringCompressed")), binaryValCompressed); 
reader->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/document/DateFieldTest.cpp000066400000000000000000000022271217574114600240670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "DateField.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(DateFieldTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testMinDate) { BOOST_CHECK_EQUAL(DateField::MIN_DATE_STRING(), L"000000000"); } BOOST_AUTO_TEST_CASE(testMaxDate) { BOOST_CHECK_EQUAL(DateField::MAX_DATE_STRING(), L"zzzzzzzzz"); } BOOST_AUTO_TEST_CASE(testDateToString) { BOOST_CHECK_EQUAL(DateField::dateToString(boost::posix_time::ptime(boost::gregorian::date(2010, boost::gregorian::Jan, 14))), L"0g4erxmo0"); } BOOST_AUTO_TEST_CASE(testTimeToString) { BOOST_CHECK_EQUAL(DateField::timeToString(1263427200000LL), L"0g4erxmo0"); } BOOST_AUTO_TEST_CASE(testStringToTime) { BOOST_CHECK_EQUAL(DateField::stringToTime(L"0g4erxmo0"), 1263427200000LL); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/document/DateToolsTest.cpp000066400000000000000000000242001217574114600241370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include #include "LuceneTestFixture.h" #include "DateTools.h" using namespace Lucene; using namespace boost::posix_time; using namespace boost::gregorian; BOOST_FIXTURE_TEST_SUITE(DateToolsTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testDateToString) { BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_YEAR), L"2010"); BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_MONTH), L"201001"); BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_DAY), L"20100114"); BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_HOUR), L"2010011403"); BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_MINUTE), L"201001140341"); BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_SECOND), L"20100114034105"); BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MILLISECOND), L"20100114034105123"); } BOOST_AUTO_TEST_CASE(testTimeToString) { BOOST_CHECK_EQUAL(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_YEAR), L"2010"); BOOST_CHECK_EQUAL(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_MONTH), L"201001"); BOOST_CHECK_EQUAL(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_DAY), L"20100114"); BOOST_CHECK_EQUAL(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_HOUR), L"2010011403"); BOOST_CHECK_EQUAL(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_MINUTE), L"201001140341"); BOOST_CHECK_EQUAL(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_SECOND), 
L"20100114034105"); BOOST_CHECK_EQUAL(DateTools::timeToString(1263440465123LL, DateTools::RESOLUTION_MILLISECOND), L"20100114034105123"); } BOOST_AUTO_TEST_CASE(testStringToTime) { BOOST_CHECK_EQUAL(DateTools::stringToTime(L"2010"), 1262304000000LL); BOOST_CHECK_EQUAL(DateTools::stringToTime(L"201001"), 1262304000000LL); BOOST_CHECK_EQUAL(DateTools::stringToTime(L"20100114"), 1263427200000LL); BOOST_CHECK_EQUAL(DateTools::stringToTime(L"2010011403"), 1263438000000LL); BOOST_CHECK_EQUAL(DateTools::stringToTime(L"201001140341"), 1263440460000LL); BOOST_CHECK_EQUAL(DateTools::stringToTime(L"20100114034105"), 1263440465000LL); BOOST_CHECK_EQUAL(DateTools::stringToTime(L"20100114034105123"), 1263440465123LL); } BOOST_AUTO_TEST_CASE(testDateRound) { BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_YEAR), ptime(date(2010, Jan, 1))); BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MONTH), ptime(date(2010, Feb, 1))); BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_DAY), ptime(date(2010, Feb, 16))); BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_HOUR), ptime(date(2010, Feb, 16), hours(3))); BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MINUTE), ptime(date(2010, Feb, 16), hours(3) + minutes(41))); BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_SECOND), ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5))); BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + 
milliseconds(123)), DateTools::RESOLUTION_MILLISECOND), ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123))); } BOOST_AUTO_TEST_CASE(testParseDateGB) { DateTools::setDateOrder(DateTools::DATEORDER_DMY); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01122005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"011205"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/12/2005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/12/05"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/12/2005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/12/05"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/1/05"), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/Jan/05"), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/Jan/05"), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/Jan/2005"), ptime(date(2005, 01, 01))); } BOOST_AUTO_TEST_CASE(testParseDateUS) { DateTools::setDateOrder(DateTools::DATEORDER_MDY); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12012005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"120105"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/01/2005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/01/05"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/1/2005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/1/05"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/1/05"), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/1/05"), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/01/05"), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/01/2005"), ptime(date(2005, 01, 01))); } BOOST_AUTO_TEST_CASE(testParseDateLocale) { 
bool hasThisLocale = false; try { std::locale("en_GB.UTF-8"); hasThisLocale = true; } catch (...) { } if (hasThisLocale) { DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01122005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"011205", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/12/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/12/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/12/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/12/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/1/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/Jan/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/Jan/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/Jan/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); } try { std::locale("en_US.UTF-8"); hasThisLocale = true; } catch (...) 
{ hasThisLocale = false; } if (hasThisLocale) { DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12012005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"120105", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/01/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/01/05", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/1/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/01/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/01/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); } } BOOST_AUTO_TEST_CASE(testParseDateSeparator) { DateTools::setDateOrder(DateTools::DATEORDER_DMY); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01122005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"011205"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01-12-2005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01 12 05"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1.12.2005"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1.12.05"), ptime(date(2005, 12, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1 1 05"), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"1 Jan 05"), ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01-Jan-05"), 
ptime(date(2005, 01, 01))); BOOST_CHECK_EQUAL(DateTools::parseDate(L"01,Jan,2005"), ptime(date(2005, 01, 01))); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/document/DocumentTest.cpp000066400000000000000000000242001217574114600240170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Document.h" #include "Field.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(DocumentTest, LuceneTestFixture) static String binaryVal = L"this text will be stored as a byte array in the index"; static String binaryVal2 = L"this text will be also stored as a byte array in the index"; static DocumentPtr makeDocumentWithFields() { DocumentPtr doc = newLucene(); doc->add(newLucene(L"keyword", L"test1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"keyword", L"test2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"text", L"test1", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"text", L"test2", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"unindexed", L"test1", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"unindexed", L"test2", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"unstored", L"test1", Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"unstored", L"test2", Field::STORE_NO, Field::INDEX_ANALYZED)); return doc; } static void checkDocument(DocumentPtr doc, bool fromIndex) { Collection 
keywordFieldValues = doc->getValues(L"keyword"); Collection textFieldValues = doc->getValues(L"text"); Collection unindexedFieldValues = doc->getValues(L"unindexed"); Collection unstoredFieldValues = doc->getValues(L"unstored"); BOOST_CHECK_EQUAL(keywordFieldValues.size(), 2); BOOST_CHECK_EQUAL(textFieldValues.size(), 2); BOOST_CHECK_EQUAL(unindexedFieldValues.size(), 2); // this test cannot work for documents retrieved from the index since unstored fields will obviously not be returned if (!fromIndex) BOOST_CHECK_EQUAL(unstoredFieldValues.size(), 2); BOOST_CHECK_EQUAL(keywordFieldValues[0], L"test1"); BOOST_CHECK_EQUAL(keywordFieldValues[1], L"test2"); BOOST_CHECK_EQUAL(textFieldValues[0], L"test1"); BOOST_CHECK_EQUAL(textFieldValues[1], L"test2"); BOOST_CHECK_EQUAL(unindexedFieldValues[0], L"test1"); BOOST_CHECK_EQUAL(unindexedFieldValues[1], L"test2"); // this test cannot work for documents retrieved from the index since unstored fields will obviously not be returned if (!fromIndex) { BOOST_CHECK_EQUAL(unstoredFieldValues[0], L"test1"); BOOST_CHECK_EQUAL(unstoredFieldValues[1], L"test2"); } } BOOST_AUTO_TEST_CASE(testBinaryField) { DocumentPtr doc = newLucene(); FieldablePtr stringFld = newLucene(L"string", binaryVal, Field::STORE_YES, Field::INDEX_NO); ByteArray binaryBytes1 = ByteArray::newInstance(binaryVal.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryBytes1.get(), binaryVal.c_str(), binaryVal.length()); FieldablePtr binaryFld = newLucene(L"binary", binaryBytes1, Field::STORE_YES); ByteArray binaryBytes2 = ByteArray::newInstance(binaryVal2.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryBytes2.get(), binaryVal2.c_str(), binaryVal2.length()); FieldablePtr binaryFld2 = newLucene(L"binary", binaryBytes2, Field::STORE_YES); doc->add(stringFld); doc->add(binaryFld); BOOST_CHECK_EQUAL(2, doc->getFields().size()); BOOST_CHECK(binaryFld->isBinary()); BOOST_CHECK(binaryFld->isStored()); BOOST_CHECK(!binaryFld->isIndexed()); 
BOOST_CHECK(!binaryFld->isTokenized()); ByteArray bytesTest = doc->getBinaryValue(L"binary"); String binaryTest((wchar_t*)bytesTest.get(), bytesTest.size() / sizeof(wchar_t)); BOOST_CHECK_EQUAL(binaryTest, binaryVal); String stringTest = doc->get(L"string"); BOOST_CHECK_EQUAL(binaryTest, stringTest); doc->add(binaryFld2); BOOST_CHECK_EQUAL(3, doc->getFields().size()); Collection binaryTests = doc->getBinaryValues(L"binary"); BOOST_CHECK_EQUAL(2, binaryTests.size()); bytesTest = binaryTests[0]; binaryTest = String((wchar_t*)bytesTest.get(), bytesTest.size() / sizeof(wchar_t)); ByteArray bytesTest2 = binaryTests[1]; String binaryTest2((wchar_t*)bytesTest2.get(), bytesTest2.size() / sizeof(wchar_t)); BOOST_CHECK_NE(binaryTest, binaryTest2); BOOST_CHECK_EQUAL(binaryTest, binaryVal); BOOST_CHECK_EQUAL(binaryTest2, binaryVal2); doc->removeField(L"string"); BOOST_CHECK_EQUAL(2, doc->getFields().size()); doc->removeFields(L"binary"); BOOST_CHECK_EQUAL(0, doc->getFields().size()); } /// Tests {@link Document#removeField(String)} method for a brand new Document that has not been indexed yet. 
BOOST_AUTO_TEST_CASE(testRemoveForNewDocument) { DocumentPtr doc = makeDocumentWithFields(); BOOST_CHECK_EQUAL(8, doc->getFields().size()); doc->removeFields(L"keyword"); BOOST_CHECK_EQUAL(6, doc->getFields().size()); doc->removeFields(L"doesnotexists"); // removing non-existing fields is silently ignored doc->removeFields(L"keyword"); // removing a field more than once BOOST_CHECK_EQUAL(6, doc->getFields().size()); doc->removeField(L"text"); BOOST_CHECK_EQUAL(5, doc->getFields().size()); doc->removeField(L"text"); BOOST_CHECK_EQUAL(4, doc->getFields().size()); doc->removeField(L"text"); BOOST_CHECK_EQUAL(4, doc->getFields().size()); doc->removeField(L"doesnotexists"); // removing non-existing fields is silently ignored BOOST_CHECK_EQUAL(4, doc->getFields().size()); doc->removeFields(L"unindexed"); BOOST_CHECK_EQUAL(2, doc->getFields().size()); doc->removeFields(L"unstored"); BOOST_CHECK_EQUAL(0, doc->getFields().size()); doc->removeFields(L"doesnotexists"); // removing non-existing fields is silently ignored BOOST_CHECK_EQUAL(0, doc->getFields().size()); } BOOST_AUTO_TEST_CASE(testConstructorExceptions) { newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO); // ok newLucene(L"name", L"value", Field::STORE_NO, Field::INDEX_NOT_ANALYZED); // ok BOOST_CHECK_EXCEPTION(newLucene(L"name", L"value", Field::STORE_NO, Field::INDEX_NO), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_NO); // ok BOOST_CHECK_EXCEPTION(newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_YES), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } /// Tests {@link Document#getValues(String)} method for a brand new Document that has not been indexed yet. 
BOOST_AUTO_TEST_CASE(testGetValuesForNewDocument) { checkDocument(makeDocumentWithFields(), false); } /// Tests {@link Document#getValues(String)} method for a Document retrieved from an index. BOOST_AUTO_TEST_CASE(testGetValuesForIndexedDocument) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(makeDocumentWithFields()); writer->close(); SearcherPtr searcher = newLucene(dir, true); // search for something that does exists QueryPtr query = newLucene(newLucene(L"keyword", L"test1")); // ensure that queries return expected results without DateFilter first Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); checkDocument(searcher->doc(hits[0]->doc), true); searcher->close(); } BOOST_AUTO_TEST_CASE(testFieldSetValue) { FieldPtr field = newLucene(L"id", L"id1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); DocumentPtr doc = newLucene(); doc->add(field); doc->add(newLucene(L"keyword", L"test", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc); field->setValue(L"id2"); writer->addDocument(doc); field->setValue(L"id3"); writer->addDocument(doc); writer->close(); SearcherPtr searcher = newLucene(dir, true); QueryPtr query = newLucene(newLucene(L"keyword", L"test")); // ensure that queries return expected results without DateFilter first Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); int32_t result = 0; for (int32_t i = 0; i < 3; ++i) { DocumentPtr doc2 = searcher->doc(hits[i]->doc); FieldPtr f = doc2->getField(L"id"); if (f->stringValue() == L"id1") result |= 1; else if (f->stringValue() == L"id2") result |= 2; else if (f->stringValue() == L"id3") 
result |= 4; else BOOST_FAIL("unexpected id field"); } searcher->close(); dir->close(); BOOST_CHECK_EQUAL(7, result); } BOOST_AUTO_TEST_CASE(testFieldSetValueChangeBinary) { FieldPtr field1 = newLucene(L"field1", ByteArray::newInstance(0), Field::STORE_YES); FieldPtr field2 = newLucene(L"field2", L"", Field::STORE_YES, Field::INDEX_ANALYZED); BOOST_CHECK_EXCEPTION(field1->setValue(L"abc"), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); BOOST_CHECK_EXCEPTION(field2->setValue(ByteArray::newInstance(0)), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/document/NumberToolsTest.cpp000066400000000000000000000047541217574114600245260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "NumberTools.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(NumberToolsTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testMinValue) { BOOST_CHECK_EQUAL(NumberTools::MIN_STRING_VALUE(), L"-0000000000000"); } BOOST_AUTO_TEST_CASE(testMaxValue) { BOOST_CHECK_EQUAL(NumberTools::MAX_STRING_VALUE(), L"01y2p0ij32e8e7"); } BOOST_AUTO_TEST_CASE(testValueSize) { BOOST_CHECK_EQUAL(NumberTools::STR_SIZE(), 14); } BOOST_AUTO_TEST_CASE(testLongToString) { BOOST_CHECK_EQUAL(NumberTools::longToString(LLONG_MIN), L"-0000000000000"); BOOST_CHECK_EQUAL(NumberTools::longToString(LLONG_MAX), L"01y2p0ij32e8e7"); BOOST_CHECK_EQUAL(NumberTools::longToString(1LL), L"00000000000001"); BOOST_CHECK_EQUAL(NumberTools::longToString(999LL), L"000000000000rr"); BOOST_CHECK_EQUAL(NumberTools::longToString(34234LL), L"00000000000qey"); BOOST_CHECK_EQUAL(NumberTools::longToString(4345325254LL), L"00000001zv3efa"); BOOST_CHECK_EQUAL(NumberTools::longToString(986778657657575LL), L"00009ps7uuwdlz"); BOOST_CHECK_EQUAL(NumberTools::longToString(23232143543434234LL), L"0006cr3vell8my"); } BOOST_AUTO_TEST_CASE(testStringToLong) { BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"-0000000000000"), LLONG_MIN); BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"01y2p0ij32e8e7"), LLONG_MAX); BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"00000000000001"), 1LL); BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"000000000000rr"), 999LL); BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"00000000000qey"), 34234LL); BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"00000001zv3efa"), 4345325254LL); BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"00009ps7uuwdlz"), 986778657657575LL); BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"0006cr3vell8my"), 23232143543434234LL); BOOST_CHECK_EXCEPTION(NumberTools::stringToLong(L"32132"), LuceneException, check_exception(LuceneException::NumberFormat)); // wrong 
length BOOST_CHECK_EXCEPTION(NumberTools::stringToLong(L"9006cr3vell8my"), LuceneException, check_exception(LuceneException::NumberFormat)); // wrong prefix } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/include/000077500000000000000000000000001217574114600205045ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/include/BaseTestRangeFilterFixture.h000066400000000000000000000032241217574114600260620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BASETESTRANGEFILTERFIXTURE_H #define BASETESTRANGEFILTERFIXTURE_H #include "LuceneTestFixture.h" #include "LuceneObject.h" namespace Lucene { DECLARE_SHARED_PTR(TestIndex) /// Collation interacts badly with hyphens -- collation produces different ordering than Unicode code-point ordering, /// so two indexes are created: /// one which can't have negative random integers, for testing collated ranges, and the other which can have negative /// random integers, for all other tests. 
class TestIndex : public LuceneObject { public: TestIndex(int32_t minR, int32_t maxR, bool allowNegativeRandomInts); virtual ~TestIndex(); LUCENE_CLASS(TestIndex); public: int32_t maxR; int32_t minR; bool allowNegativeRandomInts; RAMDirectoryPtr index; }; class BaseTestRangeFilterFixture : public LuceneTestFixture { public: BaseTestRangeFilterFixture(); virtual ~BaseTestRangeFilterFixture(); public: TestIndexPtr signedIndex; TestIndexPtr unsignedIndex; int32_t minId; int32_t maxId; int32_t intLength; RandomPtr random; protected: void build(TestIndexPtr index); String pad(int32_t n); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/BaseTokenStreamFixture.h000066400000000000000000000117241217574114600252600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BASETOKENSTREAMFIXTURE_H #define BASETOKENSTREAMFIXTURE_H #include "LuceneTestFixture.h" #include "Attribute.h" namespace Lucene { DECLARE_SHARED_PTR(CheckClearAttributesAttribute) class CheckClearAttributesAttribute : public Attribute { public: CheckClearAttributesAttribute(); virtual ~CheckClearAttributesAttribute(); LUCENE_CLASS(CheckClearAttributesAttribute); protected: bool clearCalled; public: bool getAndResetClearCalled(); virtual void clear(); virtual bool equals(LuceneObjectPtr other); virtual int32_t hashCode(); virtual void copyTo(AttributePtr target); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; class BaseTokenStreamFixture : public LuceneTestFixture { public: virtual ~BaseTokenStreamFixture(); public: // some helpers to test Analyzers and TokenStreams static void checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements, int32_t finalOffset = -1); static void checkTokenStreamContents(TokenStreamPtr ts, Collection output); static void checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection types); static void checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection posIncrements); static void checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets); static void checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, int32_t finalOffset); static void checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements); static void checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements, int32_t finalOffset); static void checkAnalyzesTo(AnalyzerPtr analyzer, 
const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements); static void checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output); static void checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection types); static void checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection posIncrements); static void checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets); static void checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements); static void checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements); static void checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output); static void checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection types); static void checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection posIncrements); static void checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets); static void checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements); static void checkOneTerm(AnalyzerPtr analyzer, const String& input, const String& expected); static void checkOneTermReuse(AnalyzerPtr analyzer, const String& input, const String& expected); }; } #endif 
LucenePlusPlus-rel_3.0.4/src/test/include/CheckHits.h000066400000000000000000000057431217574114600225330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHECKHITS_H #define CHECKHITS_H #include "test_lucene.h" namespace Lucene { class CheckHits { public: virtual ~CheckHits(); public: /// Some explains methods calculate their values though a slightly different order of operations /// from the actual scoring method - this allows for a small amount of variation static const double EXPLAIN_SCORE_TOLERANCE_DELTA; /// Tests that all documents up to maxDoc which are *not* in the expected result set, have an /// explanation which indicates no match (ie: Explanation value of 0.0) static void checkNoMatchExplanations(QueryPtr q, const String& defaultFieldName, SearcherPtr searcher, Collection results); /// Tests that a query matches the an expected set of documents using a HitCollector. /// /// Note that when using the HitCollector API, documents will be collected if they "match" /// regardless of what their score is. static void checkHitCollector(QueryPtr query, const String& defaultFieldName, SearcherPtr searcher, Collection results); /// Tests that a query matches the an expected set of documents using Hits. /// /// Note that when using the Hits API, documents will only be returned if they have a /// positive normalized score. 
static void checkHits(QueryPtr query, const String& defaultFieldName, SearcherPtr searcher, Collection results); /// Tests that a Hits has an expected order of documents static void checkDocIds(Collection results, Collection hits); /// Tests that two queries have an expected order of documents, and that the two queries have /// the same score values. static void checkHitsQuery(QueryPtr query, Collection hits1, Collection hits2, Collection results); static void checkEqual(QueryPtr query, Collection hits1, Collection hits2); /// Asserts that the explanation value for every document matching a query corresponds with the true score. /// Optionally does "deep" testing of the explanation details. static void checkExplanations(QueryPtr query, const String& defaultFieldName, SearcherPtr searcher, bool deep = false); /// Assert that an explanation has the expected score, and optionally that its sub-details max/sum/factor /// match to that score. static void verifyExplanation(const String& q, int32_t doc, double score, bool deep, ExplanationPtr expl); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/DocHelper.h000066400000000000000000000075561217574114600225370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DOCHELPER_H #define DOCHELPER_H #include "test_lucene.h" namespace Lucene { class DocHelper { public: DocHelper(); virtual ~DocHelper(); public: static const wchar_t* FIELD_1_TEXT; static const wchar_t* TEXT_FIELD_1_KEY; static FieldPtr textField1; static const wchar_t* FIELD_2_TEXT; static const int32_t FIELD_2_FREQS[]; static const wchar_t* TEXT_FIELD_2_KEY; static FieldPtr textField2; static const wchar_t* FIELD_3_TEXT; static const wchar_t* TEXT_FIELD_3_KEY; static FieldPtr textField3; static const wchar_t* KEYWORD_TEXT; static const wchar_t* KEYWORD_FIELD_KEY; static FieldPtr keyField; static const wchar_t* NO_NORMS_TEXT; static const wchar_t* NO_NORMS_KEY; static FieldPtr noNormsField; static const wchar_t* NO_TF_TEXT; static const wchar_t* NO_TF_KEY; static FieldPtr noTFField; static const wchar_t* UNINDEXED_FIELD_TEXT; static const wchar_t* UNINDEXED_FIELD_KEY; static FieldPtr unIndField; static const wchar_t* UNSTORED_1_FIELD_TEXT; static const wchar_t* UNSTORED_FIELD_1_KEY; static FieldPtr unStoredField1; static const wchar_t* UNSTORED_2_FIELD_TEXT; static const wchar_t* UNSTORED_FIELD_2_KEY; static FieldPtr unStoredField2; static const wchar_t* LAZY_FIELD_BINARY_KEY; static ByteArray LAZY_FIELD_BINARY_BYTES; static FieldPtr lazyFieldBinary; static const wchar_t* LAZY_FIELD_KEY; static const wchar_t* LAZY_FIELD_TEXT; static FieldPtr lazyField; static const wchar_t* LARGE_LAZY_FIELD_KEY; static String LARGE_LAZY_FIELD_TEXT; static FieldPtr largeLazyField; static const uint8_t _FIELD_UTF1_TEXT[]; static const String FIELD_UTF1_TEXT; static const wchar_t* TEXT_FIELD_UTF1_KEY; static FieldPtr textUtfField1; static const uint8_t _FIELD_UTF2_TEXT[]; static const String FIELD_UTF2_TEXT; static const int32_t FIELD_UTF2_FREQS[]; static const wchar_t* TEXT_FIELD_UTF2_KEY; static FieldPtr textUtfField2; static MapStringString nameValues; static Collection fields; static 
MapStringField all; static MapStringField indexed; static MapStringField stored; static MapStringField unstored; static MapStringField unindexed; static MapStringField termvector; static MapStringField notermvector; static MapStringField lazy; static MapStringField noNorms; static MapStringField noTf; public: /// Adds the fields above to a document void setupDoc(DocumentPtr doc); /// Writes the document to the directory using a segment named "test"; returns the SegmentInfo describing the new segment SegmentInfoPtr writeDoc(DirectoryPtr dir, DocumentPtr doc); /// Writes the document to the directory using the analyzer and the similarity score; returns the SegmentInfo describing the new segment SegmentInfoPtr writeDoc(DirectoryPtr dir, AnalyzerPtr analyzer, SimilarityPtr similarity, DocumentPtr doc); int32_t numFields(DocumentPtr doc); protected: /// One-time setup to initialise static members void setup(); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/ExplanationsFixture.h000066400000000000000000000050241217574114600246720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef EXPLANATIONSFIXTURE_H #define EXPLANATIONSFIXTURE_H #include "LuceneTestFixture.h" namespace Lucene { class ExplanationsFixture : public LuceneTestFixture { public: ExplanationsFixture(); virtual ~ExplanationsFixture(); public: static const String KEY; static const String FIELD; protected: IndexSearcherPtr searcher; QueryParserPtr qp; Collection docFields; public: virtual SpanTermQueryPtr st(const String& s); virtual SpanFirstQueryPtr sf(const String& s, int32_t b); virtual SpanNotQueryPtr snot(SpanQueryPtr i, SpanQueryPtr e); virtual SpanOrQueryPtr sor(const String& s, const String& e); virtual SpanOrQueryPtr sor(SpanQueryPtr s, SpanQueryPtr e); virtual SpanOrQueryPtr sor(const String& s, const String& m, const String& e); virtual SpanOrQueryPtr sor(SpanQueryPtr s, SpanQueryPtr m, SpanQueryPtr e); virtual SpanNearQueryPtr snear(const String& s, const String& e, int32_t slop, bool inOrder); virtual SpanNearQueryPtr snear(SpanQueryPtr s, SpanQueryPtr e, int32_t slop, bool inOrder); virtual SpanNearQueryPtr snear(const String& s, const String& m, const String& e, int32_t slop, bool inOrder); virtual SpanNearQueryPtr snear(SpanQueryPtr s, SpanQueryPtr m, SpanQueryPtr e, int32_t slop, bool inOrder); virtual QueryPtr optB(const String& q); virtual QueryPtr optB(QueryPtr q); virtual QueryPtr reqB(const String& q); virtual QueryPtr reqB(QueryPtr q); virtual Collection ta(Collection s); /// Check the expDocNrs first, then check the query (and the explanations) virtual void qtest(const String& queryText, Collection expDocNrs); virtual void qtest(QueryPtr q, Collection expDocNrs); /// Tests a query using qtest after wrapping it with both optB and reqB virtual void bqtest(QueryPtr q, Collection expDocNrs); virtual void bqtest(const String& queryText, Collection expDocNrs); virtual QueryPtr makeQuery(const String& queryText); }; } #endif 
LucenePlusPlus-rel_3.0.4/src/test/include/FunctionFixture.h000066400000000000000000000025521217574114600240150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FUNCTIONFIXTURE_H #define FUNCTIONFIXTURE_H #include "LuceneTestFixture.h" namespace Lucene { class FunctionFixture : public LuceneTestFixture { public: FunctionFixture(bool doMultiSegment); virtual ~FunctionFixture(); public: static const double TEST_SCORE_TOLERANCE_DELTA; public: static const int32_t N_DOCS; static const String ID_FIELD; static const String TEXT_FIELD; static const String INT_FIELD; static const String DOUBLE_FIELD; bool doMultiSegment; DirectoryPtr dir; AnalyzerPtr anlzr; protected: static const Collection DOC_TEXT_LINES(); void addDoc(IndexWriterPtr iw, int32_t i); String id2String(int32_t scoreAndID); String textLine(int32_t docNum); double expectedFieldScore(const String& docIDFieldVal); bool equalCollectionValues(CollectionValue first, CollectionValue second); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/LuceneGlobalFixture.h000066400000000000000000000010511217574114600245550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "test_lucene.h" namespace Lucene { class LuceneGlobalFixture { public: /// setup LuceneGlobalFixture(); /// teardown virtual ~LuceneGlobalFixture(); }; } LucenePlusPlus-rel_3.0.4/src/test/include/LuceneTestFixture.h000066400000000000000000000010431217574114600242750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "test_lucene.h" namespace Lucene { class LuceneTestFixture { public: /// setup LuceneTestFixture(); /// teardown virtual ~LuceneTestFixture(); }; } LucenePlusPlus-rel_3.0.4/src/test/include/MockFSDirectory.h000066400000000000000000000026271217574114600236730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MOCKFSDIRECTORY_H #define MOCKFSDIRECTORY_H #include "test_lucene.h" #include "Directory.h" namespace Lucene { class MockFSDirectory : public Directory { public: MockFSDirectory(const String& path); virtual ~MockFSDirectory(); LUCENE_CLASS(MockFSDirectory); public: Collection allIndexInputs; protected: DirectoryPtr dir; RandomPtr rand; public: virtual IndexInputPtr openInput(const String& name); virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); void tweakBufferSizes(); virtual IndexOutputPtr createOutput(const String& name); virtual void close(); virtual void deleteFile(const String& name); virtual void touchFile(const String& name); virtual uint64_t fileModified(const String& name); virtual bool fileExists(const String& name); virtual HashSet listAll(); virtual int64_t fileLength(const String& name); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/MockFilter.h000066400000000000000000000014361217574114600227200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MOCKFILTER_H #define MOCKFILTER_H #include "test_lucene.h" #include "Filter.h" namespace Lucene { class MockFilter : public Filter { public: MockFilter(); virtual ~MockFilter(); LUCENE_CLASS(MockFilter); protected: bool _wasCalled; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); void clear(); bool wasCalled(); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/MockIndexInput.h000066400000000000000000000017521217574114600235630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MOCKINDEXINPUT_H #define MOCKINDEXINPUT_H #include "test_lucene.h" #include "BufferedIndexInput.h" namespace Lucene { class MockIndexInput : public BufferedIndexInput { public: MockIndexInput(ByteArray bytes); virtual ~MockIndexInput(); LUCENE_CLASS(MockIndexInput); protected: ByteArray buffer; int32_t pointer; int64_t _length; public: virtual void close(); virtual int64_t length(); protected: virtual void readInternal(uint8_t* b, int32_t offset, int32_t length); virtual void seekInternal(int64_t pos); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/MockLock.h000066400000000000000000000014321217574114600223570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MOCKLOCK_H #define MOCKLOCK_H #include "test_lucene.h" #include "Lock.h" namespace Lucene { class MockLock : public Lock { public: MockLock(); virtual ~MockLock(); LUCENE_CLASS(MockLock); public: int32_t lockAttempts; public: virtual bool obtain(); virtual void release(); virtual bool isLocked(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/MockLockFactory.h000066400000000000000000000017041217574114600237110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MOCKLOCKFACTORY_H #define MOCKLOCKFACTORY_H #include "test_lucene.h" #include "LockFactory.h" namespace Lucene { class MockLockFactory : public LockFactory { public: MockLockFactory(); virtual ~MockLockFactory(); LUCENE_CLASS(MockLockFactory); public: bool lockPrefixSet; int32_t makeLockCount; MapStringLock locksCreated; public: virtual void setLockPrefix(const String& lockPrefix); virtual LockPtr makeLock(const String& lockName); virtual void clearLock(const String& lockName); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/MockRAMDirectory.h000066400000000000000000000115401217574114600237740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MOCKRAMDIRECTORY_H #define MOCKRAMDIRECTORY_H #include "test_lucene.h" #include "RAMDirectory.h" namespace Lucene { /// This is a subclass of RAMDirectory that adds methods intended to be used only by unit tests. class MockRAMDirectory : public RAMDirectory { public: MockRAMDirectory(); MockRAMDirectory(DirectoryPtr dir); virtual ~MockRAMDirectory(); LUCENE_CLASS(MockRAMDirectory); public: int64_t maxSize; RandomPtr randomState; // Max actual bytes used. This is set by MockRAMOutputStream int64_t maxUsedSize; double randomIOExceptionRate; bool noDeleteOpenFile; bool preventDoubleWrite; bool crashed; MapStringInt openFiles; // Only tracked if noDeleteOpenFile is true: if an attempt is made to delete an // open file, we enroll it here. HashSet openFilesDeleted; Collection failures; protected: HashSet unSyncedFiles; HashSet createdFiles; public: /// If set to true, we throw an IO exception if the same file is opened by createOutput, ever. void setPreventDoubleWrite(bool value); virtual void sync(const String& name); /// Simulates a crash of OS or machine by overwriting unsynced files. void crash(); void clearCrash(); void setMaxSizeInBytes(int64_t maxSize); int64_t getMaxSizeInBytes(); /// Returns the peek actual storage used (bytes) in this directory. int64_t getMaxUsedSizeInBytes(); void resetMaxUsedSizeInBytes(); /// Emulate windows whereby deleting an open file is not allowed (raise IO exception) void setNoDeleteOpenFile(bool value); bool getNoDeleteOpenFile(); /// If 0.0, no exceptions will be thrown. Else this should be a double 0.0 - 1.0. We will randomly throw an /// IO exception on the first write to an OutputStream based on this probability. 
void setRandomIOExceptionRate(double rate, int64_t seed); double getRandomIOExceptionRate(); void maybeThrowIOException(); virtual void deleteFile(const String& name); virtual HashSet getOpenDeletedFiles(); virtual IndexOutputPtr createOutput(const String& name); virtual IndexInputPtr openInput(const String& name); /// Provided for testing purposes. Use sizeInBytes() instead. int64_t getRecomputedSizeInBytes(); /// Like getRecomputedSizeInBytes(), but, uses actual file lengths rather than buffer allocations (which are /// quantized up to nearest RAMOutputStream::BUFFER_SIZE (now 1024) bytes. int64_t getRecomputedActualSizeInBytes(); virtual void close(); /// Add a Failure object to the list of objects to be evaluated at every potential failure point void failOn(MockDirectoryFailurePtr fail); /// Iterate through the failures list, giving each object a chance to throw an IO exception. void maybeThrowDeterministicException(); protected: void init(); void deleteFile(const String& name, bool forced); }; /// Objects that represent fail-able conditions. Objects of a derived class are created and registered with the /// mock directory. After register, each object will be invoked once for each first write of a file, giving the /// object a chance to throw an IO exception. class MockDirectoryFailure : public LuceneObject { public: MockDirectoryFailure(); virtual ~MockDirectoryFailure(); LUCENE_CLASS(MockDirectoryFailure); public: /// eval is called on the first write of every new file. virtual void eval(MockRAMDirectoryPtr dir); /// reset should set the state of the failure to its default (freshly constructed) state. Reset is convenient /// for tests that want to create one failure object and then reuse it in multiple cases. This, combined with /// the fact that MockDirectoryFailure subclasses are often anonymous classes makes reset difficult to do otherwise. 
virtual MockDirectoryFailurePtr reset(); virtual void setDoFail(); virtual void clearDoFail(); protected: bool doFail; }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/MockRAMInputStream.h000066400000000000000000000022021217574114600242760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MOCKRAMINPUTSTREAM_H #define MOCKRAMINPUTSTREAM_H #include "test_lucene.h" #include "RAMInputStream.h" namespace Lucene { /// Used by MockRAMDirectory to create an input stream that keeps track of when it's been closed. class MockRAMInputStream : public RAMInputStream { public: /// Construct an empty output buffer. MockRAMInputStream(); MockRAMInputStream(MockRAMDirectoryPtr dir, const String& name, RAMFilePtr f); virtual ~MockRAMInputStream(); LUCENE_CLASS(MockRAMInputStream); protected: MockRAMDirectoryWeakPtr _dir; String name; bool isClone; public: virtual void close(); virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/MockRAMOutputStream.h000066400000000000000000000024661217574114600245130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MOCKRAMOUTPUTSTREAM_H #define MOCKRAMOUTPUTSTREAM_H #include "test_lucene.h" #include "RAMOutputStream.h" namespace Lucene { /// Used by MockRAMDirectory to create an output stream that will throw an IOException on fake disk full, track max /// disk space actually used, and maybe throw random IOExceptions. class MockRAMOutputStream : public RAMOutputStream { public: /// Construct an empty output buffer. MockRAMOutputStream(MockRAMDirectoryPtr dir, RAMFilePtr f, const String& name); virtual ~MockRAMOutputStream(); LUCENE_CLASS(MockRAMOutputStream); protected: MockRAMDirectoryWeakPtr _dir; bool first; String name; public: ByteArray singleByte; public: virtual void close(); virtual void flush(); virtual void writeByte(uint8_t b); virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/PayloadHelper.h000066400000000000000000000021141217574114600234040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADHELPER_H #define PAYLOADHELPER_H #include "test_lucene.h" namespace Lucene { class PayloadHelper { public: virtual ~PayloadHelper(); public: static const String NO_PAYLOAD_FIELD; static const String MULTI_FIELD; static const String FIELD; public: static const ByteArray payloadField(); static const ByteArray payloadMultiField1(); static const ByteArray payloadMultiField2(); /// Sets up a RAMDirectory, and adds documents (using intToEnglish()) with two fields: field and multiField /// and analyzes them using the PayloadHelperAnalyzer static IndexSearcherPtr setUp(SimilarityPtr similarity, int32_t numDocs); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/QueryUtils.h000066400000000000000000000056271217574114600230150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYUTILS_H #define QUERYUTILS_H #include "test_lucene.h" namespace Lucene { class QueryUtils { public: virtual ~QueryUtils(); public: /// Check the types of things query objects should be able to do. static void check(QueryPtr q); /// Check very basic hashCode and equals static void checkHashEquals(QueryPtr q); static void checkEqual(QueryPtr q1, QueryPtr q2); static void checkUnequal(QueryPtr q1, QueryPtr q2); /// Deep check that explanations of a query 'score' correctly static void checkExplanations(QueryPtr q, SearcherPtr s); /// Various query sanity checks on a searcher, some checks are only done /// for types of IndexSearcher. 
static void check(QueryPtr q1, SearcherPtr s); static void check(QueryPtr q1, SearcherPtr s, bool wrap); /// Given an IndexSearcher, returns a new IndexSearcher whose IndexReader is a MultiReader /// containing the Reader of the original IndexSearcher, as well as several "empty" /// IndexReaders - some of which will have deleted documents in them. This new IndexSearcher /// should behave exactly the same as the original IndexSearcher. /// @param s the searcher to wrap. /// @param edge if negative, s will be the first sub; if 0, s will be in the middle, if /// positive s will be the last sub static IndexSearcherPtr wrapUnderlyingReader(IndexSearcherPtr s, int32_t edge); /// Given a Searcher, returns a new MultiSearcher wrapping the the original Searcher, as well /// as several "empty" IndexSearchers - some of which will have deleted documents in them. /// This new MultiSearcher should behave exactly the same as the original Searcher. /// @param s the Searcher to wrap /// @param edge if negative, s will be the first sub; if 0, s will be in hte middle, if positive /// s will be the last sub static MultiSearcherPtr wrapSearcher(SearcherPtr s, int32_t edge); static RAMDirectoryPtr makeEmptyIndex(int32_t numDeletedDocs); /// Alternate scorer skipTo(), skipTo(), next(), next(), skipTo(), skipTo(), etc and ensure /// a hitcollector receives same docs and scores static void checkSkipTo(QueryPtr q, IndexSearcherPtr s); /// Check that first skip on just created scorers always goes to the right doc static void checkFirstSkipTo(QueryPtr q, IndexSearcherPtr s); }; } #endif LucenePlusPlus-rel_3.0.4/src/test/include/TestInc.h000066400000000000000000000007371217574114600222350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifdef _WIN32 #include "targetver.h" #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include #endif #include "test_lucene.h" LucenePlusPlus-rel_3.0.4/src/test/include/TestUtils.h000066400000000000000000000025071217574114600226210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TESTUTILS_H #define TESTUTILS_H #include "Lucene.h" namespace Lucene { /// Initialise unit test files directory void setTestDir(const String& dir); /// Return unit test files directory String getTestDir(); /// Return temporary directory String getTempDir(); /// Return temporary directory (randomly generated) String getTempDir(const String& desc); /// Wait for concurrent merge to finish void syncConcurrentMerges(IndexWriterPtr writer); /// Wait for concurrent merge to finish void syncConcurrentMerges(MergeSchedulerPtr ms); /// Return English representation of given integer String intToEnglish(int32_t i); /// Return English representation of given integer (recursive) String _intToEnglish(int32_t i); /// This runs the CheckIndex tool on the index in. /// If any issues are hit, a RuntimeException is thrown; else, true is returned. bool checkIndex(DirectoryPtr dir); } #endif LucenePlusPlus-rel_3.0.4/src/test/include/test_lucene.h000066400000000000000000000026201217574114600231670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TEST_LUCENE_H #define TEST_LUCENE_H #include "Lucene.h" #include "LuceneContrib.h" #include "StringUtils.h" #include namespace std { inline std::ostream& operator<< (std::ostream& out, const Lucene::String& s) { out << Lucene::StringUtils::toUTF8(s); return out; } } namespace Lucene { DECLARE_SHARED_PTR(MockDirectoryFailure) DECLARE_SHARED_PTR(MockFSDirectory) DECLARE_SHARED_PTR(MockLock) DECLARE_SHARED_PTR(MockLockFactory) DECLARE_SHARED_PTR(MockRAMDirectory) DECLARE_SHARED_PTR(MockRAMInputStream) DECLARE_SHARED_PTR(MockRAMOutputStream) DECLARE_SHARED_PTR(MockFilter) typedef HashMap MapStringField; struct check_exception { check_exception(LuceneException::ExceptionType type) : checkType(type) {} inline bool operator()(const LuceneException& e) { return (checkType == LuceneException::Null || e.getType() == checkType); } LuceneException::ExceptionType checkType; }; } #endif LucenePlusPlus-rel_3.0.4/src/test/index/000077500000000000000000000000001217574114600201705ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/index/AddIndexesNoOptimizeTest.cpp000066400000000000000000000435701217574114600255730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "IndexWriter.h" #include "IndexReader.h" #include "WhitespaceAnalyzer.h" #include "LogDocMergePolicy.h" #include "Field.h" #include "Document.h" #include "Term.h" #include "TermDocs.h" #include "PhraseQuery.h" #include "MockRAMDirectory.h" #include "LogByteSizeMergePolicy.h" #include "SerialMergeScheduler.h" #include "SegmentInfo.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(AddIndexesNoOptimizeTest, LuceneTestFixture) static IndexWriterPtr newWriter(DirectoryPtr dir, bool create) { IndexWriterPtr writer = newLucene(dir, newLucene(), create, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMergePolicy(newLucene(writer)); return writer; } static void addDocs(IndexWriterPtr writer, int32_t numDocs) { for (int32_t i = 0; i < numDocs; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } } static void addDocs2(IndexWriterPtr writer, int32_t numDocs) { for (int32_t i = 0; i < numDocs; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"bbb", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } } static void verifyNumDocs(DirectoryPtr dir, int32_t numDocs) { IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(reader->maxDoc(), numDocs); BOOST_CHECK_EQUAL(reader->numDocs(), numDocs); reader->close(); } static void verifyTermDocs(DirectoryPtr dir, TermPtr term, int32_t numDocs) { IndexReaderPtr reader = IndexReader::open(dir, true); TermDocsPtr termDocs = reader->termDocs(term); int32_t count = 0; while (termDocs->next()) ++count; BOOST_CHECK_EQUAL(count, numDocs); reader->close(); } static void setUpDirs(DirectoryPtr dir, DirectoryPtr aux) { IndexWriterPtr writer; writer = newWriter(dir, true); writer->setMaxBufferedDocs(1000); // add 1000 documents in 1 segment addDocs(writer, 
1000); BOOST_CHECK_EQUAL(1000, writer->maxDoc()); BOOST_CHECK_EQUAL(1, writer->getSegmentCount()); writer->close(); writer = newWriter(aux, true); writer->setUseCompoundFile(false); // use one without a compound file writer->setMaxBufferedDocs(100); writer->setMergeFactor(10); // add 30 documents in 3 segments for (int32_t i = 0; i < 3; ++i) { addDocs(writer, 10); writer->close(); writer = newWriter(aux, false); writer->setUseCompoundFile(false); // use one without a compound file writer->setMaxBufferedDocs(100); writer->setMergeFactor(10); } BOOST_CHECK_EQUAL(30, writer->maxDoc()); BOOST_CHECK_EQUAL(3, writer->getSegmentCount()); writer->close(); } BOOST_AUTO_TEST_CASE(testSimpleCase) { // main directory DirectoryPtr dir = newLucene(); // two auxiliary directories DirectoryPtr aux = newLucene(); DirectoryPtr aux2 = newLucene(); IndexWriterPtr writer = newWriter(dir, true); // add 100 documents addDocs(writer, 100); BOOST_CHECK_EQUAL(writer->maxDoc(), 100); writer->close(); writer = newWriter(aux, true); writer->setUseCompoundFile(false); // use one without a compound file // add 40 documents in separate files addDocs(writer, 40); BOOST_CHECK_EQUAL(writer->maxDoc(), 40); writer->close(); writer = newWriter(aux2, true); // add 40 documents in compound files addDocs2(writer, 50); BOOST_CHECK_EQUAL(writer->maxDoc(), 50); writer->close(); // test doc count before segments are merged writer = newWriter(dir, false); BOOST_CHECK_EQUAL(writer->maxDoc(), 100); writer->addIndexesNoOptimize(newCollection(aux, aux2)); BOOST_CHECK_EQUAL(writer->maxDoc(), 190); writer->close(); // make sure the old index is correct verifyNumDocs(aux, 40); // make sure the new index is correct verifyNumDocs(dir, 190); // now add another set in. 
DirectoryPtr aux3 = newLucene(); writer = newWriter(aux3, true); // add 40 documents addDocs(writer, 40); BOOST_CHECK_EQUAL(writer->maxDoc(), 40); writer->close(); // test doc count before segments are merged/index is optimized writer = newWriter(dir, false); BOOST_CHECK_EQUAL(writer->maxDoc(), 190); writer->addIndexesNoOptimize(newCollection(aux3)); BOOST_CHECK_EQUAL(writer->maxDoc(), 230); writer->close(); // make sure the new index is correct verifyNumDocs(dir, 230); verifyTermDocs(dir, newLucene(L"content", L"aaa"), 180); verifyTermDocs(dir, newLucene(L"content", L"bbb"), 50); // now optimize it. writer = newWriter(dir, false); writer->optimize(); writer->close(); // make sure the new index is correct verifyNumDocs(dir, 230); verifyTermDocs(dir, newLucene(L"content", L"aaa"), 180); verifyTermDocs(dir, newLucene(L"content", L"bbb"), 50); // now add a single document DirectoryPtr aux4 = newLucene(); writer = newWriter(aux4, true); addDocs2(writer, 1); writer->close(); writer = newWriter(dir, false); BOOST_CHECK_EQUAL(writer->maxDoc(), 230); writer->addIndexesNoOptimize(newCollection(aux4)); BOOST_CHECK_EQUAL(writer->maxDoc(), 231); writer->close(); verifyNumDocs(dir, 231); verifyTermDocs(dir, newLucene(L"content", L"bbb"), 51); } BOOST_AUTO_TEST_CASE(testWithPendingDeletes) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); setUpDirs(dir, aux); IndexWriterPtr writer = newWriter(dir, false); writer->addIndexesNoOptimize(newCollection(aux)); // Adds 10 docs, then replaces them with another 10 docs, so 10 pending deletes for (int32_t i = 0; i < 20; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i % 10), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"content", L"bbb " + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->updateDocument(newLucene(L"id", StringUtils::toString(i % 10)), doc); } // Deletes one of the 10 added docs, 
leaving 9 PhraseQueryPtr q = newLucene(); q->add(newLucene(L"content", L"bbb")); q->add(newLucene(L"content", L"14")); writer->deleteDocuments(q); writer->optimize(); writer->commit(); verifyNumDocs(dir, 1039); verifyTermDocs(dir, newLucene(L"content", L"aaa"), 1030); verifyTermDocs(dir, newLucene(L"content", L"bbb"), 9); writer->close(); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testWithPendingDeletes2) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); setUpDirs(dir, aux); IndexWriterPtr writer = newWriter(dir, false); // Adds 10 docs, then replaces them with another 10 docs, so 10 pending deletes for (int32_t i = 0; i < 20; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i % 10), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"content", L"bbb " + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->updateDocument(newLucene(L"id", StringUtils::toString(i % 10)), doc); } writer->addIndexesNoOptimize(newCollection(aux)); // Deletes one of the 10 added docs, leaving 9 PhraseQueryPtr q = newLucene(); q->add(newLucene(L"content", L"bbb")); q->add(newLucene(L"content", L"14")); writer->deleteDocuments(q); writer->optimize(); writer->commit(); verifyNumDocs(dir, 1039); verifyTermDocs(dir, newLucene(L"content", L"aaa"), 1030); verifyTermDocs(dir, newLucene(L"content", L"bbb"), 9); writer->close(); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testWithPendingDeletes3) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); setUpDirs(dir, aux); IndexWriterPtr writer = newWriter(dir, false); // Adds 10 docs, then replaces them with another 10 docs, so 10 pending deletes for (int32_t i = 0; i < 20; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i % 10), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"content", L"bbb " + 
StringUtils::toString(i), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->updateDocument(newLucene(L"id", StringUtils::toString(i % 10)), doc); } // Deletes one of the 10 added docs, leaving 9 PhraseQueryPtr q = newLucene(); q->add(newLucene(L"content", L"bbb")); q->add(newLucene(L"content", L"14")); writer->deleteDocuments(q); writer->addIndexesNoOptimize(newCollection(aux)); writer->optimize(); writer->commit(); verifyNumDocs(dir, 1039); verifyTermDocs(dir, newLucene(L"content", L"aaa"), 1030); verifyTermDocs(dir, newLucene(L"content", L"bbb"), 9); writer->close(); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testAddSelf) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); IndexWriterPtr writer = newWriter(dir, true); // add 100 documents addDocs(writer, 100); BOOST_CHECK_EQUAL(100, writer->maxDoc()); writer->close(); writer = newWriter(aux, true); writer->setUseCompoundFile(false); // use one without a compound file writer->setMaxBufferedDocs(1000); // add 140 documents in separate files addDocs(writer, 40); writer->close(); writer = newWriter(aux, true); writer->setUseCompoundFile(false); // use one without a compound file writer->setMaxBufferedDocs(1000); addDocs(writer, 100); writer->close(); writer = newWriter(dir, false); BOOST_CHECK_EXCEPTION(writer->addIndexesNoOptimize(newCollection(aux, dir)), LuceneException, check_exception(LuceneException::IllegalArgument)); // cannot add self BOOST_CHECK_EQUAL(100, writer->maxDoc()); writer->close(); // make sure the index is correct verifyNumDocs(dir, 100); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testNoTailSegments) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); setUpDirs(dir, aux); IndexWriterPtr writer = newWriter(dir, false); writer->setMaxBufferedDocs(10); writer->setMergeFactor(4); addDocs(writer, 10); writer->addIndexesNoOptimize(newCollection(aux)); BOOST_CHECK_EQUAL(1040, 
writer->maxDoc()); BOOST_CHECK_EQUAL(2, writer->getSegmentCount()); BOOST_CHECK_EQUAL(1000, writer->getDocCount(0)); writer->close(); // make sure the index is correct verifyNumDocs(dir, 1040); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testNoCopySegments) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); setUpDirs(dir, aux); IndexWriterPtr writer = newWriter(dir, false); writer->setMaxBufferedDocs(9); writer->setMergeFactor(4); addDocs(writer, 2); writer->addIndexesNoOptimize(newCollection(aux)); BOOST_CHECK_EQUAL(1032, writer->maxDoc()); BOOST_CHECK_EQUAL(2, writer->getSegmentCount()); BOOST_CHECK_EQUAL(1000, writer->getDocCount(0)); writer->close(); // make sure the index is correct verifyNumDocs(dir, 1032); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testNoMergeAfterCopy) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); setUpDirs(dir, aux); IndexWriterPtr writer = newWriter(dir, false); writer->setMaxBufferedDocs(10); writer->setMergeFactor(4); writer->addIndexesNoOptimize(newCollection(aux, newLucene(aux))); BOOST_CHECK_EQUAL(1060, writer->maxDoc()); BOOST_CHECK_EQUAL(1000, writer->getDocCount(0)); writer->close(); // make sure the index is correct verifyNumDocs(dir, 1060); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testMergeAfterCopy) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); setUpDirs(dir, aux); IndexReaderPtr reader = IndexReader::open(aux, false); for (int32_t i = 0; i < 20; ++i) reader->deleteDocument(i); BOOST_CHECK_EQUAL(10, reader->numDocs()); reader->close(); IndexWriterPtr writer = newWriter(dir, false); writer->setMaxBufferedDocs(4); writer->setMergeFactor(4); writer->addIndexesNoOptimize(newCollection(aux, newLucene(aux))); BOOST_CHECK_EQUAL(1020, writer->maxDoc()); BOOST_CHECK_EQUAL(1000, writer->getDocCount(0)); writer->close(); // make sure the 
index is correct verifyNumDocs(dir, 1020); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testMoreMerges) { // main directory DirectoryPtr dir = newLucene(); // auxiliary directory DirectoryPtr aux = newLucene(); DirectoryPtr aux2 = newLucene(); setUpDirs(dir, aux); IndexWriterPtr writer = newWriter(aux2, true); writer->setMaxBufferedDocs(100); writer->setMergeFactor(10); writer->addIndexesNoOptimize(newCollection(aux)); BOOST_CHECK_EQUAL(30, writer->maxDoc()); BOOST_CHECK_EQUAL(3, writer->getSegmentCount()); writer->close(); IndexReaderPtr reader = IndexReader::open(aux, false); for (int32_t i = 0; i < 27; ++i) reader->deleteDocument(i); BOOST_CHECK_EQUAL(3, reader->numDocs()); reader->close(); reader = IndexReader::open(aux2, false); for (int32_t i = 0; i < 8; ++i) reader->deleteDocument(i); BOOST_CHECK_EQUAL(22, reader->numDocs()); reader->close(); writer = newWriter(dir, false); writer->setMaxBufferedDocs(6); writer->setMergeFactor(4); writer->addIndexesNoOptimize(newCollection(aux, aux2)); BOOST_CHECK_EQUAL(1025, writer->maxDoc()); BOOST_CHECK_EQUAL(1000, writer->getDocCount(0)); writer->close(); // make sure the index is correct verifyNumDocs(dir, 1025); dir->close(); aux->close(); } BOOST_AUTO_TEST_CASE(testHangOnClose) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMergePolicy(newLucene(writer)); writer->setMaxBufferedDocs(5); writer->setUseCompoundFile(false); writer->setMergeFactor(100); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); for (int32_t i = 0; i < 60; ++i) writer->addDocument(doc); writer->setMaxBufferedDocs(200); DocumentPtr doc2 = newLucene(); doc2->add(newLucene(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_NO)); doc2->add(newLucene(L"content", L"aaa bbb ccc ddd eee fff 
ggg hhh iii", Field::STORE_YES, Field::INDEX_NO)); doc2->add(newLucene(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_NO)); doc2->add(newLucene(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_NO)); for (int32_t i = 0; i < 60; ++i) writer->addDocument(doc2); writer->close(); DirectoryPtr dir2 = newLucene(); writer = newLucene(dir2, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); LogByteSizeMergePolicyPtr lmp = newLucene(writer); lmp->setMinMergeMB(0.0001); writer->setMergePolicy(lmp); writer->setMergeFactor(4); writer->setUseCompoundFile(false); writer->setMergeScheduler(newLucene()); writer->addIndexesNoOptimize(newCollection(dir)); writer->close(); dir->close(); dir2->close(); } BOOST_AUTO_TEST_CASE(testTargetCFS) { // make sure CFS of destination indexwriter is respected when copying tail segments DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newWriter(dir, true); writer->setUseCompoundFile(false); addDocs(writer, 1); writer->close(); DirectoryPtr other = newLucene(); writer = newWriter(other, true); writer->setUseCompoundFile(true); writer->addIndexesNoOptimize(newCollection(dir)); BOOST_CHECK(writer->newestSegment()->getUseCompoundFile()); writer->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/AtomicUpdateTest.cpp000066400000000000000000000136161217574114600241220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexWriter.h" #include "IndexReader.h" #include "LuceneThread.h" #include "Document.h" #include "Field.h" #include "Term.h" #include "SimpleAnalyzer.h" #include "MockRAMDirectory.h" #include "FSDirectory.h" #include "Random.h" #include "MiscUtils.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(AtomicUpdateTest, LuceneTestFixture) class MockIndexWriter : public IndexWriter { public: MockIndexWriter(DirectoryPtr dir, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(dir, a, create, mfl) { random = newLucene(); } virtual ~MockIndexWriter() { } protected: RandomPtr random; public: virtual bool testPoint(const String& name) { if (random->nextInt(4) == 2) LuceneThread::threadYield(); return true; } }; DECLARE_SHARED_PTR(TimedThread) DECLARE_SHARED_PTR(IndexerThread) DECLARE_SHARED_PTR(SearcherThread) class TimedThread : public LuceneThread { public: TimedThread() { this->failed = false; } virtual ~TimedThread() { } LUCENE_CLASS(TimedThread); public: bool failed; protected: static const int32_t RUN_TIME_SEC; public: virtual void doWork() = 0; virtual void run() { int64_t stopTime = MiscUtils::currentTimeMillis() + 1000 * RUN_TIME_SEC; try { while ((int64_t)MiscUtils::currentTimeMillis() < stopTime && !failed) doWork(); } catch (LuceneException& e) { failed = true; BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; const int32_t TimedThread::RUN_TIME_SEC = 3; class IndexerThread : public TimedThread { public: IndexerThread(IndexWriterPtr writer) { this->writer = writer; } virtual ~IndexerThread() { } LUCENE_CLASS(IndexerThread); public: IndexWriterPtr writer; public: virtual void doWork() { // Update all 100 docs for (int32_t i = 0; i < 100; ++i) { DocumentPtr d = newLucene(); d->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, 
Field::INDEX_NOT_ANALYZED)); d->add(newLucene(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->updateDocument(newLucene(L"id", StringUtils::toString(i)), d); } } }; class SearcherThread : public TimedThread { public: SearcherThread(DirectoryPtr directory) { this->directory = directory; } virtual ~SearcherThread() { } LUCENE_CLASS(SearcherThread); protected: DirectoryPtr directory; public: virtual void doWork() { IndexReaderPtr r = IndexReader::open(directory, true); if (r->numDocs() != 100) BOOST_FAIL("num docs failure"); r->close(); } }; // Run one indexer and 2 searchers against single index as stress test. static void runTest(DirectoryPtr directory) { Collection threads(Collection::newInstance(4)); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(7); writer->setMergeFactor(3); // Establish a base index of 100 docs for (int32_t i = 0; i < 100; ++i) { DocumentPtr d = newLucene(); d->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d->add(newLucene(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED)); if ((i - 1) % 7 == 0) writer->commit(); writer->addDocument(d); } writer->commit(); IndexReaderPtr r = IndexReader::open(directory, true); BOOST_CHECK_EQUAL(100, r->numDocs()); r->close(); IndexerThreadPtr indexerThread1 = newLucene(writer); threads[0] = indexerThread1; indexerThread1->start(); IndexerThreadPtr indexerThread2 = newLucene(writer); threads[1] = indexerThread2; indexerThread2->start(); SearcherThreadPtr searcherThread1 = newLucene(directory); threads[2] = searcherThread1; searcherThread1->start(); SearcherThreadPtr searcherThread2 = newLucene(directory); threads[3] = searcherThread2; searcherThread2->start(); indexerThread1->join(); indexerThread2->join(); searcherThread1->join(); searcherThread2->join(); writer->close(); 
BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1 BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2 BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1 BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2 } /// Run above stress test against RAMDirectory. BOOST_AUTO_TEST_CASE(testAtomicUpdatesRAMDirectory) { DirectoryPtr directory = newLucene(); runTest(directory); directory->close(); } /// Run above stress test against FSDirectory BOOST_AUTO_TEST_CASE(testAtomicUpdatesFSDirectory) { String dirPath(getTempDir(L"lucene.test.atomic")); DirectoryPtr directory = FSDirectory::open(dirPath); runTest(directory); directory->close(); FileUtils::removeDirectory(dirPath); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/BackwardsCompatibilityTest.cpp000066400000000000000000000542041217574114600261740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FSDirectory.h" #include "IndexReader.h" #include "IndexWriter.h" #include "IndexSearcher.h" #include "WhitespaceAnalyzer.h" #include "Term.h" #include "Document.h" #include "Field.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "ReaderUtil.h" #include "SegmentReader.h" #include "FieldsReader.h" #include "FieldSelector.h" #include "TermQuery.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "CompoundFileReader.h" #include "NumericField.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(BackwardsCompatibilityTest, LuceneTestFixture) /// Verify we can read the pre-2.1 file format, do searches against it, and add documents to it. static String fullDir(const String& dirName) { return FileUtils::joinPath(getTempDir(), dirName); } static void rmDir(const String& dirName) { FileUtils::removeDirectory(FileUtils::joinPath(getTempDir(), dirName)); } static void addDoc(IndexWriterPtr writer, int32_t id) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"id", StringUtils::toString(id), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); const uint8_t utf8Field[] = {0x4c, 0x75, 0xf0, 0x9d, 0x84, 0x9e, 0x63, 0x65, 0xf0, 0x9d, 0x85, 0xa0, 0x6e, 0x65, 0x20, 0x00, 0x20, 0xe2, 0x98, 0xa0, 0x20, 0x61, 0x62, 0xf1, 0x95, 0xb0, 0x97, 0x63, 0x64}; const uint8_t utf8Field2[] = {0x66, 0x69, 0x65, 0xe2, 0xb1, 0xb7, 0x6c, 0x64}; doc->add(newLucene(L"autf8", UTF8_TO_STRING(utf8Field), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"utf8", UTF8_TO_STRING(utf8Field), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"content2", L"here is more content with aaa aaa aaa", Field::STORE_YES, 
Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(UTF8_TO_STRING(utf8Field2), L"field with non-ascii name", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); // add numeric fields, to test if flex preserves encoding doc->add(newLucene(L"trieInt", 4)->setIntValue(id)); doc->add(newLucene(L"trieLong", 4)->setLongValue(id)); writer->addDocument(doc); } static void addNoProxDoc(IndexWriterPtr writer) { DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"content3", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED); f->setOmitTermFreqAndPositions(true); doc->add(f); f = newLucene(L"content4", L"aaa", Field::STORE_YES, Field::INDEX_NO); f->setOmitTermFreqAndPositions(true); doc->add(f); writer->addDocument(doc); } static void createIndex(const String& dirName, bool doCFS) { FileUtils::removeDirectory(dirName); String fullName(fullDir(dirName)); DirectoryPtr dir = FSDirectory::open(fullName); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(doCFS); writer->setMaxBufferedDocs(10); for (int32_t i = 0; i < 35; ++i) addDoc(writer, i); BOOST_CHECK_EQUAL(35, writer->maxDoc()); writer->close(); // open fresh writer so we get no prx file in the added segment writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(doCFS); writer->setMaxBufferedDocs(10); addNoProxDoc(writer); writer->close(); // Delete one doc so we get a .del file: IndexReaderPtr reader = IndexReader::open(dir, false); TermPtr searchTerm = newLucene(L"id", L"7"); int32_t delCount = reader->deleteDocuments(searchTerm); BOOST_CHECK_EQUAL(1, delCount); // delete the right number of documents // Set one norm so we get a .s0 file: reader->setNorm(21, L"content", 1.5); reader->close(); } static void copyIndex(const String& dirName) { String dirSource(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"legacyindex"), dirName)); 
String dirDest(FileUtils::joinPath(getTempDir(), dirName)); FileUtils::copyDirectory(dirSource, dirDest); } static const wchar_t* oldNames[] = { L"19.cfs", L"19.nocfs", L"20.cfs", L"20.nocfs", L"21.cfs", L"21.nocfs", L"22.cfs", L"22.nocfs", L"23.cfs", L"23.nocfs", L"24.cfs", L"24.nocfs", L"29.cfs", L"29.nocfs" }; static const int32_t oldNamesLength = SIZEOF_ARRAY(oldNames); namespace CheckCompressedFields { class CompressedFieldSelector : public FieldSelector { public: virtual ~CompressedFieldSelector() { }; LUCENE_CLASS(CompressedFieldSelector); public: virtual FieldSelectorResult accept(const String& fieldName) { return fieldName == L"compressed" ? FieldSelector::SELECTOR_SIZE : FieldSelector::SELECTOR_LOAD; } }; } void checkCompressedFields29(DirectoryPtr dir, bool shouldStillBeCompressed) { int32_t count = 0; static String TEXT_TO_COMPRESS = L"this is a compressed field and should appear in 3.0 as an uncompressed field after merge"; int32_t TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2; // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields static uint8_t BINARY_TO_COMPRESS[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; int32_t BINARY_PLAIN_LENGTH = SIZEOF_ARRAY(BINARY_TO_COMPRESS); IndexReaderPtr reader = IndexReader::open(dir, true); LuceneException finally; try { // look into sub readers and check if raw merge is on/off Collection readers = Collection::newInstance(); ReaderUtil::gatherSubReaders(readers, reader); for (Collection::iterator ir = readers.begin(); ir != readers.end(); ++ir) { FieldsReaderPtr fr = boost::dynamic_pointer_cast(*ir)->getFieldsReader(); // for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index BOOST_CHECK_NE(shouldStillBeCompressed, fr->canReadRawDocs()); } // test that decompression works correctly for (int32_t i = 0; i < reader->maxDoc(); ++i) { if (!reader->isDeleted(i)) { DocumentPtr d = reader->document(i); if 
(!d->get(L"content3").empty()) continue; ++count; FieldablePtr compressed = d->getFieldable(L"compressed"); if (StringUtils::toInt(d->get(L"id")) % 2 == 0) { BOOST_CHECK(!compressed->isBinary()); BOOST_CHECK_EQUAL(TEXT_TO_COMPRESS, compressed->stringValue()); // correctly decompressed string } else { BOOST_CHECK(compressed->isBinary()); BOOST_CHECK(std::memcmp(BINARY_TO_COMPRESS, compressed->getBinaryValue().get(), BINARY_PLAIN_LENGTH) == 0); // correctly decompressed binary } } } // check if field was decompressed after optimize for (int32_t i = 0; i < reader->maxDoc(); ++i) { if (!reader->isDeleted(i)) { DocumentPtr d = reader->document(i, newLucene()); if (!d->get(L"content3").empty()) continue; ++count; // read the size from the binary value using DataInputStream (this prevents us from doing the shift ops ourselves) uint8_t* ds = d->getFieldable(L"compressed")->getBinaryValue().get(); int32_t actualSize = ((ds[0] & 0xff) << 24) + ((ds[1] & 0xff) << 16) + ((ds[2] & 0xff) << 8) + (ds[3] & 0xff); int32_t compressedSize = StringUtils::toInt(d->get(L"compressedSize")); bool binary = (StringUtils::toInt(d->get(L"id")) % 2 > 0); int32_t shouldSize = shouldStillBeCompressed ? compressedSize : (binary ? 
BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH); BOOST_CHECK_EQUAL(shouldSize, actualSize); if (!shouldStillBeCompressed) BOOST_CHECK_NE(compressedSize, actualSize); } } BOOST_CHECK_EQUAL(34 * 2, count); // correct number of tests } catch (LuceneException& e) { finally = e; } reader->close(); finally.throwException(); } static void testHits(Collection hits, int32_t expectedCount, IndexReaderPtr reader) { int32_t hitCount = hits.size(); BOOST_CHECK_EQUAL(expectedCount, hitCount); for (int32_t i = 0; i < hitCount; ++i) { reader->document(hits[i]->doc); reader->getTermFreqVectors(hits[i]->doc); } } static void searchIndex(const String& dirName, const String& oldName) { String dirPath = fullDir(dirName); DirectoryPtr dir = FSDirectory::open(dirPath); IndexSearcherPtr searcher = newLucene(dir, true); IndexReaderPtr reader = searcher->getIndexReader(); checkIndex(dir); const uint8_t utf8Field[] = {0x4c, 0x75, 0xf0, 0x9d, 0x84, 0x9e, 0x63, 0x65, 0xf0, 0x9d, 0x85, 0xa0, 0x6e, 0x65, 0x20, 0x00, 0x20, 0xe2, 0x98, 0xa0, 0x20, 0x61, 0x62, 0xf1, 0x95, 0xb0, 0x97, 0x63, 0x64}; const uint8_t utf8Field2[] = {0x66, 0x69, 0x65, 0xe2, 0xb1, 0xb7, 0x6c, 0x64}; const uint8_t utf8Lucene[] = {0x4c, 0x75, 0xf0, 0x9d, 0x84, 0x9e, 0x63, 0x65, 0xf0, 0x9d, 0x85, 0xa0, 0x6e, 0x65}; const uint8_t utf8Abcd[] = {0x61, 0x62, 0xf1, 0x95, 0xb0, 0x97, 0x63, 0x64}; const wchar_t _zeroField[] = {0x0000}; String zeroField(_zeroField, SIZEOF_ARRAY(_zeroField)); for (int32_t i = 0; i < 35; ++i) { if (!reader->isDeleted(i)) { DocumentPtr d = reader->document(i); Collection fields = d->getFields(); if (!boost::starts_with(oldName, L"19.") && !boost::starts_with(oldName, L"20.") && !boost::starts_with(oldName, L"21.") && !boost::starts_with(oldName, L"22.")) { if (!d->getField(L"content3")) { int32_t numFields = boost::starts_with(oldName, L"29.") ? 
7 : 5; BOOST_CHECK_EQUAL(numFields, fields.size()); FieldPtr f = boost::dynamic_pointer_cast(d->getField(L"id")); BOOST_CHECK_EQUAL(StringUtils::toString(i), f->stringValue()); f = boost::dynamic_pointer_cast(d->getField(L"utf8")); BOOST_CHECK_EQUAL(UTF8_TO_STRING(utf8Field), f->stringValue()); f = boost::dynamic_pointer_cast(d->getField(L"autf8")); BOOST_CHECK_EQUAL(UTF8_TO_STRING(utf8Field), f->stringValue()); f = boost::dynamic_pointer_cast(d->getField(L"content2")); BOOST_CHECK_EQUAL(L"here is more content with aaa aaa aaa", f->stringValue()); f = boost::dynamic_pointer_cast(d->getField(UTF8_TO_STRING(utf8Field2))); BOOST_CHECK_EQUAL(L"field with non-ascii name", f->stringValue()); } } } else { // Only ID 7 is deleted BOOST_CHECK_EQUAL(7, i); } } Collection hits = searcher->search(newLucene(newLucene(L"content", L"aaa")), FilterPtr(), 1000)->scoreDocs; // First document should be #21 since it's norm was increased DocumentPtr d = searcher->doc(hits[0]->doc); BOOST_CHECK_EQUAL(L"21", d->get(L"id")); // get the right document first testHits(hits, 34, searcher->getIndexReader()); if (!boost::starts_with(oldName, L"19.") && !boost::starts_with(oldName, L"20.") && !boost::starts_with(oldName, L"21.") && !boost::starts_with(oldName, L"22.")) { // Test on indices >= 2.3 hits = searcher->search(newLucene(newLucene(L"utf8", zeroField)), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(34, hits.size()); hits = searcher->search(newLucene(newLucene(L"utf8", UTF8_TO_STRING(utf8Lucene))), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(34, hits.size()); hits = searcher->search(newLucene(newLucene(L"utf8", UTF8_TO_STRING(utf8Abcd))), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(34, hits.size()); } searcher->close(); dir->close(); } // Open pre-lockless index, add docs, do a delete and setNorm, and search static void changeIndexNoAdds(const String& dirName) { String dirPath = fullDir(dirName); DirectoryPtr dir = FSDirectory::open(dirPath); // make sure searching sees right 
# hits IndexSearcherPtr searcher = newLucene(dir, true); Collection hits = searcher->search(newLucene(newLucene(L"content", L"aaa")), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(34, hits.size()); // number of hits DocumentPtr d = searcher->doc(hits[0]->doc); BOOST_CHECK_EQUAL(L"21", d->get(L"id")); // first document searcher->close(); // make sure we can do a delete & setNorm against this pre-lockless segment IndexReaderPtr reader = IndexReader::open(dir, false); TermPtr searchTerm = newLucene(L"id", L"6"); int32_t delCount = reader->deleteDocuments(searchTerm); BOOST_CHECK_EQUAL(1, delCount); // delete count reader->setNorm(22, L"content", 2.0); reader->close(); // make sure they "took" searcher = newLucene(dir, true); hits = searcher->search(newLucene(newLucene(L"content", L"aaa")), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(33, hits.size()); // number of hits d = searcher->doc(hits[0]->doc); BOOST_CHECK_EQUAL(L"22", d->get(L"id")); // first document testHits(hits, 33, searcher->getIndexReader()); searcher->close(); // optimize IndexWriterPtr writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); writer->optimize(); writer->close(); searcher = newLucene(dir, true); hits = searcher->search(newLucene(newLucene(L"content", L"aaa")), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(33, hits.size()); // number of hits d = searcher->doc(hits[0]->doc); BOOST_CHECK_EQUAL(L"22", d->get(L"id")); // first document testHits(hits, 33, searcher->getIndexReader()); searcher->close(); dir->close(); } // Open pre-lockless index, add docs, do a delete and setNorm, and search static void changeIndexWithAdds(const String& dirName) { String origDirName(dirName); String dirPath = fullDir(dirName); DirectoryPtr dir = FSDirectory::open(dirPath); // open writer IndexWriterPtr writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); // add 10 docs for (int32_t i = 0; i < 10; ++i) addDoc(writer, 35 + i); // make sure writer 
sees right total - writer seems not to know about deletes in .del? int32_t dirNumber = StringUtils::toInt(dirName.substr(0, 2)); int32_t expected = dirNumber < 24 ? 45 : 46; BOOST_CHECK_EQUAL(expected, writer->maxDoc()); // doc count writer->close(); // make sure searching sees right # hits IndexSearcherPtr searcher = newLucene(dir, true); Collection hits = searcher->search(newLucene(newLucene(L"content", L"aaa")), FilterPtr(), 1000)->scoreDocs; DocumentPtr d = searcher->doc(hits[0]->doc); BOOST_CHECK_EQUAL(L"21", d->get(L"id")); // first document testHits(hits, 44, searcher->getIndexReader()); searcher->close(); // make sure we can do delete & setNorm against this pre-lockless segment IndexReaderPtr reader = IndexReader::open(dir, false); TermPtr searchTerm = newLucene(L"id", L"6"); int32_t delCount = reader->deleteDocuments(searchTerm); BOOST_CHECK_EQUAL(1, delCount); // delete count reader->setNorm(22, L"content", 2.0); reader->close(); // make sure they "took" searcher = newLucene(dir, true); hits = searcher->search(newLucene(newLucene(L"content", L"aaa")), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(43, hits.size()); // number of hits d = searcher->doc(hits[0]->doc); BOOST_CHECK_EQUAL(L"22", d->get(L"id")); // first document testHits(hits, 43, searcher->getIndexReader()); searcher->close(); // optimize writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); writer->optimize(); writer->close(); searcher = newLucene(dir, true); hits = searcher->search(newLucene(newLucene(L"content", L"aaa")), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(43, hits.size()); // number of hits d = searcher->doc(hits[0]->doc); testHits(hits, 43, searcher->getIndexReader()); BOOST_CHECK_EQUAL(L"22", d->get(L"id")); // first document searcher->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testCreateCFS) { String dirName(L"testindex.cfs"); createIndex(dirName, true); rmDir(dirName); } BOOST_AUTO_TEST_CASE(testCreateNoCFS) { String 
dirName(L"testindex.nocfs"); createIndex(dirName, true); rmDir(dirName); } BOOST_AUTO_TEST_CASE(testOptimizeOldIndex) { int32_t hasTested29 = 0; for (int32_t i = 0; i < oldNamesLength; ++i) { copyIndex(oldNames[i]); String dirName(fullDir(oldNames[i])); DirectoryPtr dir = FSDirectory::open(dirName); if (boost::starts_with(oldNames[i], L"29.")) { checkCompressedFields29(dir, true); ++hasTested29; } IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->optimize(); w->close(); checkIndex(dir); if (boost::starts_with(oldNames[i], L"29.")) { checkCompressedFields29(dir, false); ++hasTested29; } dir->close(); rmDir(oldNames[i]); } BOOST_CHECK_EQUAL(4, hasTested29); // test for compressed field should have run 4 times } BOOST_AUTO_TEST_CASE(testSearchOldIndex) { for (int32_t i = 0; i < oldNamesLength; ++i) { copyIndex(oldNames[i]); String dirName(fullDir(oldNames[i])); searchIndex(oldNames[i], oldNames[i]); rmDir(oldNames[i]); } } BOOST_AUTO_TEST_CASE(testIndexOldIndexNoAdds) { for (int32_t i = 0; i < oldNamesLength; ++i) { copyIndex(oldNames[i]); String dirName(fullDir(oldNames[i])); changeIndexNoAdds(oldNames[i]); rmDir(oldNames[i]); } } BOOST_AUTO_TEST_CASE(testIndexOldIndex) { for (int32_t i = 0; i < oldNamesLength; ++i) { copyIndex(oldNames[i]); String dirName(fullDir(oldNames[i])); changeIndexWithAdds(oldNames[i]); rmDir(oldNames[i]); } } // Verifies that the expected file names were produced BOOST_AUTO_TEST_CASE(testExactFileNames) { String outputDir = L"lucene.backwardscompat0.index"; rmDir(outputDir); LuceneException finally; try { DirectoryPtr dir = FSDirectory::open(fullDir(outputDir)); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setRAMBufferSizeMB(16.0); for (int32_t i = 0; i < 35; ++i) addDoc(writer, i); BOOST_CHECK_EQUAL(35, writer->maxDoc()); // doc count writer->close(); // Delete one doc so we get a .del file IndexReaderPtr reader = IndexReader::open(dir, false); 
TermPtr searchTerm = newLucene(L"id", L"7"); int32_t delCount = reader->deleteDocuments(searchTerm); BOOST_CHECK_EQUAL(1, delCount); // delete the right number of documents // Set one norm so we get a .s0 file reader->setNorm(21, L"content", 1.5); reader->close(); CompoundFileReaderPtr cfsReader = newLucene(dir, L"_0.cfs"); FieldInfosPtr fieldInfos = newLucene(cfsReader, L"_0.fnm"); int32_t contentFieldIndex = -1; for (int32_t i = 0; i < fieldInfos->size(); ++i) { FieldInfoPtr fi = fieldInfos->fieldInfo(i); if (fi->name == L"content") { contentFieldIndex = i; break; } } cfsReader->close(); BOOST_CHECK_NE(contentFieldIndex, -1); // locate the 'content' field number in the _2.cfs segment // Now verify file names HashSet expected = HashSet::newInstance(); expected.add(L"_0.cfs"); expected.add(L"_0_1.del"); expected.add(L"_0_1.s" + StringUtils::toString(contentFieldIndex)); expected.add(L"segments_3"); expected.add(L"segments.gen"); HashSet actual = dir->listAll(); BOOST_CHECK_EQUAL(expected, actual); dir->close(); } catch (LuceneException& e) { finally = e; } rmDir(outputDir); finally.throwException(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/ByteSlicesTest.cpp000066400000000000000000000106541217574114600236100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "ByteBlockPool.h" #include "DocumentsWriter.h" #include "ByteSliceWriter.h" #include "ByteSliceReader.h" #include "IndexWriter.h" #include "Random.h" #include "MiscUtils.h" using namespace Lucene; DECLARE_SHARED_PTR(TestByteBlockAllocator) class TestByteBlockAllocator : public ByteBlockPoolAllocatorBase { public: TestByteBlockAllocator() { this->freeByteBlocks = Collection::newInstance(); } virtual ~TestByteBlockAllocator() { } LUCENE_CLASS(TestByteBlockAllocator); public: Collection freeByteBlocks; public: virtual ByteArray getByteBlock(bool trackAllocations) { SyncLock syncLock(this); int32_t size = freeByteBlocks.size(); ByteArray b; if (size == 0) { b = ByteArray::newInstance(DocumentsWriter::BYTE_BLOCK_SIZE); MiscUtils::arrayFill(b.get(), 0, b.size(), 0); } else b = freeByteBlocks.removeLast(); return b; } virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end) { SyncLock syncLock(this); for (int32_t i = start; i < end; ++i) freeByteBlocks.add(blocks[i]); } virtual void recycleByteBlocks(Collection blocks) { SyncLock syncLock(this); int32_t size = blocks.size(); for (int32_t i = 0; i < size; ++i) freeByteBlocks.add(blocks[i]); } }; BOOST_FIXTURE_TEST_SUITE(ByteSlicesTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testBasic) { ByteBlockPoolPtr pool = newLucene(newLucene(), false); int32_t NUM_STREAM = 25; ByteSliceWriterPtr writer = newLucene(pool); Collection starts = Collection::newInstance(NUM_STREAM); Collection uptos = Collection::newInstance(NUM_STREAM); Collection counters = Collection::newInstance(NUM_STREAM); RandomPtr r = newLucene(); ByteSliceReaderPtr reader = newLucene(); for (int32_t ti = 0; ti < 100; ++ti) { for (int32_t stream = 0; stream < NUM_STREAM; ++stream) { starts[stream] = -1; counters[stream] = 0; } bool debug = false; for (int32_t iter = 0; iter < 
10000; ++iter) { int32_t stream = r->nextInt(NUM_STREAM); if (debug) std::wcout << L"write stream=" << stream << L"\n"; if (starts[stream] == -1) { int32_t spot = pool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); uptos[stream] = spot + pool->byteOffset; starts[stream] = uptos[stream]; if (debug) std::wcout << L" init to " << starts[stream] << L"\n"; } writer->init(uptos[stream]); int32_t numValue = r->nextInt(20); for (int32_t j = 0; j < numValue; ++j) { if (debug) std::wcout << L" write " << (counters[stream] + j) << L"\n"; writer->writeVInt(counters[stream] + j); } counters[stream] += numValue; uptos[stream] = writer->getAddress(); if (debug) std::wcout << L" addr now " << uptos[stream] << L"\n"; } for (int32_t stream = 0; stream < NUM_STREAM; ++stream) { if (debug) std::wcout << L" stream=" << stream << L" count=" << counters[stream] << L"\n"; if (starts[stream] != uptos[stream]) { reader->init(pool, starts[stream], uptos[stream]); for (int32_t j = 0; j < counters[stream]; ++j) BOOST_CHECK_EQUAL(j, reader->readVInt()); } } pool->reset(); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/CheckIndexTest.cpp000066400000000000000000000053011217574114600235400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "WhitespaceAnalyzer.h" #include "IndexReader.h" #include "CheckIndex.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CheckIndexTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testDeletedDocs) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); for (int32_t i = 0; i < 19; ++i) writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, false); reader->deleteDocument(5); reader->close(); CheckIndexPtr checker = newLucene(dir); IndexStatusPtr indexStatus = checker->checkIndex(); BOOST_CHECK(indexStatus->clean); SegmentInfoStatusPtr seg = indexStatus->segmentInfos[0]; BOOST_CHECK(seg->openReaderPassed); BOOST_CHECK(seg->diagnostics); BOOST_CHECK(seg->fieldNormStatus); BOOST_CHECK(seg->fieldNormStatus->error.isNull()); BOOST_CHECK_EQUAL(1, seg->fieldNormStatus->totFields); BOOST_CHECK(seg->termIndexStatus); BOOST_CHECK(seg->termIndexStatus->error.isNull()); BOOST_CHECK_EQUAL(1, seg->termIndexStatus->termCount); BOOST_CHECK_EQUAL(19, seg->termIndexStatus->totFreq); BOOST_CHECK_EQUAL(18, seg->termIndexStatus->totPos); BOOST_CHECK(seg->storedFieldStatus); BOOST_CHECK(seg->storedFieldStatus->error.isNull()); BOOST_CHECK_EQUAL(18, seg->storedFieldStatus->docCount); BOOST_CHECK_EQUAL(18, seg->storedFieldStatus->totFields); BOOST_CHECK(seg->termVectorStatus); BOOST_CHECK(seg->termVectorStatus->error.isNull()); BOOST_CHECK_EQUAL(18, seg->termVectorStatus->docCount); BOOST_CHECK_EQUAL(18, seg->termVectorStatus->totVectors); 
BOOST_CHECK(!seg->diagnostics.empty()); Collection onlySegments = Collection::newInstance(); onlySegments.add(L"_0"); BOOST_CHECK(checker->checkIndex(onlySegments)->clean); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/CompoundFileTest.cpp000066400000000000000000000436171217574114600241330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "SimpleFSDirectory.h" #include "_SimpleFSDirectory.h" #include "IndexOutput.h" #include "IndexInput.h" #include "CompoundFileWriter.h" #include "CompoundFileReader.h" #include "Random.h" #include "MiscUtils.h" #include "FileUtils.h" using namespace Lucene; class CompoundFileTestFixture : public LuceneTestFixture { public: CompoundFileTestFixture() { indexDir = FileUtils::joinPath(getTempDir(), L"testIndex"); FileUtils::removeDirectory(indexDir); // use a simple FSDir here, to be sure to have SimpleFSInputs dir = newLucene(indexDir); } virtual ~CompoundFileTestFixture() { dir->close(); FileUtils::removeDirectory(indexDir); } protected: String indexDir; DirectoryPtr dir; public: /// Creates a file of the specified size with random data. 
void createRandomFile(DirectoryPtr dir, const String& name, int32_t size) { IndexOutputPtr os = dir->createOutput(name); RandomPtr r = newLucene(); for (int32_t i = 0; i < size; ++i) os->writeByte((uint8_t)r->nextInt(256)); os->close(); } void createSequenceFile(DirectoryPtr dir, const String& name, uint8_t start, int32_t size) { IndexOutputPtr os = dir->createOutput(name); for (int32_t i = 0; i < size; ++i) { os->writeByte(start); ++start; } os->close(); } void checkSameStreams(IndexInputPtr expected, IndexInputPtr test) { BOOST_CHECK(expected); BOOST_CHECK(test); BOOST_CHECK_EQUAL(expected->length(), test->length()); BOOST_CHECK_EQUAL(expected->getFilePointer(), test->getFilePointer()); ByteArray expectedBuffer(ByteArray::newInstance(512)); ByteArray testBuffer(ByteArray::newInstance(expectedBuffer.size())); int64_t remainder = expected->length() - expected->getFilePointer(); while (remainder > 0) { int32_t readLen = std::min((int32_t)remainder, expectedBuffer.size()); expected->readBytes(expectedBuffer.get(), 0, readLen); test->readBytes(testBuffer.get(), 0, readLen); checkEqualArrays(expectedBuffer, testBuffer, 0, readLen); remainder -= readLen; } } void checkSameStreams(IndexInputPtr expected, IndexInputPtr actual, int64_t seekTo) { if (seekTo >= 0 && seekTo < (int64_t)expected->length()) { expected->seek(seekTo); actual->seek(seekTo); checkSameStreams(expected, actual); } } void checkSameSeekBehavior(IndexInputPtr expected, IndexInputPtr actual) { // seek to 0 int64_t point = 0; checkSameStreams(expected, actual, point); // seek to middle point = expected->length() / 2l; checkSameStreams(expected, actual, point); // seek to end - 2 point = expected->length() - 2; checkSameStreams(expected, actual, point); // seek to end - 1 point = expected->length() - 1; checkSameStreams(expected, actual, point); // seek to the end point = expected->length(); checkSameStreams(expected, actual, point); // seek past end point = expected->length() + 1; 
checkSameStreams(expected, actual, point); } void checkEqualArrays(ByteArray expected, ByteArray test, int32_t start, int32_t length) { BOOST_CHECK(expected); BOOST_CHECK(test); for (int32_t i = start; i < length; ++i) BOOST_CHECK_EQUAL(expected[i], test[i]); } /// Setup a larger compound file with a number of components, each of which is a sequential file (so that we can /// easily tell that we are reading in the right byte). The methods sets up 20 files - f0 to f19, the size of each /// file is 1000 bytes. void setUpLarger() { CompoundFileWriterPtr cw = newLucene(dir, L"f.comp"); for (int32_t i = 0; i < 20; ++i) { createSequenceFile(dir, L"f" + StringUtils::toString(i), 0, 2000); cw->addFile(L"f" + StringUtils::toString(i)); } cw->close(); } bool isCSIndexInputOpen(IndexInputPtr is) { if (MiscUtils::typeOf(is)) { CSIndexInputPtr cis = boost::dynamic_pointer_cast(is); return isSimpleFSIndexInputOpen(cis->base); } else return false; } bool isSimpleFSIndexInputOpen(IndexInputPtr is) { if (MiscUtils::typeOf(is)) { SimpleFSIndexInputPtr fis = boost::dynamic_pointer_cast(is); return fis->isValid(); } else return false; } }; BOOST_FIXTURE_TEST_SUITE(CompoundFileTest, CompoundFileTestFixture) /// This test creates compound file based on a single file. Files of different sizes are tested: 0, 1, 10, 100 bytes. 
BOOST_AUTO_TEST_CASE(testSingleFile) { IntArray data(IntArray::newInstance(4)); data[0] = 0; data[1] = 1; data[2] = 10; data[3] = 100; for (int32_t i = 0; i < data.size(); ++i) { String name = L"t" + StringUtils::toString(data[i]); createSequenceFile(dir, name, 0, data[i]); CompoundFileWriterPtr csw = newLucene(dir, name + L".cfs"); csw->addFile(name); csw->close(); CompoundFileReaderPtr csr = newLucene(dir, name + L".cfs"); IndexInputPtr expected = dir->openInput(name); IndexInputPtr actual = csr->openInput(name); checkSameStreams(expected, actual); checkSameSeekBehavior(expected, actual); expected->close(); actual->close(); csr->close(); } } /// This test creates compound file based on two files. BOOST_AUTO_TEST_CASE(testTwoFiles) { createSequenceFile(dir, L"d1", 0, 15); createSequenceFile(dir, L"d2", 0, 114); CompoundFileWriterPtr csw = newLucene(dir, L"d.csf"); csw->addFile(L"d1"); csw->addFile(L"d2"); csw->close(); CompoundFileReaderPtr csr = newLucene(dir, L"d.csf"); IndexInputPtr expected = dir->openInput(L"d1"); IndexInputPtr actual = csr->openInput(L"d1"); checkSameStreams(expected, actual); checkSameSeekBehavior(expected, actual); expected->close(); actual->close(); expected = dir->openInput(L"d2"); actual = csr->openInput(L"d2"); checkSameStreams(expected, actual); checkSameSeekBehavior(expected, actual); expected->close(); actual->close(); csr->close(); } /// This test creates a compound file based on a large number of files of various length. The file content is generated randomly. /// The sizes range from 0 to 1Mb. Some of the sizes are selected to test the buffering logic in the file reading code. /// For this the chunk variable is set to the length of the buffer used internally by the compound file logic. 
BOOST_AUTO_TEST_CASE(testRandomFiles) { // Setup the test segment String segment = L"test"; int32_t chunk = 1024; // internal buffer size used by the stream createRandomFile(dir, segment + L".zero", 0); createRandomFile(dir, segment + L".one", 1); createRandomFile(dir, segment + L".ten", 10); createRandomFile(dir, segment + L".hundred", 100); createRandomFile(dir, segment + L".big1", chunk); createRandomFile(dir, segment + L".big2", chunk - 1); createRandomFile(dir, segment + L".big3", chunk + 1); createRandomFile(dir, segment + L".big4", 3 * chunk); createRandomFile(dir, segment + L".big5", 3 * chunk - 1); createRandomFile(dir, segment + L".big6", 3 * chunk + 1); createRandomFile(dir, segment + L".big7", 1000 * chunk); // Setup extraneous files createRandomFile(dir, L"onetwothree", 100); createRandomFile(dir, segment + L".notIn", 50); createRandomFile(dir, segment + L".notIn2", 51); // Now test CompoundFileWriterPtr csw = newLucene(dir, L"test.cfs"); Collection data(Collection::newInstance()); data.add(L".zero"); data.add(L".one"); data.add(L".ten"); data.add(L".hundred"); data.add(L".big1"); data.add(L".big2"); data.add(L".big3"); data.add(L".big4"); data.add(L".big5"); data.add(L".big6"); data.add(L".big7"); for (Collection::iterator name = data.begin(); name != data.end(); ++name) csw->addFile(segment + *name); csw->close(); CompoundFileReaderPtr csr = newLucene(dir, L"test.cfs"); for (Collection::iterator name = data.begin(); name != data.end(); ++name) { IndexInputPtr check = dir->openInput(segment + *name); IndexInputPtr test = csr->openInput(segment + *name); checkSameStreams(check, test); checkSameSeekBehavior(check, test); test->close(); check->close(); } csr->close(); } BOOST_AUTO_TEST_CASE(testReadAfterClose) { // Setup the test file - we need more than 1024 bytes IndexOutputPtr os = dir->createOutput(L"test"); for (int32_t i = 0; i < 2000; ++i) os->writeByte((uint8_t)i); os->close(); IndexInputPtr in = dir->openInput(L"test"); // This read primes the 
buffer in IndexInput uint8_t b = in->readByte(); // Close the file in->close(); // ERROR: this call should fail, but succeeds because the buffer is still filled b = in->readByte(); // ERROR: this call should fail, but succeeds for some reason as well in->seek(1099); BOOST_CHECK_EXCEPTION(in->readByte(), LuceneException, check_exception(LuceneException::IO)); } BOOST_AUTO_TEST_CASE(testClonedStreamsClosing) { setUpLarger(); CompoundFileReaderPtr cr = newLucene(dir, L"f.comp"); // basic clone IndexInputPtr expected = dir->openInput(L"f11"); // this test only works for FSIndexInput BOOST_CHECK(MiscUtils::typeOf(expected)); BOOST_CHECK(isSimpleFSIndexInputOpen(expected)); IndexInputPtr one = cr->openInput(L"f11"); BOOST_CHECK(isCSIndexInputOpen(one)); IndexInputPtr two = boost::dynamic_pointer_cast(one->clone()); BOOST_CHECK(isCSIndexInputOpen(two)); checkSameStreams(expected, one); expected->seek(0); checkSameStreams(expected, two); // Now close the first stream one->close(); BOOST_CHECK(isCSIndexInputOpen(one)); // Only close when cr is closed // The following should really fail since we couldn't expect to access a file once close has been called // on it (regardless of buffering and/or clone magic) expected->seek(0); two->seek(0); checkSameStreams(expected, two); // basic clone two/2 // Now close the compound reader cr->close(); BOOST_CHECK(!isCSIndexInputOpen(one)); BOOST_CHECK(!isCSIndexInputOpen(two)); // The following may also fail since the compound stream is closed expected->seek(0); two->seek(0); // Now close the second clone two->close(); expected->seek(0); two->seek(0); expected->close(); } /// This test opens two files from a compound stream and verifies that their file positions are independent of each other. 
BOOST_AUTO_TEST_CASE(testRandomAccess) { setUpLarger(); CompoundFileReaderPtr cr = newLucene(dir, L"f.comp"); // Open two files IndexInputPtr e1 = dir->openInput(L"f11"); IndexInputPtr e2 = dir->openInput(L"f3"); IndexInputPtr a1 = cr->openInput(L"f11"); IndexInputPtr a2 = dir->openInput(L"f3"); // Seek the first pair e1->seek(100); a1->seek(100); BOOST_CHECK_EQUAL(100, e1->getFilePointer()); BOOST_CHECK_EQUAL(100, a1->getFilePointer()); uint8_t be1 = e1->readByte(); uint8_t ba1 = a1->readByte(); BOOST_CHECK_EQUAL(be1, ba1); // Now seek the second pair e2->seek(1027); a2->seek(1027); BOOST_CHECK_EQUAL(1027, e2->getFilePointer()); BOOST_CHECK_EQUAL(1027, a2->getFilePointer()); uint8_t be2 = e2->readByte(); uint8_t ba2 = a2->readByte(); BOOST_CHECK_EQUAL(be2, ba2); // Now make sure the first one didn't move BOOST_CHECK_EQUAL(101, e1->getFilePointer()); BOOST_CHECK_EQUAL(101, a1->getFilePointer()); be1 = e1->readByte(); ba1 = a1->readByte(); BOOST_CHECK_EQUAL(be1, ba1); // Now more the first one again, past the buffer length e1->seek(1910); a1->seek(1910); BOOST_CHECK_EQUAL(1910, e1->getFilePointer()); BOOST_CHECK_EQUAL(1910, a1->getFilePointer()); be1 = e1->readByte(); ba1 = a1->readByte(); BOOST_CHECK_EQUAL(be1, ba1); // Now make sure the second set didn't move BOOST_CHECK_EQUAL(1028, e2->getFilePointer()); BOOST_CHECK_EQUAL(1028, a2->getFilePointer()); be2 = e2->readByte(); ba2 = a2->readByte(); BOOST_CHECK_EQUAL(be2, ba2); // Move the second set back, again cross the buffer size e2->seek(17); a2->seek(17); BOOST_CHECK_EQUAL(17, e2->getFilePointer()); BOOST_CHECK_EQUAL(17, a2->getFilePointer()); be2 = e2->readByte(); ba2 = a2->readByte(); BOOST_CHECK_EQUAL(be2, ba2); // Finally, make sure the first set didn't move // Now make sure the first one didn't move BOOST_CHECK_EQUAL(1911, e1->getFilePointer()); BOOST_CHECK_EQUAL(1911, a1->getFilePointer()); be1 = e1->readByte(); ba1 = a1->readByte(); BOOST_CHECK_EQUAL(be1, ba1); e1->close(); e2->close(); a1->close(); 
a2->close(); cr->close(); } /// This test opens two files from a compound stream and verifies that their file positions are independent of each other. BOOST_AUTO_TEST_CASE(testRandomAccessClones) { setUpLarger(); CompoundFileReaderPtr cr = newLucene(dir, L"f.comp"); // Open two files IndexInputPtr e1 = cr->openInput(L"f11"); IndexInputPtr e2 = cr->openInput(L"f3"); IndexInputPtr a1 = boost::dynamic_pointer_cast(e1->clone()); IndexInputPtr a2 = boost::dynamic_pointer_cast(e2->clone()); // Seek the first pair e1->seek(100); a1->seek(100); BOOST_CHECK_EQUAL(100, e1->getFilePointer()); BOOST_CHECK_EQUAL(100, a1->getFilePointer()); uint8_t be1 = e1->readByte(); uint8_t ba1 = a1->readByte(); BOOST_CHECK_EQUAL(be1, ba1); // Now seek the second pair e2->seek(1027); a2->seek(1027); BOOST_CHECK_EQUAL(1027, e2->getFilePointer()); BOOST_CHECK_EQUAL(1027, a2->getFilePointer()); uint8_t be2 = e2->readByte(); uint8_t ba2 = a2->readByte(); BOOST_CHECK_EQUAL(be2, ba2); // Now make sure the first one didn't move BOOST_CHECK_EQUAL(101, e1->getFilePointer()); BOOST_CHECK_EQUAL(101, a1->getFilePointer()); be1 = e1->readByte(); ba1 = a1->readByte(); BOOST_CHECK_EQUAL(be1, ba1); // Now more the first one again, past the buffer length e1->seek(1910); a1->seek(1910); BOOST_CHECK_EQUAL(1910, e1->getFilePointer()); BOOST_CHECK_EQUAL(1910, a1->getFilePointer()); be1 = e1->readByte(); ba1 = a1->readByte(); BOOST_CHECK_EQUAL(be1, ba1); // Now make sure the second set didn't move BOOST_CHECK_EQUAL(1028, e2->getFilePointer()); BOOST_CHECK_EQUAL(1028, a2->getFilePointer()); be2 = e2->readByte(); ba2 = a2->readByte(); BOOST_CHECK_EQUAL(be2, ba2); // Move the second set back, again cross the buffer size e2->seek(17); a2->seek(17); BOOST_CHECK_EQUAL(17, e2->getFilePointer()); BOOST_CHECK_EQUAL(17, a2->getFilePointer()); be2 = e2->readByte(); ba2 = a2->readByte(); BOOST_CHECK_EQUAL(be2, ba2); // Finally, make sure the first set didn't move // Now make sure the first one didn't move 
BOOST_CHECK_EQUAL(1911, e1->getFilePointer()); BOOST_CHECK_EQUAL(1911, a1->getFilePointer()); be1 = e1->readByte(); ba1 = a1->readByte(); BOOST_CHECK_EQUAL(be1, ba1); e1->close(); e2->close(); a1->close(); a2->close(); cr->close(); } BOOST_AUTO_TEST_CASE(testFileNotFound) { setUpLarger(); CompoundFileReaderPtr cr = newLucene(dir, L"f.comp"); IndexInputPtr e1; // Open two files BOOST_CHECK_EXCEPTION(e1 = cr->openInput(L"bogus"), LuceneException, check_exception(LuceneException::IO)); cr->close(); } BOOST_AUTO_TEST_CASE(testReadPastEOF) { setUpLarger(); CompoundFileReaderPtr cr = newLucene(dir, L"f.comp"); IndexInputPtr is = cr->openInput(L"f2"); is->seek(is->length() - 10); ByteArray b(ByteArray::newInstance(100)); is->readBytes(b.get(), 0, 10); uint8_t test = 0; BOOST_CHECK_EXCEPTION(test = is->readByte(), LuceneException, check_exception(LuceneException::IO)); is->seek(is->length() - 10); BOOST_CHECK_EXCEPTION(is->readBytes(b.get(), 0, 50), LuceneException, check_exception(LuceneException::IO)); is->close(); cr->close(); } /// This test that writes larger than the size of the buffer output will correctly increment the file pointer. BOOST_AUTO_TEST_CASE(testLargeWrites) { IndexOutputPtr os = dir->createOutput(L"testBufferStart.txt"); RandomPtr r = newLucene(); ByteArray largeBuf(ByteArray::newInstance(2048)); for (int32_t i = 0; i < largeBuf.size(); ++i) largeBuf[i] = (uint8_t)r->nextInt(256); int64_t currentPos = os->getFilePointer(); os->writeBytes(largeBuf.get(), largeBuf.size()); BOOST_CHECK_EQUAL(currentPos + largeBuf.size(), os->getFilePointer()); os->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/ConcurrentMergeSchedulerTest.cpp000066400000000000000000000277711217574114600265130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "SimpleAnalyzer.h" #include "WhitespaceAnalyzer.h" #include "IndexWriter.h" #include "IndexReader.h" #include "ConcurrentMergeScheduler.h" #include "_ConcurrentMergeScheduler.h" #include "Document.h" #include "Field.h" #include "LogDocMergePolicy.h" #include "Term.h" #include "SegmentInfos.h" #include "IndexFileDeleter.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "TestPoint.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(ConcurrentMergeSchedulerTest, LuceneTestFixture) static bool mergeCalled = false; static bool mergeThreadCreated = false; static bool excCalled = false; static void checkNoUnreferencedFiles(DirectoryPtr dir) { HashSet _startFiles = dir->listAll(); SegmentInfosPtr infos = newLucene(); infos->read(dir); IndexFileDeleterPtr deleter = newLucene(dir, newLucene(), infos, InfoStreamPtr(), DocumentsWriterPtr(), HashSet()); HashSet _endFiles = dir->listAll(); Collection startFiles = Collection::newInstance(_startFiles.begin(), _startFiles.end()); Collection endFiles = Collection::newInstance(_endFiles.begin(), _endFiles.end()); std::sort(startFiles.begin(), startFiles.end()); std::sort(endFiles.begin(), endFiles.end()); BOOST_CHECK(startFiles.equals(endFiles)); } namespace TestFlushException { DECLARE_SHARED_PTR(FailOnlyOnFlush) class FailOnlyOnFlush : public MockDirectoryFailure { public: FailOnlyOnFlush() { hitExc = false; mainThread = LuceneThread::currentId(); TestPoint::clear(); } virtual ~FailOnlyOnFlush() { } public: bool hitExc; int64_t mainThread; public: virtual void setDoFail() { MockDirectoryFailure::setDoFail(); hitExc = false; } virtual void clearDoFail() { MockDirectoryFailure::clearDoFail(); this->doFail = false; } 
virtual void eval(MockRAMDirectoryPtr dir) { if (this->doFail && mainThread == LuceneThread::currentId() && TestPoint::getTestPoint(L"doFlush")) { hitExc = true; boost::throw_exception(IOException(L"now failing during flush")); } } }; DECLARE_SHARED_PTR(TestableIndexWriter) class TestableIndexWriter : public IndexWriter { public: TestableIndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(d, a, create, mfl) { } virtual ~TestableIndexWriter() { } LUCENE_CLASS(TestableIndexWriter); public: using IndexWriter::flush; }; } /// Make sure running background merges still work fine even when we are hitting exceptions during flushing. BOOST_AUTO_TEST_CASE(testFlushExceptions) { MockRAMDirectoryPtr directory = newLucene(); TestFlushException::FailOnlyOnFlushPtr failure = newLucene(); directory->failOn(failure); TestFlushException::TestableIndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); ConcurrentMergeSchedulerPtr cms = newLucene(); writer->setMergeScheduler(cms); writer->setMaxBufferedDocs(2); DocumentPtr doc = newLucene(); FieldPtr idField = newLucene(L"id", L"", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); doc->add(idField); int32_t extraCount = 0; for (int32_t i = 0; i < 10; ++i) { for (int32_t j = 0; j < 20; ++j) { idField->setValue(StringUtils::toString(i * 20 + j)); writer->addDocument(doc); } // must cycle here because sometimes the merge flushes the doc we just added and so there's nothing to // flush, and we don't hit the exception while (true) { try { writer->addDocument(doc); failure->setDoFail(); writer->flush(true, false, true); BOOST_CHECK(!failure->hitExc); ++extraCount; } catch (LuceneException&) { failure->clearDoFail(); break; } } } writer->close(); IndexReaderPtr reader = IndexReader::open(directory, true); BOOST_CHECK_EQUAL(200 + extraCount, reader->numDocs()); reader->close(); directory->close(); } /// Test that deletes committed after a merge started and before it 
finishes, are correctly merged back BOOST_AUTO_TEST_CASE(testDeleteMerging) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); ConcurrentMergeSchedulerPtr cms = newLucene(); writer->setMergeScheduler(cms); LogDocMergePolicyPtr mp = newLucene(writer); writer->setMergePolicy(mp); // Force degenerate merging so we can get a mix of merging of segments with and without deletes at the start mp->setMinMergeDocs(1000); DocumentPtr doc = newLucene(); FieldPtr idField = newLucene(L"id", L"", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); doc->add(idField); for (int32_t i = 0; i < 10; ++i) { for (int32_t j = 0; j < 100; ++j) { idField->setValue(StringUtils::toString(i * 100 + j)); writer->addDocument(doc); } int32_t delID = i; while (delID < 100 * (1 + i)) { writer->deleteDocuments(newLucene(L"id", StringUtils::toString(delID))); delID += 10; } writer->commit(); } writer->close(); IndexReaderPtr reader = IndexReader::open(directory, true); // Verify that we did not lose any deletes BOOST_CHECK_EQUAL(450, reader->numDocs()); reader->close(); directory->close(); } BOOST_AUTO_TEST_CASE(testNoExtraFiles) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < 7; ++i) { ConcurrentMergeSchedulerPtr cms = newLucene(); writer->setMergeScheduler(cms); writer->setMaxBufferedDocs(2); for (int32_t j = 0; j < 21; ++j) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); checkNoUnreferencedFiles(directory); // Reopen writer = newLucene(directory, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); } writer->close(); directory->close(); } BOOST_AUTO_TEST_CASE(testNoWaitClose) { RAMDirectoryPtr directory = newLucene(); DocumentPtr doc = newLucene(); FieldPtr idField = 
newLucene(L"id", L"", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); doc->add(idField); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < 10; ++i) { ConcurrentMergeSchedulerPtr cms = newLucene(); writer->setMergeScheduler(cms); writer->setMaxBufferedDocs(2); writer->setMergeFactor(100); for (int32_t j = 0; j < 201; ++j) { idField->setValue(StringUtils::toString(i * 201 + j)); writer->addDocument(doc); } int32_t delID = i * 201; for (int32_t j = 0; j < 20; ++j) { writer->deleteDocuments(newLucene(L"id", StringUtils::toString(delID))); delID += 5; } // Force a bunch of merge threads to kick off so we stress out aborting them on close writer->setMergeFactor(3); writer->addDocument(doc); writer->commit(); writer->close(false); IndexReaderPtr reader = IndexReader::open(directory, true); BOOST_CHECK_EQUAL((1 + i) * 182, reader->numDocs()); reader->close(); // Reopen writer = newLucene(directory, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); } writer->close(); directory->close(); // allow time for merge threads to finish LuceneThread::threadSleep(1000); } namespace TestSubclassConcurrentMergeScheduler { DECLARE_SHARED_PTR(MyMergeScheduler) class FailOnlyOnMerge : public MockDirectoryFailure { public: FailOnlyOnMerge() { TestPoint::clear(); } virtual ~FailOnlyOnMerge() { } public: virtual void eval(MockRAMDirectoryPtr dir) { if (TestPoint::getTestPoint(L"doMerge")) boost::throw_exception(IOException(L"now failing during merge")); } }; class MyMergeThread : public MergeThread { public: MyMergeThread(ConcurrentMergeSchedulerPtr merger, IndexWriterPtr writer, OneMergePtr startMerge) : MergeThread(merger, writer, startMerge) { mergeThreadCreated = true; } virtual ~MyMergeThread() { } }; class MyMergeScheduler : public ConcurrentMergeScheduler { public: virtual ~MyMergeScheduler() { } LUCENE_CLASS(MyMergeScheduler); protected: virtual MergeThreadPtr getMergeThread(IndexWriterPtr writer, 
OneMergePtr merge) { MergeThreadPtr thread = newLucene(shared_from_this(), writer, merge); thread->setThreadPriority(getMergeThreadPriority()); return thread; } virtual void handleMergeException(const LuceneException& exc) { excCalled = true; } virtual void doMerge(OneMergePtr merge) { mergeCalled = true; ConcurrentMergeScheduler::doMerge(merge); } }; } BOOST_AUTO_TEST_CASE(testSubclassConcurrentMergeScheduler) { MockRAMDirectoryPtr dir = newLucene(); dir->failOn(newLucene()); DocumentPtr doc = newLucene(); FieldPtr idField = newLucene(L"id", L"", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); doc->add(idField); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); TestSubclassConcurrentMergeScheduler::MyMergeSchedulerPtr ms = newLucene(); writer->setMergeScheduler(ms); writer->setMaxBufferedDocs(2); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); for (int32_t i = 0; i < 20; ++i) writer->addDocument(doc); ms->sync(); writer->close(); BOOST_CHECK(mergeThreadCreated); BOOST_CHECK(mergeCalled); BOOST_CHECK(excCalled); dir->close(); BOOST_CHECK(ConcurrentMergeScheduler::anyUnhandledExceptions()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/CrashTest.cpp000066400000000000000000000107621217574114600226020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "NoLockFactory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "ConcurrentMergeScheduler.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CrashTest, LuceneTestFixture) static IndexWriterPtr initIndex(MockRAMDirectoryPtr dir) { dir->setLockFactory(NoLockFactory::getNoLockFactory()); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(10); boost::dynamic_pointer_cast(writer->getMergeScheduler())->setSuppressExceptions(); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"id", L"0", Field::STORE_YES, Field::INDEX_ANALYZED)); for (int32_t i = 0; i < 157; ++i) writer->addDocument(doc); return writer; } static IndexWriterPtr initIndex() { return initIndex(newLucene()); } static void crash(IndexWriterPtr writer) { MockRAMDirectoryPtr dir = boost::dynamic_pointer_cast(writer->getDirectory()); ConcurrentMergeSchedulerPtr cms = boost::dynamic_pointer_cast(writer->getMergeScheduler()); dir->crash(); cms->sync(); dir->clearCrash(); } BOOST_AUTO_TEST_CASE(testCrashWhileIndexing) { IndexWriterPtr writer = initIndex(); MockRAMDirectoryPtr dir = boost::dynamic_pointer_cast(writer->getDirectory()); crash(writer); IndexReaderPtr reader = IndexReader::open(dir, false); BOOST_CHECK(reader->numDocs() < 157); } BOOST_AUTO_TEST_CASE(testWriterAfterCrash) { IndexWriterPtr writer = initIndex(); MockRAMDirectoryPtr dir = boost::dynamic_pointer_cast(writer->getDirectory()); dir->setPreventDoubleWrite(false); crash(writer); writer = initIndex(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, false); BOOST_CHECK(reader->numDocs() < 314); } 
BOOST_AUTO_TEST_CASE(testCrashAfterReopen) { IndexWriterPtr writer = initIndex(); MockRAMDirectoryPtr dir = boost::dynamic_pointer_cast(writer->getDirectory()); writer->close(); writer = initIndex(dir); BOOST_CHECK_EQUAL(314, writer->maxDoc()); crash(writer); IndexReaderPtr reader = IndexReader::open(dir, false); BOOST_CHECK(reader->numDocs() >= 157); } BOOST_AUTO_TEST_CASE(testCrashAfterClose) { IndexWriterPtr writer = initIndex(); MockRAMDirectoryPtr dir = boost::dynamic_pointer_cast(writer->getDirectory()); writer->close(); dir->crash(); IndexReaderPtr reader = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(157, reader->numDocs()); } BOOST_AUTO_TEST_CASE(testCrashAfterCloseNoWait) { IndexWriterPtr writer = initIndex(); MockRAMDirectoryPtr dir = boost::dynamic_pointer_cast(writer->getDirectory()); writer->close(false); dir->crash(); IndexReaderPtr reader = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(157, reader->numDocs()); } BOOST_AUTO_TEST_CASE(testCrashReaderDeletes) { IndexWriterPtr writer = initIndex(); MockRAMDirectoryPtr dir = boost::dynamic_pointer_cast(writer->getDirectory()); writer->close(false); IndexReaderPtr reader = IndexReader::open(dir, false); reader->deleteDocument(3); dir->crash(); reader = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(157, reader->numDocs()); } BOOST_AUTO_TEST_CASE(testCrashReaderDeletesAfterClose) { IndexWriterPtr writer = initIndex(); MockRAMDirectoryPtr dir = boost::dynamic_pointer_cast(writer->getDirectory()); writer->close(false); IndexReaderPtr reader = IndexReader::open(dir, false); reader->deleteDocument(3); reader->close(); dir->crash(); reader = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(156, reader->numDocs()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/DeletionPolicyTest.cpp000066400000000000000000000651651217574114600244740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexCommit.h" #include "SegmentInfos.h" #include "IndexDeletionPolicy.h" #include "MockRAMDirectory.h" #include "IndexReader.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "IndexFileNames.h" #include "Document.h" #include "Field.h" #include "SerialMergeScheduler.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "TermQuery.h" #include "Term.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(DeletionPolicyTest, LuceneTestFixture) static void verifyCommitOrder(Collection commits) { IndexCommitPtr firstCommit = commits[0]; int64_t last = SegmentInfos::generationFromSegmentsFileName(firstCommit->getSegmentsFileName()); BOOST_CHECK_EQUAL(last, firstCommit->getGeneration()); int64_t lastVersion = firstCommit->getVersion(); int64_t lastTimestamp = firstCommit->getTimestamp(); for (int32_t i = 1; i < commits.size(); ++i) { IndexCommitPtr commit = commits[i]; int64_t now = SegmentInfos::generationFromSegmentsFileName(commit->getSegmentsFileName()); int64_t nowVersion = commit->getVersion(); int64_t nowTimestamp = commit->getTimestamp(); BOOST_CHECK(now > last); // SegmentInfos commits are out-of-order? BOOST_CHECK(nowVersion > lastVersion); // SegmentInfos versions are out-of-order? BOOST_CHECK(nowTimestamp >= lastTimestamp); // SegmentInfos timestamps are out-of-order? 
BOOST_CHECK_EQUAL(now, commit->getGeneration()); last = now; lastVersion = nowVersion; lastTimestamp = nowTimestamp; } } static void addDoc(IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } DECLARE_SHARED_PTR(KeepAllDeletionPolicy) DECLARE_SHARED_PTR(KeepNoneOnInitDeletionPolicy) DECLARE_SHARED_PTR(KeepLastNDeletionPolicy) DECLARE_SHARED_PTR(ExpirationTimeDeletionPolicy) class KeepAllDeletionPolicy : public IndexDeletionPolicy { public: KeepAllDeletionPolicy() { numOnInit = 0; numOnCommit = 0; } virtual ~KeepAllDeletionPolicy() { } LUCENE_CLASS(KeepAllDeletionPolicy); public: int32_t numOnInit; int32_t numOnCommit; DirectoryPtr dir; public: virtual void onInit(Collection commits) { verifyCommitOrder(commits); ++numOnInit; } virtual void onCommit(Collection commits) { IndexCommitPtr lastCommit = commits[commits.size() - 1]; IndexReaderPtr r = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(r->isOptimized(), lastCommit->isOptimized()); r->close(); verifyCommitOrder(commits); ++numOnCommit; } }; /// This is useful for adding to a big index when you know readers are not using it. 
class KeepNoneOnInitDeletionPolicy : public IndexDeletionPolicy { public: KeepNoneOnInitDeletionPolicy() { numOnInit = 0; numOnCommit = 0; } virtual ~KeepNoneOnInitDeletionPolicy() { } LUCENE_CLASS(KeepNoneOnInitDeletionPolicy); public: int32_t numOnInit; int32_t numOnCommit; public: virtual void onInit(Collection commits) { verifyCommitOrder(commits); ++numOnInit; // On init, delete all commit points for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) { (*commit)->deleteCommit(); BOOST_CHECK((*commit)->isDeleted()); } } virtual void onCommit(Collection commits) { verifyCommitOrder(commits); int32_t size = commits.size(); // Delete all but last one for (int32_t i = 0; i < size - 1; ++i) commits[i]->deleteCommit(); ++numOnCommit; } }; class KeepLastNDeletionPolicy : public IndexDeletionPolicy { public: KeepLastNDeletionPolicy(int32_t numToKeep) { this->numOnInit = 0; this->numOnCommit = 0; this->numToKeep = numToKeep; this->numDelete = 0; this->seen = HashSet::newInstance(); } virtual ~KeepLastNDeletionPolicy() { } LUCENE_CLASS(KeepLastNDeletionPolicy); public: int32_t numOnInit; int32_t numOnCommit; int32_t numToKeep; int32_t numDelete; HashSet seen; public: virtual void onInit(Collection commits) { verifyCommitOrder(commits); ++numOnInit; // do no deletions on init doDeletes(commits, false); } virtual void onCommit(Collection commits) { verifyCommitOrder(commits); doDeletes(commits, true); } protected: void doDeletes(Collection commits, bool isCommit) { // Assert that we really are only called for each new commit if (isCommit) { String fileName = commits[commits.size() - 1]->getSegmentsFileName(); if (seen.contains(fileName)) BOOST_FAIL("onCommit was called twice on the same commit point"); seen.add(fileName); ++numOnCommit; } int32_t size = commits.size(); for (int32_t i = 0; i < size - numToKeep; ++i) { commits[i]->deleteCommit(); ++numDelete; } } }; /// Delete a commit only when it has been obsoleted by N seconds class 
ExpirationTimeDeletionPolicy : public IndexDeletionPolicy { public: ExpirationTimeDeletionPolicy(DirectoryPtr dir, double seconds) { this->dir = dir; this->expirationTimeSeconds = seconds; this->numDelete = 0; } virtual ~ExpirationTimeDeletionPolicy() { } LUCENE_CLASS(ExpirationTimeDeletionPolicy); public: DirectoryPtr dir; double expirationTimeSeconds; int32_t numDelete; public: virtual void onInit(Collection commits) { verifyCommitOrder(commits); onCommit(commits); } virtual void onCommit(Collection commits) { verifyCommitOrder(commits); IndexCommitPtr lastCommit = commits[commits.size() - 1]; // Any commit older than expireTime should be deleted double expireTime = dir->fileModified(lastCommit->getSegmentsFileName()) / 1000.0 - expirationTimeSeconds; for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) { double modTime = dir->fileModified((*commit)->getSegmentsFileName()) / 1000.0; if (*commit != lastCommit && modTime < expireTime) { (*commit)->deleteCommit(); ++numDelete; } } } }; /// Test "by time expiration" deletion policy BOOST_AUTO_TEST_CASE(testExpirationTimeDeletionPolicy) { const double SECONDS = 2.0; bool useCompoundFile = true; DirectoryPtr dir = newLucene(); ExpirationTimeDeletionPolicyPtr policy = newLucene(dir, SECONDS); IndexWriterPtr writer = newLucene(dir, newLucene(), true, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setUseCompoundFile(useCompoundFile); writer->close(); int64_t lastDeleteTime = 0; for (int32_t i = 0; i < 7; ++i) { // Record last time when writer performed deletes of past commits lastDeleteTime = MiscUtils::currentTimeMillis(); writer = newLucene(dir, newLucene(), false, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setUseCompoundFile(useCompoundFile); for (int32_t j = 0; j < 17; ++j) addDoc(writer); writer->close(); // Make sure to sleep long enough so that some commit points will be deleted LuceneThread::threadSleep(1000.0 * (SECONDS / 5.0)); } // First, make sure the 
policy in fact deleted something BOOST_CHECK(policy->numDelete > 0); // no commits were deleted // Then simplistic check: just verify that the segments_N's that still exist are in fact within SECONDS // seconds of the last one's mod time, and, that I can open a reader on each int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir); String fileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen); dir->deleteFile(IndexFileNames::SEGMENTS_GEN()); while (gen > 0) { try { IndexReaderPtr reader = IndexReader::open(dir, true); reader->close(); fileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen); int64_t modTime = dir->fileModified(fileName); BOOST_CHECK(lastDeleteTime - modTime <= (SECONDS * 1000)); } catch (IOException&) { // OK break; } dir->deleteFile(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen)); --gen; } dir->close(); } /// Test a silly deletion policy that keeps all commits around. BOOST_AUTO_TEST_CASE(testKeepAllDeletionPolicy) { for (int32_t pass = 0; pass < 2; ++pass) { bool useCompoundFile = ((pass % 2) != 0); // Never deletes a commit KeepAllDeletionPolicyPtr policy = newLucene(); DirectoryPtr dir = newLucene(); policy->dir = dir; IndexWriterPtr writer = newLucene(dir, newLucene(), true, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(10); writer->setUseCompoundFile(useCompoundFile); writer->setMergeScheduler(newLucene()); for (int32_t i = 0; i < 107; ++i) addDoc(writer); writer->close(); writer = newLucene(dir, newLucene(), false, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setUseCompoundFile(useCompoundFile); writer->optimize(); writer->close(); BOOST_CHECK_EQUAL(2, policy->numOnInit); // If we are not auto committing then there should be exactly 2 commits (one per close above) BOOST_CHECK_EQUAL(2, policy->numOnCommit); // Test listCommits Collection commits = IndexReader::listCommits(dir); // 1 from opening writer + 2 
from closing writer BOOST_CHECK_EQUAL(3, commits.size()); // Make sure we can open a reader on each commit for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) { IndexReaderPtr r = IndexReader::open(*commit, IndexDeletionPolicyPtr(), false); r->close(); } // Simplistic check: just verify all segments_N's still exist, and, I can open a reader on each dir->deleteFile(IndexFileNames::SEGMENTS_GEN()); int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir); while (gen > 0) { IndexReaderPtr reader = IndexReader::open(dir, true); reader->close(); dir->deleteFile(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen)); --gen; if (gen > 0) { // Now that we've removed a commit point, which should have orphan'd at least one index file. // Open and close a writer and check that it actually removed something int32_t preCount = dir->listAll().size(); writer = newLucene(dir, newLucene(), false, policy, IndexWriter::MaxFieldLengthLIMITED); writer->close(); int32_t postCount = dir->listAll().size(); BOOST_CHECK(postCount < preCount); } } dir->close(); } } /// Uses KeepAllDeletionPolicy to keep all commits around, then, opens a new IndexWriter on a previous commit point. 
BOOST_AUTO_TEST_CASE(testOpenPriorSnapshot) { // Never deletes a commit KeepAllDeletionPolicyPtr policy = newLucene(); DirectoryPtr dir = newLucene(); policy->dir = dir; IndexWriterPtr writer = newLucene(dir, newLucene(), (IndexDeletionPolicyPtr)policy, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); for (int32_t i = 0; i < 10; ++i) { addDoc(writer); if ((1 + i) % 2 == 0) writer->commit(); } writer->close(); Collection commits = IndexReader::listCommits(dir); BOOST_CHECK_EQUAL(6, commits.size()); IndexCommitPtr lastCommit; for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) { if (!lastCommit || (*commit)->getGeneration() > lastCommit->getGeneration()) lastCommit = *commit; } BOOST_CHECK(lastCommit); // Now add 1 doc and optimize writer = newLucene(dir, newLucene(), (IndexDeletionPolicyPtr)policy, IndexWriter::MaxFieldLengthLIMITED); addDoc(writer); BOOST_CHECK_EQUAL(11, writer->numDocs()); writer->optimize(); writer->close(); BOOST_CHECK_EQUAL(7, IndexReader::listCommits(dir).size()); // Now open writer on the commit just before optimize writer = newLucene(dir, newLucene(), policy, IndexWriter::MaxFieldLengthLIMITED, lastCommit); BOOST_CHECK_EQUAL(10, writer->numDocs()); // Should undo our rollback writer->rollback(); IndexReaderPtr r = IndexReader::open(dir, true); // Still optimized, still 11 docs BOOST_CHECK(r->isOptimized()); BOOST_CHECK_EQUAL(11, r->numDocs()); r->close(); writer = newLucene(dir, newLucene(), policy, IndexWriter::MaxFieldLengthLIMITED, lastCommit); BOOST_CHECK_EQUAL(10, writer->numDocs()); // Commits the rollback writer->close(); // Now 8 because we made another commit BOOST_CHECK_EQUAL(8, IndexReader::listCommits(dir).size()); r = IndexReader::open(dir, true); // Not optimized because we rolled it back, and now only 10 docs BOOST_CHECK(!r->isOptimized()); BOOST_CHECK_EQUAL(10, r->numDocs()); r->close(); // Reoptimize writer = newLucene(dir, newLucene(), (IndexDeletionPolicyPtr)policy, 
IndexWriter::MaxFieldLengthLIMITED); writer->optimize(); writer->close(); r = IndexReader::open(dir, true); BOOST_CHECK(r->isOptimized()); BOOST_CHECK_EQUAL(10, r->numDocs()); r->close(); // Now open writer on the commit just before optimize, but this time keeping only the last commit writer = newLucene(dir, newLucene(), newLucene(), IndexWriter::MaxFieldLengthLIMITED, lastCommit); BOOST_CHECK_EQUAL(10, writer->numDocs()); // Reader still sees optimized index, because writer opened on the prior commit has not yet committed r = IndexReader::open(dir, true); BOOST_CHECK(r->isOptimized()); BOOST_CHECK_EQUAL(10, r->numDocs()); r->close(); writer->close(); // Now reader sees unoptimized index: r = IndexReader::open(dir, true); BOOST_CHECK(!r->isOptimized()); BOOST_CHECK_EQUAL(10, r->numDocs()); r->close(); dir->close(); } /// Test keeping NO commit points. This is a viable and useful case eg where you want to build a big index and you know there are no readers. BOOST_AUTO_TEST_CASE(testKeepNoneOnInitDeletionPolicy) { for (int32_t pass = 0; pass < 2; ++pass) { bool useCompoundFile = ((pass % 2) != 0); KeepNoneOnInitDeletionPolicyPtr policy = newLucene(); DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(10); writer->setUseCompoundFile(useCompoundFile); for (int32_t i = 0; i < 107; ++i) addDoc(writer); writer->close(); writer = newLucene(dir, newLucene(), false, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setUseCompoundFile(useCompoundFile); writer->optimize(); writer->close(); BOOST_CHECK_EQUAL(2, policy->numOnInit); // If we are not auto committing then there should be exactly 2 commits (one per close above) BOOST_CHECK_EQUAL(2, policy->numOnCommit); // Simplistic check: just verify the index is in fact readable IndexReaderPtr reader = IndexReader::open(dir, true); reader->close(); dir->close(); } } /// Test a deletion policy that keeps last N 
commits. BOOST_AUTO_TEST_CASE(testKeepLastNDeletionPolicy) { int32_t N = 5; for (int32_t pass = 0; pass < 2; ++pass) { bool useCompoundFile = ((pass % 2) != 0); DirectoryPtr dir = newLucene(); KeepLastNDeletionPolicyPtr policy = newLucene(N); for (int32_t j = 0; j < N + 1; ++j) { IndexWriterPtr writer = newLucene(dir, newLucene(), true, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(10); writer->setUseCompoundFile(useCompoundFile); for (int32_t i = 0; i < 17; ++i) addDoc(writer); writer->optimize(); writer->close(); } BOOST_CHECK(policy->numDelete > 0); BOOST_CHECK_EQUAL(N + 1, policy->numOnInit); BOOST_CHECK_EQUAL(N + 1, policy->numOnCommit); // Simplistic check: just verify only the past N segments_N's still exist, and, I can open a reader on each dir->deleteFile(IndexFileNames::SEGMENTS_GEN()); int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir); for (int32_t i = 0; i < N + 1; ++i) { try { IndexReaderPtr reader = IndexReader::open(dir, true); reader->close(); if (i == N) BOOST_FAIL("should have failed on commits prior to last"); } catch (IOException& e) { if (i != N) BOOST_FAIL(e.getError()); } if (i < N) dir->deleteFile(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen)); --gen; } dir->close(); } } /// Test a deletion policy that keeps last N commits around, with reader doing deletes. 
BOOST_AUTO_TEST_CASE(testKeepLastNDeletionPolicyWithReader) { int32_t N = 10; for (int32_t pass = 0; pass < 2; ++pass) { bool useCompoundFile = ((pass % 2) != 0); KeepLastNDeletionPolicyPtr policy = newLucene(N); DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setUseCompoundFile(useCompoundFile); writer->close(); TermPtr searchTerm = newLucene(L"content", L"aaa"); QueryPtr query = newLucene(searchTerm); for (int32_t i = 0; i < N + 1; ++i) { writer = newLucene(dir, newLucene(), false, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setUseCompoundFile(useCompoundFile); for (int32_t j = 0; j < 17; ++j) addDoc(writer); // this is a commit writer->close(); IndexReaderPtr reader = IndexReader::open(dir, policy, false); reader->deleteDocument(3 * i + 1); reader->setNorm(4 * i + 1, L"content", 2.0); IndexSearcherPtr searcher = newLucene(reader); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(16 * (1 + i), hits.size()); // this is a commit reader->close(); searcher->close(); } writer = newLucene(dir, newLucene(), false, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setUseCompoundFile(useCompoundFile); writer->optimize(); // this is a commit writer->close(); BOOST_CHECK_EQUAL(2 * (N + 2), policy->numOnInit); BOOST_CHECK_EQUAL(2 * (N + 2) - 1, policy->numOnCommit); IndexSearcherPtr searcher = newLucene(dir, false); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(176, hits.size()); // Simplistic check: just verify only the past N segments_N's still exist, and, I can open a reader on each int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir); dir->deleteFile(IndexFileNames::SEGMENTS_GEN()); int32_t expectedCount = 176; for (int32_t i = 0; i < N + 1; ++i) { try { IndexReaderPtr reader = IndexReader::open(dir, true); // Work backwards in commits on what the expected count 
should be. searcher = newLucene(reader); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; if (i > 1) { if (i % 2 == 0) expectedCount += 1; else expectedCount -= 17; } BOOST_CHECK_EQUAL(expectedCount, hits.size()); searcher->close(); reader->close(); if (i == N) BOOST_FAIL("should have failed on commits before last 5"); } catch (IOException& e) { if (i != N) BOOST_FAIL(e.getError()); } if (i < N) dir->deleteFile(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen)); --gen; } dir->close(); } } /// Test a deletion policy that keeps last N commits around, through creates. BOOST_AUTO_TEST_CASE(testKeepLastNDeletionPolicyWithCreates) { int32_t N = 10; for (int32_t pass = 0; pass < 2; ++pass) { bool useCompoundFile = ((pass % 2) != 0); KeepLastNDeletionPolicyPtr policy = newLucene(N); DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(10); writer->setUseCompoundFile(useCompoundFile); writer->close(); TermPtr searchTerm = newLucene(L"content", L"aaa"); QueryPtr query = newLucene(searchTerm); for (int32_t i = 0; i < N + 1; ++i) { writer = newLucene(dir, newLucene(), false, policy, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(10); writer->setUseCompoundFile(useCompoundFile); for (int32_t j = 0; j < 17; ++j) addDoc(writer); // this is a commit writer->close(); IndexReaderPtr reader = IndexReader::open(dir, policy, false); reader->deleteDocument(3); reader->setNorm(5, L"content", 2.0); IndexSearcherPtr searcher = newLucene(reader); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(16, hits.size()); // this is a commit reader->close(); searcher->close(); writer = newLucene(dir, newLucene(), true, policy, IndexWriter::MaxFieldLengthUNLIMITED); // This will not commit: there are no changes pending because we opened for "create" writer->close(); } BOOST_CHECK_EQUAL(1 + 3 * 
(N + 1), policy->numOnInit); BOOST_CHECK_EQUAL(3 * ( N + 1), policy->numOnCommit); IndexSearcherPtr searcher = newLucene(dir, false); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // Simplistic check: just verify only the past N segments_N's still exist, and, I can open a reader on each int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir); dir->deleteFile(IndexFileNames::SEGMENTS_GEN()); int32_t expectedCount = 0; for (int32_t i = 0; i < N + 1; ++i) { try { IndexReaderPtr reader = IndexReader::open(dir, true); // Work backwards in commits on what the expected count should be. searcher = newLucene(reader); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(expectedCount, hits.size()); searcher->close(); if (expectedCount == 0) expectedCount = 16; else if (expectedCount == 16) expectedCount = 17; else if (expectedCount == 17) expectedCount = 0; reader->close(); if (i == N) BOOST_FAIL("should have failed on commits before last"); } catch (IOException& e) { if (i != N) BOOST_FAIL(e.getError()); } if (i < N) dir->deleteFile(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen)); --gen; } dir->close(); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/DirectoryReaderTest.cpp000066400000000000000000000176731217574114600246410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "DirectoryReader.h" #include "RAMDirectory.h" #include "Document.h" #include "DocHelper.h" #include "SegmentInfos.h" #include "IndexReader.h" #include "TermFreqVector.h" #include "Field.h" #include "DefaultSimilarity.h" #include "MultiReader.h" #include "StandardAnalyzer.h" #include "IndexWriter.h" #include "TermDocs.h" #include "TermEnum.h" #include "Term.h" using namespace Lucene; class DirectoryReaderTestFixture : public LuceneTestFixture, public DocHelper { public: DirectoryReaderTestFixture() { readers = Collection::newInstance(2); dir = newLucene(); doc1 = newLucene(); doc2 = newLucene(); DocHelper::setupDoc(doc1); DocHelper::setupDoc(doc2); DocHelper::writeDoc(dir, doc1); DocHelper::writeDoc(dir, doc2); sis = newLucene(); sis->read(dir); } virtual ~DirectoryReaderTestFixture() { } protected: DirectoryPtr dir; DocumentPtr doc1; DocumentPtr doc2; Collection readers; SegmentInfosPtr sis; public: void doTestDocument() { sis->read(dir); IndexReaderPtr reader = openReader(); BOOST_CHECK(reader); DocumentPtr newDoc1 = reader->document(0); BOOST_CHECK(newDoc1); BOOST_CHECK(DocHelper::numFields(newDoc1) == DocHelper::numFields(doc1) - DocHelper::unstored.size()); DocumentPtr newDoc2 = reader->document(1); BOOST_CHECK(newDoc2); BOOST_CHECK(DocHelper::numFields(newDoc2) == DocHelper::numFields(doc2) - DocHelper::unstored.size()); TermFreqVectorPtr vector = reader->getTermFreqVector(0, DocHelper::TEXT_FIELD_2_KEY); BOOST_CHECK(vector); checkNorms(reader); } void doTestUndeleteAll() { sis->read(dir); IndexReaderPtr reader = openReader(); BOOST_CHECK(reader); BOOST_CHECK_EQUAL(2, reader->numDocs()); reader->deleteDocument(0); BOOST_CHECK_EQUAL(1, reader->numDocs()); reader->undeleteAll(); BOOST_CHECK_EQUAL(2, reader->numDocs()); // Ensure undeleteAll survives commit/close/reopen reader->commit(MapStringString()); 
reader->close(); if (boost::dynamic_pointer_cast(reader)) { // MultiReader does not "own" the directory so it does not write the changes to sis on commit sis->commit(dir); } sis->read(dir); reader = openReader(); BOOST_CHECK_EQUAL(2, reader->numDocs()); reader->deleteDocument(0); BOOST_CHECK_EQUAL(1, reader->numDocs()); reader->commit(MapStringString()); reader->close(); if (boost::dynamic_pointer_cast(reader)) { // MultiReader does not "own" the directory so it does not write the changes to sis on commit sis->commit(dir); } sis->read(dir); reader = openReader(); BOOST_CHECK_EQUAL(1, reader->numDocs()); } protected: IndexReaderPtr openReader() { IndexReaderPtr reader = IndexReader::open(dir, false); BOOST_CHECK(boost::dynamic_pointer_cast(reader)); BOOST_CHECK(dir); BOOST_CHECK(sis); BOOST_CHECK(reader); return reader; } void checkNorms(IndexReaderPtr reader) { for (Collection::iterator field = DocHelper::fields.begin(); field != DocHelper::fields.end(); ++field) { if ((*field)->isIndexed()) { BOOST_CHECK_EQUAL(reader->hasNorms((*field)->name()), !(*field)->getOmitNorms()); BOOST_CHECK_EQUAL(reader->hasNorms((*field)->name()), !DocHelper::noNorms.contains((*field)->name())); if (!reader->hasNorms((*field)->name())) { // test for fake norms of 1.0 or null depending on the flag ByteArray norms = reader->norms((*field)->name()); uint8_t norm1 = DefaultSimilarity::encodeNorm(1.0); BOOST_CHECK(!norms); norms = ByteArray::newInstance(reader->maxDoc()); reader->norms((*field)->name(), norms, 0); for (int32_t j = 0; j < reader->maxDoc(); ++j) BOOST_CHECK_EQUAL(norms[j], norm1); } } } } void addDoc(RAMDirectoryPtr ramDir1, const String& s, bool create) { IndexWriterPtr iw = newLucene(ramDir1, newLucene(LuceneVersion::LUCENE_CURRENT), create, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED)); iw->addDocument(doc); iw->close(); } }; BOOST_FIXTURE_TEST_SUITE(DirectoryReaderTest, 
DirectoryReaderTestFixture) BOOST_AUTO_TEST_CASE(testDirectoryReader) { doTestDocument(); doTestUndeleteAll(); } BOOST_AUTO_TEST_CASE(testIsCurrent) { RAMDirectoryPtr ramDir1 = newLucene(); addDoc(ramDir1, L"test foo", true); RAMDirectoryPtr ramDir2 = newLucene(); addDoc(ramDir2, L"test blah", true); MultiReaderPtr mr = newLucene(newCollection(IndexReader::open(ramDir1, false), IndexReader::open(ramDir2, false))); BOOST_CHECK(mr->isCurrent()); // just opened, must be current addDoc(ramDir1, L"more text", false); BOOST_CHECK(!mr->isCurrent()); // has been modified, not current anymore addDoc(ramDir2, L"even more text", false); BOOST_CHECK(!mr->isCurrent()); // has been modified even more, not current anymore BOOST_CHECK_EXCEPTION(mr->getVersion(), LuceneException, check_exception(LuceneException::UnsupportedOperation)); mr->close(); } BOOST_AUTO_TEST_CASE(testMultiTermDocs) { RAMDirectoryPtr ramDir1 = newLucene(); addDoc(ramDir1, L"test foo", true); RAMDirectoryPtr ramDir2 = newLucene(); addDoc(ramDir2, L"test blah", true); RAMDirectoryPtr ramDir3 = newLucene(); addDoc(ramDir3, L"test wow", true); Collection readers1 = newCollection(IndexReader::open(ramDir1, false), IndexReader::open(ramDir3, false)); Collection readers2 = newCollection(IndexReader::open(ramDir1, false), IndexReader::open(ramDir2, false), IndexReader::open(ramDir3, false)); MultiReaderPtr mr2 = newLucene(readers1); MultiReaderPtr mr3 = newLucene(readers2); // test mixing up TermDocs and TermEnums from different readers. TermDocsPtr td2 = mr2->termDocs(); TermEnumPtr te3 = mr3->terms(newLucene(L"body", L"wow")); td2->seek(te3); int32_t ret = 0; // This should blow up if we forget to check that the TermEnum is from the same reader as the TermDocs. while (td2->next()) ret += td2->doc(); td2->close(); te3->close(); // really a dummy check to ensure that we got some docs and to ensure that nothing is optimized out. 
BOOST_CHECK(ret > 0); } BOOST_AUTO_TEST_CASE(testAllTermDocs) { IndexReaderPtr reader = openReader(); int32_t NUM_DOCS = 2; TermDocsPtr td = reader->termDocs(TermPtr()); for (int32_t i = 0; i < NUM_DOCS; ++i) { BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(i, td->doc()); BOOST_CHECK_EQUAL(1, td->freq()); } td->close(); reader->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/DocHelper.cpp000066400000000000000000000257331217574114600225530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "DocHelper.h" #include "Field.h" #include "SegmentInfo.h" #include "WhitespaceAnalyzer.h" #include "Similarity.h" #include "Document.h" #include "IndexWriter.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const wchar_t* DocHelper::FIELD_1_TEXT = L"field one text"; const wchar_t* DocHelper::TEXT_FIELD_1_KEY = L"textField1"; FieldPtr DocHelper::textField1; const wchar_t* DocHelper::FIELD_2_TEXT = L"field field field two text"; // Fields will be lexicographically sorted. 
So, the order is: field, text, two const int32_t DocHelper::FIELD_2_FREQS[] = {3, 1, 1}; const wchar_t* DocHelper::TEXT_FIELD_2_KEY = L"textField2"; FieldPtr DocHelper::textField2; const wchar_t* DocHelper::FIELD_3_TEXT = L"aaaNoNorms aaaNoNorms bbbNoNorms"; const wchar_t* DocHelper::TEXT_FIELD_3_KEY = L"textField3"; FieldPtr DocHelper::textField3; const wchar_t* DocHelper::KEYWORD_TEXT = L"Keyword"; const wchar_t* DocHelper::KEYWORD_FIELD_KEY = L"keyField"; FieldPtr DocHelper::keyField; const wchar_t* DocHelper::NO_NORMS_TEXT = L"omitNormsText"; const wchar_t* DocHelper::NO_NORMS_KEY = L"omitNorms"; FieldPtr DocHelper::noNormsField; const wchar_t* DocHelper::NO_TF_TEXT = L"analyzed with no tf and positions"; const wchar_t* DocHelper::NO_TF_KEY = L"omitTermFreqAndPositions"; FieldPtr DocHelper::noTFField; const wchar_t* DocHelper::UNINDEXED_FIELD_TEXT = L"unindexed field text"; const wchar_t* DocHelper::UNINDEXED_FIELD_KEY = L"unIndField"; FieldPtr DocHelper::unIndField; const wchar_t* DocHelper::UNSTORED_1_FIELD_TEXT = L"unstored field text"; const wchar_t* DocHelper::UNSTORED_FIELD_1_KEY = L"unStoredField1"; FieldPtr DocHelper::unStoredField1; const wchar_t* DocHelper::UNSTORED_2_FIELD_TEXT = L"unstored field text"; const wchar_t* DocHelper::UNSTORED_FIELD_2_KEY = L"unStoredField2"; FieldPtr DocHelper::unStoredField2; const wchar_t* DocHelper::LAZY_FIELD_BINARY_KEY = L"lazyFieldBinary"; ByteArray DocHelper::LAZY_FIELD_BINARY_BYTES; FieldPtr DocHelper::lazyFieldBinary; const wchar_t* DocHelper::LAZY_FIELD_KEY = L"lazyField"; const wchar_t* DocHelper::LAZY_FIELD_TEXT = L"These are some field bytes"; FieldPtr DocHelper::lazyField; const wchar_t* DocHelper::LARGE_LAZY_FIELD_KEY = L"largeLazyField"; String DocHelper::LARGE_LAZY_FIELD_TEXT; FieldPtr DocHelper::largeLazyField; const uint8_t DocHelper::_FIELD_UTF1_TEXT[] = {0x66, 0x69, 0x65, 0x6c, 0x64, 0x20, 0x6f, 0x6e, 0x65, 0x20, 0xe4, 0xb8, 0x80, 0x74, 0x65, 0x78, 0x74}; const String DocHelper::FIELD_UTF1_TEXT = 
UTF8_TO_STRING(_FIELD_UTF1_TEXT); const wchar_t* DocHelper::TEXT_FIELD_UTF1_KEY = L"textField1Utf8"; FieldPtr DocHelper::textUtfField1; const uint8_t DocHelper::_FIELD_UTF2_TEXT[] = {0x66, 0x69, 0x65, 0x6c, 0x64, 0x20, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x20, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x20, 0xe4, 0xb8, 0x80, 0x74, 0x77, 0x6f, 0x20, 0x74, 0x65, 0x78, 0x74}; const String DocHelper::FIELD_UTF2_TEXT = UTF8_TO_STRING(_FIELD_UTF2_TEXT); FieldPtr DocHelper::textUtfField2; // Fields will be lexicographically sorted. So, the order is: field, text, two const int32_t DocHelper::FIELD_UTF2_FREQS[] = {3, 1, 1}; const wchar_t* DocHelper::TEXT_FIELD_UTF2_KEY = L"textField2Utf8"; MapStringString DocHelper::nameValues; Collection DocHelper::fields; MapStringField DocHelper::all; MapStringField DocHelper::indexed; MapStringField DocHelper::stored; MapStringField DocHelper::unstored; MapStringField DocHelper::unindexed; MapStringField DocHelper::termvector; MapStringField DocHelper::notermvector; MapStringField DocHelper::lazy; MapStringField DocHelper::noNorms; MapStringField DocHelper::noTf; DocHelper::DocHelper() { static bool setupRequired = true; if (setupRequired) { setup(); setupRequired = false; } } DocHelper::~DocHelper() { } void DocHelper::setup() { textField1 = newLucene(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO); textField2 = newLucene(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); textField3 = newLucene(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field::STORE_YES, Field::INDEX_ANALYZED); textField3->setOmitNorms(true); keyField = newLucene(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field::STORE_YES, Field::INDEX_NOT_ANALYZED); noNormsField = newLucene(NO_NORMS_KEY, NO_NORMS_TEXT, Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS); noTFField = newLucene(NO_TF_KEY, NO_TF_TEXT, Field::STORE_YES, Field::INDEX_ANALYZED); noTFField->setOmitTermFreqAndPositions(true); 
unIndField = newLucene(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field::STORE_YES, Field::INDEX_NO); unStoredField1 = newLucene(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO); unStoredField2 = newLucene(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES); String binary(L"These are some binary field bytes"); UTF8ResultPtr utf8 = newInstance(); StringUtils::toUTF8(binary.c_str(), binary.length(), utf8); LAZY_FIELD_BINARY_BYTES = ByteArray::newInstance(utf8->length); MiscUtils::arrayCopy(utf8->result.get(), 0, LAZY_FIELD_BINARY_BYTES.get(), 0, utf8->length); lazyFieldBinary = newLucene(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field::STORE_YES); lazyField = newLucene(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field::STORE_YES, Field::INDEX_ANALYZED); if (LARGE_LAZY_FIELD_TEXT.empty()) { LARGE_LAZY_FIELD_TEXT.reserve(550000); for (int32_t i = 0; i < 10000; ++i) LARGE_LAZY_FIELD_TEXT += L"Lazily loading lengths of language in lieu of laughing "; } largeLazyField = newLucene(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field::STORE_YES, Field::INDEX_ANALYZED); textUtfField1 = newLucene(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO); textUtfField2 = newLucene(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); nameValues = MapStringString::newInstance(); nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT); nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT); nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT); nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT); nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT); nameValues.put(NO_TF_KEY, NO_TF_TEXT); nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT); nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT); nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT); nameValues.put(LAZY_FIELD_KEY, 
LAZY_FIELD_TEXT); nameValues.put(LAZY_FIELD_BINARY_KEY, L""); nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT); nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT); nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT); fields = Collection::newInstance(); fields.add(textField1); fields.add(textField2); fields.add(textField3); fields.add(keyField); fields.add(noNormsField); fields.add(noTFField); fields.add(unIndField); fields.add(unStoredField1); fields.add(unStoredField2); fields.add(textUtfField1); fields.add(textUtfField2); fields.add(lazyField); fields.add(lazyFieldBinary); fields.add(largeLazyField); all = MapStringField::newInstance(); indexed = MapStringField::newInstance(); stored = MapStringField::newInstance(); unstored = MapStringField::newInstance(); unindexed = MapStringField::newInstance(); termvector = MapStringField::newInstance(); notermvector = MapStringField::newInstance(); lazy = MapStringField::newInstance(); noNorms = MapStringField::newInstance(); noTf = MapStringField::newInstance(); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { all.put((*field)->name(), *field); if ((*field)->isIndexed()) indexed.put((*field)->name(), *field); else unindexed.put((*field)->name(), *field); if ((*field)->isStored()) stored.put((*field)->name(), *field); else unstored.put((*field)->name(), *field); if ((*field)->isTermVectorStored()) termvector.put((*field)->name(), *field); if ((*field)->isIndexed() && !(*field)->isTermVectorStored()) notermvector.put((*field)->name(), *field); if ((*field)->isLazy()) lazy.put((*field)->name(), *field); if ((*field)->getOmitNorms()) noNorms.put((*field)->name(), *field); if ((*field)->getOmitTermFreqAndPositions()) noTf.put((*field)->name(), *field); } } void DocHelper::setupDoc(DocumentPtr doc) { for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) doc->add(*field); } SegmentInfoPtr DocHelper::writeDoc(DirectoryPtr dir, DocumentPtr doc) { return 
writeDoc(dir, newLucene(), Similarity::getDefault(), doc); } SegmentInfoPtr DocHelper::writeDoc(DirectoryPtr dir, AnalyzerPtr analyzer, SimilarityPtr similarity, DocumentPtr doc) { IndexWriterPtr writer = newLucene(dir, analyzer, IndexWriter::MaxFieldLengthLIMITED); writer->setSimilarity(similarity); writer->addDocument(doc); writer->commit(); SegmentInfoPtr info = writer->newestSegment(); writer->close(); return info; } int32_t DocHelper::numFields(DocumentPtr doc) { return doc->getFields().size(); } } LucenePlusPlus-rel_3.0.4/src/test/index/DocTest.cpp000066400000000000000000000113621217574114600222440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FSDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "SegmentInfo.h" #include "FileReader.h" #include "Document.h" #include "Field.h" #include "SegmentReader.h" #include "SegmentMerger.h" #include "TermEnum.h" #include "TermPositions.h" #include "Term.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(DocTest, LuceneTestFixture) static SegmentInfoPtr indexDoc(IndexWriterPtr writer, const String& fileName) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"contents", newLucene(FileUtils::joinPath(getTestDir(), fileName)))); writer->addDocument(doc); writer->commit(); return writer->newestSegment(); } static void printSegment(StringStream& out, SegmentInfoPtr si) { SegmentReaderPtr reader = SegmentReader::get(true, si, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); for (int32_t i = 0; i < reader->numDocs(); ++i) out << reader->document(i)->toString() << L"\n"; TermEnumPtr tis = 
reader->terms(); while (tis->next()) { out << tis->term()->toString(); out << L" DF=" << tis->docFreq() << L"\n"; TermPositionsPtr positions = reader->termPositions(tis->term()); LuceneException finally; try { while (positions->next()) { out << L" doc=" << positions->doc(); out << L" TF=" << positions->freq(); out << L" pos="; out << positions->nextPosition() << L"\n"; for (int32_t j = 1; j < positions->freq(); ++j) out << L"," << positions->nextPosition(); } } catch (LuceneException& e) { finally = e; } positions->close(); finally.throwException(); } tis->close(); reader->close(); } static SegmentInfoPtr merge(SegmentInfoPtr si1, SegmentInfoPtr si2, const String& merged, bool useCompoundFile) { SegmentReaderPtr r1 = SegmentReader::get(true, si1, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); SegmentReaderPtr r2 = SegmentReader::get(true, si2, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); SegmentMergerPtr merger = newLucene(si1->dir, merged); merger->add(r1); merger->add(r2); merger->merge(); merger->closeReaders(); if (useCompoundFile) { HashSet filesToDelete = merger->createCompoundFile(merged + L".cfs"); for (HashSet::iterator file = filesToDelete.begin(); file != filesToDelete.end(); ++file) si1->dir->deleteFile(*file); } return newLucene(merged, si1->docCount + si2->docCount, si1->dir, useCompoundFile, true); } BOOST_AUTO_TEST_CASE(testIndexAndMerge) { String indexDir(FileUtils::joinPath(getTempDir(), L"testDoc")); DirectoryPtr directory = FSDirectory::open(indexDir); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); SegmentInfoPtr si1 = indexDoc(writer, L"testdoc1.txt"); StringStream out; printSegment(out, si1); SegmentInfoPtr si2 = indexDoc(writer, L"testdoc2.txt"); printSegment(out, si2); writer->close(); SegmentInfoPtr siMerge = merge(si1, si2, L"merge", false); printSegment(out, siMerge); SegmentInfoPtr siMerge2 = merge(si1, si2, L"merge2", false); printSegment(out, siMerge2); SegmentInfoPtr siMerge3 = 
merge(siMerge, siMerge2, L"merge3", false); printSegment(out, siMerge3); directory->close(); String multiFileOutput = out.str(); out.str(L""); directory = FSDirectory::open(indexDir); writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); si1 = indexDoc(writer, L"testdoc1.txt"); printSegment(out, si1); si2 = indexDoc(writer, L"testdoc2.txt"); printSegment(out, si2); writer->close(); siMerge = merge(si1, si2, L"merge", true); printSegment(out, siMerge); siMerge2 = merge(si1, si2, L"merge2", true); printSegment(out, siMerge2); siMerge3 = merge(siMerge, siMerge2, L"merge3", true); printSegment(out, siMerge3); directory->close(); String singleFileOutput = out.str(); BOOST_CHECK_EQUAL(multiFileOutput, singleFileOutput); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/DocumentWriterTest.cpp000066400000000000000000000330231217574114600245100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "RAMDirectory.h" #include "Document.h" #include "DocHelper.h" #include "WhitespaceAnalyzer.h" #include "WhitespaceTokenizer.h" #include "SimpleAnalyzer.h" #include "StandardAnalyzer.h" #include "IndexWriter.h" #include "SegmentInfo.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "Field.h" #include "FieldInfo.h" #include "FieldInfos.h" #include "TermPositions.h" #include "Term.h" #include "TokenFilter.h" #include "TokenStream.h" #include "TermAttribute.h" #include "PayloadAttribute.h" #include "PositionIncrementAttribute.h" #include "Payload.h" #include "TermFreqVector.h" #include "MiscUtils.h" #include "UnicodeUtils.h" using namespace Lucene; class DocumentWriterTestFixture : public LuceneTestFixture, public DocHelper { public: virtual ~DocumentWriterTestFixture() { } }; BOOST_FIXTURE_TEST_SUITE(DocumentWriterTest, DocumentWriterTestFixture) BOOST_AUTO_TEST_CASE(testAddDocument) { RAMDirectoryPtr dir = newLucene(); DocumentPtr testDoc = newLucene(); DocHelper::setupDoc(testDoc); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(testDoc); writer->commit(); SegmentInfoPtr info = writer->newestSegment(); writer->close(); // After adding the document, we should be able to read it back in SegmentReaderPtr reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); BOOST_CHECK(reader); DocumentPtr doc = reader->document(0); BOOST_CHECK(doc); Collection fields = doc->getFields(L"textField2"); BOOST_CHECK(fields && fields.size() == 1); BOOST_CHECK_EQUAL(fields[0]->stringValue(), DocHelper::FIELD_2_TEXT); BOOST_CHECK(fields[0]->isTermVectorStored()); fields = doc->getFields(L"textField1"); BOOST_CHECK(fields && fields.size() == 1); BOOST_CHECK_EQUAL(fields[0]->stringValue(), 
DocHelper::FIELD_1_TEXT); BOOST_CHECK(!fields[0]->isTermVectorStored()); fields = doc->getFields(L"keyField"); BOOST_CHECK(fields && fields.size() == 1); BOOST_CHECK_EQUAL(fields[0]->stringValue(), DocHelper::KEYWORD_TEXT); fields = doc->getFields(DocHelper::NO_NORMS_KEY); BOOST_CHECK(fields && fields.size() == 1); BOOST_CHECK_EQUAL(fields[0]->stringValue(), DocHelper::NO_NORMS_TEXT); fields = doc->getFields(DocHelper::TEXT_FIELD_3_KEY); BOOST_CHECK(fields && fields.size() == 1); BOOST_CHECK_EQUAL(fields[0]->stringValue(), DocHelper::FIELD_3_TEXT); // test that the norms are not present in the segment if omitNorms is true for (int32_t i = 0; i < reader->core->fieldInfos->size(); ++i) { FieldInfoPtr fi = reader->core->fieldInfos->fieldInfo(i); if (fi->isIndexed) BOOST_CHECK(fi->omitNorms == !reader->hasNorms(fi->name)); } } namespace TestPositionIncrementGap { DECLARE_SHARED_PTR(TestableAnalyzer) class TestableAnalyzer : public Analyzer { public: virtual ~TestableAnalyzer() { } LUCENE_CLASS(TestableAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(reader); } virtual int32_t getPositionIncrementGap(const String& fieldName) { return 500; } }; } BOOST_AUTO_TEST_CASE(testPositionIncrementGap) { RAMDirectoryPtr dir = newLucene(); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"repeated", L"repeated one", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"repeated", L"repeated two", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->commit(); SegmentInfoPtr info = writer->newestSegment(); writer->close(); SegmentReaderPtr reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); TermPositionsPtr termPositions = reader->termPositions(newLucene(L"repeated", L"repeated")); BOOST_CHECK(termPositions->next()); int32_t 
freq = termPositions->freq(); BOOST_CHECK_EQUAL(2, freq); BOOST_CHECK_EQUAL(0, termPositions->nextPosition()); BOOST_CHECK_EQUAL(502, termPositions->nextPosition()); } namespace TestTokenReuse { DECLARE_SHARED_PTR(TestableTokenFilter) DECLARE_SHARED_PTR(TestableAnalyzer) class TestableTokenFilter : public TokenFilter { public: TestableTokenFilter(ReaderPtr reader) : TokenFilter(newLucene(reader)) { first = true; termAtt = addAttribute(); payloadAtt = addAttribute(); posIncrAtt = addAttribute(); } virtual ~TestableTokenFilter() { } LUCENE_CLASS(TestableTokenFilter); public: bool first; AttributeSourceStatePtr state; TermAttributePtr termAtt; PayloadAttributePtr payloadAtt; PositionIncrementAttributePtr posIncrAtt; public: virtual bool incrementToken() { if (state) { restoreState(state); payloadAtt->setPayload(PayloadPtr()); posIncrAtt->setPositionIncrement(0); static const wchar_t buffer[] = L"b"; termAtt->setTermBuffer(buffer, 0, 1); state.reset(); return true; } bool hasNext = input->incrementToken(); if (!hasNext) return false; if (UnicodeUtil::isDigit(termAtt->termBufferArray()[0])) posIncrAtt->setPositionIncrement(termAtt->termBufferArray()[0] - L'0'); if (first) { ByteArray payload = ByteArray::newInstance(1); payload.get()[0] = 100; // set payload on first position only payloadAtt->setPayload(newLucene(payload)); first = false; } // index a "synonym" for every token state = captureState(); return true; } }; class TestableAnalyzer : public Analyzer { public: virtual ~TestableAnalyzer() { } LUCENE_CLASS(TestableAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(reader); } }; } BOOST_AUTO_TEST_CASE(testTokenReuse) { RAMDirectoryPtr dir = newLucene(); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"f1", L"a 5 a a", Field::STORE_YES, Field::INDEX_ANALYZED)); 
writer->addDocument(doc); writer->commit(); SegmentInfoPtr info = writer->newestSegment(); writer->close(); SegmentReaderPtr reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); TermPositionsPtr termPositions = reader->termPositions(newLucene(L"f1", L"a")); BOOST_CHECK(termPositions->next()); int32_t freq = termPositions->freq(); BOOST_CHECK_EQUAL(3, freq); BOOST_CHECK_EQUAL(0, termPositions->nextPosition()); BOOST_CHECK_EQUAL(true, termPositions->isPayloadAvailable()); BOOST_CHECK_EQUAL(6, termPositions->nextPosition()); BOOST_CHECK_EQUAL(false, termPositions->isPayloadAvailable()); BOOST_CHECK_EQUAL(7, termPositions->nextPosition()); BOOST_CHECK_EQUAL(false, termPositions->isPayloadAvailable()); } namespace TestPreAnalyzedField { DECLARE_SHARED_PTR(TestableTokenStream) class TestableTokenStream : public TokenStream { public: TestableTokenStream() { tokens = newCollection(L"term1", L"term2", L"term3", L"term2"); index = 0; termAtt = addAttribute(); } virtual ~TestableTokenStream() { } LUCENE_CLASS(TestableTokenStream); protected: Collection tokens; int32_t index; TermAttributePtr termAtt; public: virtual bool incrementToken() { if (index == tokens.size()) return false; else { clearAttributes(); termAtt->setTermBuffer(tokens[index++]); return true; } } }; } BOOST_AUTO_TEST_CASE(testPreAnalyzedField) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"preanalyzed", newLucene(), Field::TERM_VECTOR_NO)); writer->addDocument(doc); writer->commit(); SegmentInfoPtr info = writer->newestSegment(); writer->close(); SegmentReaderPtr reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); TermPositionsPtr termPositions = reader->termPositions(newLucene(L"preanalyzed", L"term1")); BOOST_CHECK(termPositions->next()); BOOST_CHECK_EQUAL(1, termPositions->freq()); BOOST_CHECK_EQUAL(0, 
termPositions->nextPosition()); termPositions->seek(newLucene(L"preanalyzed", L"term2")); BOOST_CHECK(termPositions->next()); BOOST_CHECK_EQUAL(2, termPositions->freq()); BOOST_CHECK_EQUAL(1, termPositions->nextPosition()); BOOST_CHECK_EQUAL(3, termPositions->nextPosition()); termPositions->seek(newLucene(L"preanalyzed", L"term3")); BOOST_CHECK(termPositions->next()); BOOST_CHECK_EQUAL(1, termPositions->freq()); BOOST_CHECK_EQUAL(2, termPositions->nextPosition()); } /// Test adding two fields with the same name, but with different term vector setting BOOST_AUTO_TEST_CASE(testMixedTermVectorSettingsSameField) { RAMDirectoryPtr dir = newLucene(); DocumentPtr doc = newLucene(); // f1 first without tv then with tv doc->add(newLucene(L"f1", L"v1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_NO)); doc->add(newLucene(L"f1", L"v2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); // f2 first with tv then without tv doc->add(newLucene(L"f2", L"v1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"f2", L"v2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_NO)); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc); writer->close(); checkIndex(dir); IndexReaderPtr reader = IndexReader::open(dir, true); // f1 TermFreqVectorPtr tfv1 = reader->getTermFreqVector(0, L"f1"); BOOST_CHECK(tfv1); BOOST_CHECK_EQUAL(2, tfv1->getTerms().size()); // f2 TermFreqVectorPtr tfv2 = reader->getTermFreqVector(0, L"f2"); BOOST_CHECK(tfv2); BOOST_CHECK_EQUAL(2, tfv2->getTerms().size()); } /// Test adding two fields with the same name, one indexed the other stored only. 
The omitNorms and /// omitTermFreqAndPositions setting of the stored field should not affect the indexed one BOOST_AUTO_TEST_CASE(testMixedTermVectorSettingsSameField2) { RAMDirectoryPtr dir = newLucene(); DocumentPtr doc = newLucene(); // f1 has no norms doc->add(newLucene(L"f1", L"v1", Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS)); doc->add(newLucene(L"f1", L"v2", Field::STORE_YES, Field::INDEX_NO)); // f2 has no TF FieldPtr f = newLucene(L"f2", L"v1", Field::STORE_NO, Field::INDEX_ANALYZED); f->setOmitTermFreqAndPositions(true); doc->add(f); doc->add(newLucene(L"f2", L"v2", Field::STORE_YES, Field::INDEX_NO)); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc); writer->optimize(); // be sure to have a single segment writer->close(); checkIndex(dir); SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(dir); FieldInfosPtr fi = reader->fieldInfos(); // f1 BOOST_CHECK(!reader->hasNorms(L"f1")); BOOST_CHECK(!fi->fieldInfo(L"f1")->omitTermFreqAndPositions); // f2 BOOST_CHECK(reader->hasNorms(L"f2")); BOOST_CHECK(fi->fieldInfo(L"f2")->omitTermFreqAndPositions); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/FieldInfosTest.cpp000066400000000000000000000043501217574114600235600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "DocHelper.h" #include "Document.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "RAMDirectory.h" #include "IndexOutput.h" using namespace Lucene; class FieldInfosTestFixture : public LuceneTestFixture, public DocHelper { public: virtual ~FieldInfosTestFixture() { } }; BOOST_FIXTURE_TEST_SUITE(FieldInfosTest, FieldInfosTestFixture) BOOST_AUTO_TEST_CASE(testFieldInfos) { DocumentPtr testDoc = newLucene(); DocHelper::setupDoc(testDoc); // Positive test of FieldInfos BOOST_CHECK(testDoc); FieldInfosPtr fieldInfos = newLucene(); fieldInfos->add(testDoc); // Since the complement is stored as well in the fields map BOOST_CHECK(fieldInfos->size() == DocHelper::all.size()); // this is all because we are using the no-arg constructor RAMDirectoryPtr dir = newLucene(); String name = L"testFile"; IndexOutputPtr output = dir->createOutput(name); BOOST_CHECK(output); // Use a RAMOutputStream fieldInfos->write(output); output->close(); BOOST_CHECK(output->length() > 0); FieldInfosPtr readIn = newLucene(dir, name); BOOST_CHECK(fieldInfos->size() == readIn->size()); FieldInfoPtr info = readIn->fieldInfo(L"textField1"); BOOST_CHECK(info); BOOST_CHECK(!info->storeTermVector); BOOST_CHECK(!info->omitNorms); info = readIn->fieldInfo(L"textField2"); BOOST_CHECK(info); BOOST_CHECK(info->storeTermVector); BOOST_CHECK(!info->omitNorms); info = readIn->fieldInfo(L"textField3"); BOOST_CHECK(info); BOOST_CHECK(!info->storeTermVector); BOOST_CHECK(info->omitNorms); info = readIn->fieldInfo(L"omitNorms"); BOOST_CHECK(info); BOOST_CHECK(!info->storeTermVector); BOOST_CHECK(info->omitNorms); dir->close(); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/index/FieldsReaderTest.cpp000066400000000000000000000360671217574114600241010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "RAMDirectory.h" #include "Document.h" #include "FieldInfos.h" #include "DocHelper.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "FieldsReader.h" #include "Field.h" #include "SetBasedFieldSelector.h" #include "LoadFirstFieldSelector.h" #include "FSDirectory.h" #include "BufferedIndexInput.h" #include "IndexReader.h" #include "MiscUtils.h" #include "FileUtils.h" using namespace Lucene; class FieldsReaderTestFixture : public LuceneTestFixture, public DocHelper { public: FieldsReaderTestFixture() { dir = newLucene(); testDoc = newLucene(); fieldInfos = newLucene(); DocHelper::setupDoc(testDoc); fieldInfos->add(testDoc); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(false); writer->addDocument(testDoc); writer->close(); } virtual ~FieldsReaderTestFixture() { } protected: RAMDirectoryPtr dir; DocumentPtr testDoc; FieldInfosPtr fieldInfos; static String TEST_SEGMENT_NAME; }; String FieldsReaderTestFixture::TEST_SEGMENT_NAME = L"_0"; DECLARE_SHARED_PTR(FaultyFSDirectory) DECLARE_SHARED_PTR(FaultyIndexInput) class FaultyIndexInput : public BufferedIndexInput { public: FaultyIndexInput(IndexInputPtr delegate) { this->delegate = delegate; count = 0; } virtual ~FaultyIndexInput() { } LUCENE_CLASS(FaultyIndexInput); public: IndexInputPtr delegate; static bool doFail; int32_t count; public: virtual void readInternal(uint8_t* b, 
int32_t offset, int32_t length) { simOutage(); delegate->readBytes(b, offset, length); } virtual void seekInternal(int64_t pos) { delegate->seek(pos); } virtual int64_t length() { return delegate->length(); } virtual void close() { delegate->close(); } virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()) { return newLucene(boost::dynamic_pointer_cast(delegate->clone())); } protected: void simOutage() { if (doFail && count++ % 2 == 1) boost::throw_exception(IOException(L"Simulated network outage")); } }; bool FaultyIndexInput::doFail = false; class FaultyFSDirectory : public Directory { public: FaultyFSDirectory(const String& dir) { fsDir = FSDirectory::open(dir); lockFactory = fsDir->getLockFactory(); } virtual ~FaultyFSDirectory() { } LUCENE_CLASS(FaultyFSDirectory); public: FSDirectoryPtr fsDir; public: virtual IndexInputPtr openInput(const String& name) { return newLucene(fsDir->openInput(name)); } virtual HashSet listAll() { return fsDir->listAll(); } virtual bool fileExists(const String& name) { return fsDir->fileExists(name); } virtual uint64_t fileModified(const String& name) { return fsDir->fileModified(name); } virtual void touchFile(const String& name) { fsDir->touchFile(name); } virtual void deleteFile(const String& name) { fsDir->deleteFile(name); } virtual int64_t fileLength(const String& name) { return fsDir->fileLength(name); } virtual IndexOutputPtr createOutput(const String& name) { return fsDir->createOutput(name); } virtual void close() { fsDir->close(); } }; static void checkSizeEquals(int32_t size, const uint8_t* sizebytes) { BOOST_CHECK_EQUAL((uint8_t)MiscUtils::unsignedShift(size, 24), sizebytes[0]); BOOST_CHECK_EQUAL((uint8_t)MiscUtils::unsignedShift(size, 16), sizebytes[1]); BOOST_CHECK_EQUAL((uint8_t)MiscUtils::unsignedShift(size, 8), sizebytes[2]); BOOST_CHECK_EQUAL((uint8_t)size, sizebytes[3]); } BOOST_FIXTURE_TEST_SUITE(FieldsReaderTest, FieldsReaderTestFixture) BOOST_AUTO_TEST_CASE(testFieldsReader) { 
BOOST_CHECK(dir); BOOST_CHECK(fieldInfos); FieldsReaderPtr reader = newLucene(dir, TEST_SEGMENT_NAME, fieldInfos); BOOST_CHECK(reader); BOOST_CHECK(reader->size() == 1); DocumentPtr doc = reader->doc(0, FieldSelectorPtr()); BOOST_CHECK(doc); BOOST_CHECK(doc->getField(DocHelper::TEXT_FIELD_1_KEY)); FieldablePtr field = doc->getField(DocHelper::TEXT_FIELD_2_KEY); BOOST_CHECK(field); BOOST_CHECK(field->isTermVectorStored()); BOOST_CHECK(field->isStoreOffsetWithTermVector()); BOOST_CHECK(field->isStorePositionWithTermVector()); BOOST_CHECK(!field->getOmitNorms()); BOOST_CHECK(!field->getOmitTermFreqAndPositions()); field = doc->getField(DocHelper::TEXT_FIELD_3_KEY); BOOST_CHECK(field); BOOST_CHECK(!field->isTermVectorStored()); BOOST_CHECK(!field->isStoreOffsetWithTermVector()); BOOST_CHECK(!field->isStorePositionWithTermVector()); BOOST_CHECK(field->getOmitNorms()); BOOST_CHECK(!field->getOmitTermFreqAndPositions()); field = doc->getField(DocHelper::NO_TF_KEY); BOOST_CHECK(field); BOOST_CHECK(!field->isTermVectorStored()); BOOST_CHECK(!field->isStoreOffsetWithTermVector()); BOOST_CHECK(!field->isStorePositionWithTermVector()); BOOST_CHECK(!field->getOmitNorms()); BOOST_CHECK(field->getOmitTermFreqAndPositions()); reader->close(); } BOOST_AUTO_TEST_CASE(testLazyFields) { BOOST_CHECK(dir); BOOST_CHECK(fieldInfos); FieldsReaderPtr reader = newLucene(dir, TEST_SEGMENT_NAME, fieldInfos); BOOST_CHECK(reader); BOOST_CHECK(reader->size() == 1); HashSet loadFieldNames = HashSet::newInstance(); loadFieldNames.add(DocHelper::TEXT_FIELD_1_KEY); loadFieldNames.add(DocHelper::TEXT_FIELD_UTF1_KEY); HashSet lazyFieldNames = HashSet::newInstance(); lazyFieldNames.add(DocHelper::LARGE_LAZY_FIELD_KEY); lazyFieldNames.add(DocHelper::LAZY_FIELD_KEY); lazyFieldNames.add(DocHelper::LAZY_FIELD_BINARY_KEY); lazyFieldNames.add(DocHelper::TEXT_FIELD_UTF2_KEY); SetBasedFieldSelectorPtr fieldSelector = newLucene(loadFieldNames, lazyFieldNames); DocumentPtr doc = reader->doc(0, fieldSelector); 
BOOST_CHECK(doc); FieldablePtr field = doc->getFieldable(DocHelper::LAZY_FIELD_KEY); BOOST_CHECK(field); BOOST_CHECK(field->isLazy()); String value = field->stringValue(); BOOST_CHECK(!value.empty()); BOOST_CHECK_EQUAL(value, DocHelper::LAZY_FIELD_TEXT); field = doc->getFieldable(DocHelper::TEXT_FIELD_1_KEY); BOOST_CHECK(field); BOOST_CHECK(!field->isLazy()); field = doc->getFieldable(DocHelper::TEXT_FIELD_UTF1_KEY); BOOST_CHECK(field); BOOST_CHECK(!field->isLazy()); BOOST_CHECK_EQUAL(field->stringValue(), DocHelper::FIELD_UTF1_TEXT); field = doc->getFieldable(DocHelper::TEXT_FIELD_UTF2_KEY); BOOST_CHECK(field); BOOST_CHECK(field->isLazy()); BOOST_CHECK_EQUAL(field->stringValue(), DocHelper::FIELD_UTF2_TEXT); field = doc->getFieldable(DocHelper::LAZY_FIELD_BINARY_KEY); BOOST_CHECK(field); BOOST_CHECK(field->stringValue().empty()); ByteArray bytes = field->getBinaryValue(); BOOST_CHECK(bytes); BOOST_CHECK_EQUAL(DocHelper::LAZY_FIELD_BINARY_BYTES.size(), bytes.size()); BOOST_CHECK(bytes); BOOST_CHECK(bytes.equals(DocHelper::LAZY_FIELD_BINARY_BYTES)); } BOOST_AUTO_TEST_CASE(testLazyFieldsAfterClose) { BOOST_CHECK(dir); BOOST_CHECK(fieldInfos); FieldsReaderPtr reader = newLucene(dir, TEST_SEGMENT_NAME, fieldInfos); BOOST_CHECK(reader); BOOST_CHECK(reader->size() == 1); HashSet loadFieldNames = HashSet::newInstance(); loadFieldNames.add(DocHelper::TEXT_FIELD_1_KEY); loadFieldNames.add(DocHelper::TEXT_FIELD_UTF1_KEY); HashSet lazyFieldNames = HashSet::newInstance(); lazyFieldNames.add(DocHelper::LARGE_LAZY_FIELD_KEY); lazyFieldNames.add(DocHelper::LAZY_FIELD_KEY); lazyFieldNames.add(DocHelper::LAZY_FIELD_BINARY_KEY); lazyFieldNames.add(DocHelper::TEXT_FIELD_UTF2_KEY); SetBasedFieldSelectorPtr fieldSelector = newLucene(loadFieldNames, lazyFieldNames); DocumentPtr doc = reader->doc(0, fieldSelector); BOOST_CHECK(doc); FieldablePtr field = doc->getFieldable(DocHelper::LAZY_FIELD_KEY); BOOST_CHECK(field); BOOST_CHECK(field->isLazy()); reader->close(); 
BOOST_CHECK_EXCEPTION(field->stringValue(), AlreadyClosedException, check_exception(LuceneException::AlreadyClosed)); } BOOST_AUTO_TEST_CASE(testLoadFirst) { BOOST_CHECK(dir); BOOST_CHECK(fieldInfos); FieldsReaderPtr reader = newLucene(dir, TEST_SEGMENT_NAME, fieldInfos); BOOST_CHECK(reader); BOOST_CHECK(reader->size() == 1); LoadFirstFieldSelectorPtr fieldSelector = newLucene(); DocumentPtr doc = reader->doc(0, fieldSelector); BOOST_CHECK(doc); int32_t count = 0; Collection fields = doc->getFields(); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { BOOST_CHECK(*field); String sv = (*field)->stringValue(); BOOST_CHECK(!sv.empty()); ++count; } BOOST_CHECK_EQUAL(count, 1); } /// Not really a test per se, but we should have some way of assessing whether this is worthwhile. /// Must test using a File based directory. BOOST_AUTO_TEST_CASE(testLazyPerformance) { String path(FileUtils::joinPath(getTempDir(), L"lazyDir")); FSDirectoryPtr tmpDir = FSDirectory::open(path); BOOST_CHECK(tmpDir); IndexWriterPtr writer = newLucene(tmpDir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(false); writer->addDocument(testDoc); writer->close(); BOOST_CHECK(fieldInfos); FieldsReaderPtr reader; int64_t lazyTime = 0; int64_t regularTime = 0; int32_t length = 50; HashSet lazyFieldNames = HashSet::newInstance(); lazyFieldNames.add(DocHelper::LARGE_LAZY_FIELD_KEY); SetBasedFieldSelectorPtr fieldSelector = newLucene(HashSet::newInstance(), lazyFieldNames); for (int32_t i = 0; i < length; ++i) { reader = newLucene(tmpDir, TEST_SEGMENT_NAME, fieldInfos); BOOST_CHECK(reader); BOOST_CHECK(reader->size() == 1); DocumentPtr doc = reader->doc(0, FieldSelectorPtr()); // Load all of them BOOST_CHECK(doc); FieldablePtr field = doc->getFieldable(DocHelper::LARGE_LAZY_FIELD_KEY); BOOST_CHECK(!field->isLazy()); int64_t start = MiscUtils::currentTimeMillis(); String value = field->stringValue(); int64_t finish = 
MiscUtils::currentTimeMillis(); // ~ 0ms BOOST_CHECK(!value.empty()); BOOST_CHECK(field); regularTime += (finish - start); reader->close(); reader.reset(); doc.reset(); reader = newLucene(tmpDir, TEST_SEGMENT_NAME, fieldInfos); doc = reader->doc(0, fieldSelector); field = doc->getFieldable(DocHelper::LARGE_LAZY_FIELD_KEY); BOOST_CHECK(field->isLazy()); start = MiscUtils::currentTimeMillis(); value = field->stringValue(); finish = MiscUtils::currentTimeMillis(); // ~ 50 - 70ms BOOST_CHECK(!value.empty()); lazyTime += (finish - start); reader->close(); } BOOST_TEST_MESSAGE("Average Non-lazy time (should be very close to zero): " << (regularTime / length) << " ms for " << length << " reads"); BOOST_TEST_MESSAGE("Average Lazy Time (should be greater than zero): " << (lazyTime / length) << " ms for " << length << " reads"); FileUtils::removeDirectory(path); } namespace TestLoadSize { DECLARE_SHARED_PTR(TestableFieldSelector) class TestableFieldSelector : public FieldSelector { public: virtual ~TestableFieldSelector() { } LUCENE_CLASS(TestableFieldSelector); public: virtual FieldSelectorResult accept(const String& fieldName) { if (fieldName == DocHelper::TEXT_FIELD_1_KEY || fieldName == DocHelper::LAZY_FIELD_BINARY_KEY) return FieldSelector::SELECTOR_SIZE; else if (fieldName == DocHelper::TEXT_FIELD_3_KEY) return FieldSelector::SELECTOR_LOAD; else return FieldSelector::SELECTOR_NO_LOAD; } }; } BOOST_AUTO_TEST_CASE(testLoadSize) { FieldsReaderPtr reader = newLucene(dir, TEST_SEGMENT_NAME, fieldInfos); DocumentPtr doc = reader->doc(0, newLucene()); FieldablePtr f1 = doc->getFieldable(DocHelper::TEXT_FIELD_1_KEY); FieldablePtr f3 = doc->getFieldable(DocHelper::TEXT_FIELD_3_KEY); FieldablePtr fb = doc->getFieldable(DocHelper::LAZY_FIELD_BINARY_KEY); BOOST_CHECK(f1->isBinary()); BOOST_CHECK(!f3->isBinary()); BOOST_CHECK(fb->isBinary()); checkSizeEquals(2 * String(DocHelper::FIELD_1_TEXT).length(), f1->getBinaryValue().get()); BOOST_CHECK_EQUAL(DocHelper::FIELD_3_TEXT, 
f3->stringValue()); checkSizeEquals(DocHelper::LAZY_FIELD_BINARY_BYTES.size(), fb->getBinaryValue().get()); reader->close(); } BOOST_AUTO_TEST_CASE(testExceptions) { String indexDir(FileUtils::joinPath(getTempDir(), L"testfieldswriterexceptions")); LuceneException finally; try { DirectoryPtr dir = newLucene(indexDir); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 2; ++i) writer->addDocument(testDoc); writer->optimize(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); FaultyIndexInput::doFail = true; bool exc = false; for (int32_t i = 0; i < 2; ++i) { try { reader->document(i); } catch (IOException&) { exc = true; // expected } try { reader->document(i); } catch (IOException&) { exc = true; // expected } } BOOST_CHECK(exc); reader->close(); dir->close(); } catch (LuceneException& e) { finally = e; } FileUtils::removeDirectory(indexDir); finally.throwException(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/FilterIndexReaderTest.cpp000066400000000000000000000076061217574114600251050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FilterIndexReader.h" #include "IndexReader.h" #include "IndexWriter.h" #include "MockRAMDirectory.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "TermEnum.h" #include "Term.h" using namespace Lucene; DECLARE_SHARED_PTR(TestReader) DECLARE_SHARED_PTR(TestTermEnum) DECLARE_SHARED_PTR(TestTermPositions) /// Filter that only permits terms containing 'e' class TestTermEnum : public FilterTermEnum { public: TestTermEnum(TermEnumPtr termEnum) : FilterTermEnum(termEnum) { } virtual ~TestTermEnum() { } LUCENE_CLASS(TestTermEnum); public: virtual bool next() { while (in->next()) { if (in->term()->text().find(L'e') != String::npos) return true; } return false; } }; /// Filter that only returns odd numbered documents. class TestTermPositions : public FilterTermPositions { public: TestTermPositions(TermPositionsPtr in) : FilterTermPositions(in) { } virtual ~TestTermPositions() { } LUCENE_CLASS(TestTermPositions); public: virtual bool next() { while (in->next()) { if ((in->doc() % 2) == 1) return true; } return false; } }; class TestReader : public FilterIndexReader { public: TestReader(IndexReaderPtr reader) : FilterIndexReader(reader) { } virtual ~TestReader() { } LUCENE_CLASS(TestReader); public: /// Filter terms with TestTermEnum. virtual TermEnumPtr terms() { return newLucene(in->terms()); } /// Filter positions with TestTermPositions. 
virtual TermPositionsPtr termPositions() { return newLucene(in->termPositions()); } }; BOOST_FIXTURE_TEST_SUITE(FilterIndexReaderTest, LuceneTestFixture) /// Tests the IndexReader::getFieldNames implementation BOOST_AUTO_TEST_CASE(testFilterIndexReader) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d1 = newLucene(); d1->add(newLucene(L"default", L"one two", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d1); DocumentPtr d2 = newLucene(); d2->add(newLucene(L"default", L"one three", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d2); DocumentPtr d3 = newLucene(); d2->add(newLucene(L"default", L"two four", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d3); writer->close(); IndexReaderPtr reader = newLucene(IndexReader::open(directory, true)); BOOST_CHECK(reader->isOptimized()); TermEnumPtr terms = reader->terms(); while (terms->next()) BOOST_CHECK_NE(terms->term()->text().find(L'e'), String::npos); terms->close(); TermPositionsPtr positions = reader->termPositions(newLucene(L"default", L"one")); while (positions->next()) BOOST_CHECK((positions->doc() % 2) == 1); int32_t NUM_DOCS = 3; TermDocsPtr td = reader->termDocs(TermPtr()); for (int32_t i = 0; i < NUM_DOCS; ++i) { BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(i, td->doc()); BOOST_CHECK_EQUAL(1, td->freq()); } td->close(); reader->close(); directory->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexCommitTest.cpp000066400000000000000000000047071217574114600237640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexCommit.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexCommitTest, LuceneTestFixture) namespace TestEqualsHashCode { class TestIndexCommit1 : public IndexCommit { public: TestIndexCommit1(DirectoryPtr dir) { this->dir = dir; } virtual ~TestIndexCommit1() { } protected: DirectoryPtr dir; public: virtual String getSegmentsFileName() { return L"a"; } virtual int64_t getVersion() { return 12; } virtual DirectoryPtr getDirectory() { return dir; } virtual HashSet getFileNames() { return HashSet(); } virtual void deleteCommit() { } virtual int64_t getGeneration() { return 0; } virtual int64_t getTimestamp() { return -1; } virtual MapStringString getUserData() { return MapStringString(); } virtual bool isDeleted() { return false; } virtual bool isOptimized() { return false; } }; class TestIndexCommit2 : public TestIndexCommit1 { public: TestIndexCommit2(DirectoryPtr dir) : TestIndexCommit1(dir) { } virtual ~TestIndexCommit2() { } public: virtual String getSegmentsFileName() { return L"b"; } }; } BOOST_AUTO_TEST_CASE(testEqualsHashCode) { DirectoryPtr dir = newLucene(); IndexCommitPtr ic1 = newLucene(dir); IndexCommitPtr ic2 = newLucene(dir); BOOST_CHECK(ic1->equals(ic2)); BOOST_CHECK_EQUAL(ic1->hashCode(), ic2->hashCode()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexFileDeleterTest.cpp000066400000000000000000000150271217574114600247150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexFileDeleter.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "Term.h" #include "CompoundFileReader.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "IndexInput.h" #include "IndexOutput.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexFileDeleterTest, LuceneTestFixture) static void addDoc(IndexWriterPtr writer, int32_t id) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"id", StringUtils::toString(id), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } static void copyFile(DirectoryPtr dir, const String& src, const String& dest) { IndexInputPtr in = dir->openInput(src); IndexOutputPtr out = dir->createOutput(dest); ByteArray b = ByteArray::newInstance(1024); int64_t remainder = in->length(); while (remainder > 0) { int32_t len = std::min(b.size(), (int32_t)remainder); in->readBytes(b.get(), 0, len); out->writeBytes(b.get(), len); remainder -= len; } in->close(); out->close(); } static HashSet difFiles(Collection files1, Collection files2) { HashSet set1 = HashSet::newInstance(); HashSet set2 = HashSet::newInstance(); HashSet extra = HashSet::newInstance(); for (Collection::iterator file = files1.begin(); file != files1.end(); ++file) set1.add(*file); for (Collection::iterator file = files2.begin(); file != files2.end(); ++file) set2.add(*file); for (HashSet::iterator file = set1.begin(); file != set1.end(); ++file) { if (!set2.contains(*file)) extra.add(*file); } for (HashSet::iterator file = set2.begin(); file != set2.end(); ++file) { if (!set1.contains(*file)) extra.add(*file); } return extra; } BOOST_AUTO_TEST_CASE(testDeleteLeftoverFiles) { DirectoryPtr dir = 
newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); int32_t i = 0; for (; i < 35; ++i) addDoc(writer, i); writer->setUseCompoundFile(false); for (; i < 45; ++i) addDoc(writer, i); writer->close(); // Delete one doc so we get a .del file IndexReaderPtr reader = IndexReader::open(dir, false); TermPtr searchTerm = newLucene(L"id", L"7"); int32_t delCount = reader->deleteDocuments(searchTerm); BOOST_CHECK_EQUAL(1, delCount); // Set one norm so we get a .s0 file reader->setNorm(21, L"content", 1.5); reader->close(); // Now, artificially create an extra .del file and extra .s0 file HashSet _files = dir->listAll(); // Here we have to figure out which field number corresponds to "content", and then // set our expected file names below accordingly. CompoundFileReaderPtr cfsReader = newLucene(dir, L"_2.cfs"); FieldInfosPtr fieldInfos = newLucene(cfsReader, L"_2.fnm"); int32_t contentFieldIndex = -1; for (int32_t j = 0; j < fieldInfos->size(); ++j) { FieldInfoPtr fi = fieldInfos->fieldInfo(j); if (fi->name == L"content") { contentFieldIndex = j; break; } } cfsReader->close(); BOOST_CHECK_NE(contentFieldIndex, -1); String normSuffix = L"s" + StringUtils::toString(contentFieldIndex); // Create a bogus separate norms file for a segment/field that actually has a // separate norms file already copyFile(dir, L"_2_1." + normSuffix, L"_2_2." + normSuffix); // Create a bogus separate norms file for a segment/field that actually has a // separate norms file already, using the "not compound file" extension copyFile(dir, L"_2_1." + normSuffix, L"_2_2.f" + StringUtils::toString(contentFieldIndex)); // Create a bogus separate norms file for a segment/field that does not have a // separate norms file already copyFile(dir, L"_2_1." + normSuffix, L"_1_1." 
+ normSuffix); // Create a bogus separate norms file for a segment/field that does not have a // separate norms file already using the "not compound file" extension copyFile(dir, L"_2_1." + normSuffix, L"_1_1.f" + StringUtils::toString(contentFieldIndex)); // Create a bogus separate del file for a segment that already has a separate // del file copyFile(dir, L"_0_1.del", L"_0_2.del"); // Create a bogus separate del file for a segment that does not yet have a // separate del file copyFile(dir, L"_0_1.del", L"_1_1.del"); // Create a bogus separate del file for a non-existent segment copyFile(dir, L"_0_1.del", L"_188_1.del"); // Create a bogus segment file copyFile(dir, L"_0.cfs", L"_188.cfs"); // Create a bogus fnm file when the CFS already exists copyFile(dir, L"_0.cfs", L"_0.fnm"); // Create a deletable file copyFile(dir, L"_0.cfs", L"deletable"); // Create some old segments file copyFile(dir, L"segments_3", L"segments"); copyFile(dir, L"segments_3", L"segments_2"); // Create a bogus cfs file shadowing a non-cfs segment copyFile(dir, L"_2.cfs", L"_3.cfs"); HashSet filesPre = dir->listAll(); // Open and close a writer: it should delete the above 4 files and nothing more writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); writer->close(); HashSet _files2 = dir->listAll(); dir->close(); Collection files = Collection::newInstance(_files.begin(), _files.end()); Collection files2 = Collection::newInstance(_files2.begin(), _files2.end()); std::sort(files.begin(), files.end()); std::sort(files2.begin(), files2.end()); HashSet dif = difFiles(files, files2); BOOST_CHECK(dif.empty()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexInputTest.cpp000066400000000000000000000246651217574114600236400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockIndexInput.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexInputTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testReadInt) { ByteArray inputBytes(ByteArray::newInstance(10)); uint8_t input[4] = { 1, 2, 3, 4 }; std::memcpy(inputBytes.get(), input, 4); IndexInputPtr is = newLucene(inputBytes); BOOST_CHECK_EQUAL(is->readInt(), 16909060); } BOOST_AUTO_TEST_CASE(testReadVInt) { ByteArray inputBytes(ByteArray::newInstance(10)); uint8_t input[4] = { 200, 201, 150, 96 }; std::memcpy(inputBytes.get(), input, 4); IndexInputPtr is = newLucene(inputBytes); BOOST_CHECK_EQUAL(is->readVInt(), 201696456); } BOOST_AUTO_TEST_CASE(testReadLong) { ByteArray inputBytes(ByteArray::newInstance(10)); uint8_t input[8] = { 32, 43, 32, 96, 12, 54, 22, 96 }; std::memcpy(inputBytes.get(), input, 8); IndexInputPtr is = newLucene(inputBytes); BOOST_CHECK_EQUAL(is->readLong(), 2317982030106072672LL); } BOOST_AUTO_TEST_CASE(testReadVLong) { ByteArray inputBytes(ByteArray::newInstance(10)); uint8_t input[8] = { 213, 143, 132, 196, 172, 154, 129, 96 }; std::memcpy(inputBytes.get(), input, 8); IndexInputPtr is = newLucene(inputBytes); BOOST_CHECK_EQUAL(is->readVLong(), 54048498881988565LL); } BOOST_AUTO_TEST_CASE(testReadString) { ByteArray inputBytes(ByteArray::newInstance(30)); uint8_t input[12] = { 11, 't', 'e', 's', 't', ' ', 's', 't', 'r', 'i', 'n', 'g' }; std::memcpy(inputBytes.get(), input, 12); IndexInputPtr is = newLucene(inputBytes); BOOST_CHECK_EQUAL(is->readString(), L"test string"); } BOOST_AUTO_TEST_CASE(testReadModifiedUTF8String) { ByteArray inputBytes(ByteArray::newInstance(30)); uint8_t input[12] = { 11, 't', 'e', 's', 't', ' ', 's', 't', 'r', 'i', 'n', 'g' }; std::memcpy(inputBytes.get(), 
input, 12); IndexInputPtr is = newLucene(inputBytes); BOOST_CHECK_EQUAL(is->readModifiedUTF8String(), L"test string"); } BOOST_AUTO_TEST_CASE(testReadChars) { ByteArray inputBytes(ByteArray::newInstance(30)); uint8_t input[11] = { 't', 'e', 's', 't', ' ', 's', 't', 'r', 'i', 'n', 'g' }; std::memcpy(inputBytes.get(), input, 11); IndexInputPtr is = newLucene(inputBytes); ByteArray outputChars(ByteArray::newInstance(40 * sizeof(wchar_t))); is->readChars((wchar_t*)outputChars.get(), 0, 11); wchar_t expected[11] = { L't', L'e', L's', L't', L' ', L's', L't', L'r', L'i', L'n', L'g' }; BOOST_CHECK_EQUAL(std::memcmp((wchar_t*)outputChars.get(), expected, 11 * sizeof(wchar_t)), 0); } BOOST_AUTO_TEST_CASE(testSkipOneChar) { ByteArray inputBytes(ByteArray::newInstance(10)); uint8_t input[5] = { 1, 2, 3, 4, 5 }; std::memcpy(inputBytes.get(), input, 5); IndexInputPtr is = newLucene(inputBytes); is->skipChars(1); BOOST_CHECK_EQUAL(is->getFilePointer(), 1); } BOOST_AUTO_TEST_CASE(testSkipTwoChars) { ByteArray inputBytes(ByteArray::newInstance(10)); uint8_t input[5] = { 1, 2, 3, 4, 5 }; std::memcpy(inputBytes.get(), input, 5); IndexInputPtr is = newLucene(inputBytes); is->skipChars(2); BOOST_CHECK_EQUAL(is->getFilePointer(), 2); } BOOST_AUTO_TEST_CASE(testSkipTwoCharsAdditionalChar) { ByteArray inputBytes(ByteArray::newInstance(10)); uint8_t input[5] = { 1, 132, 132, 4, 5 }; std::memcpy(inputBytes.get(), input, 5); IndexInputPtr is = newLucene(inputBytes); is->skipChars(2); BOOST_CHECK_EQUAL(is->getFilePointer(), 3); } BOOST_AUTO_TEST_CASE(testSkipTwoCharsAdditionalTwoChars) { ByteArray inputBytes(ByteArray::newInstance(10)); uint8_t input[5] = { 1, 232, 232, 4, 5 }; std::memcpy(inputBytes.get(), input, 5); IndexInputPtr is = newLucene(inputBytes); is->skipChars(2); BOOST_CHECK_EQUAL(is->getFilePointer(), 4); } BOOST_AUTO_TEST_CASE(testRead) { ByteArray inputBytes(ByteArray::newInstance(100)); uint8_t input[88] = {0x80, 0x01, 0xff, 0x7f, 0x80, 0x80, 0x01, 0x81, 0x80, 0x01, 0x06, 
'L', 'u', 'c', 'e', 'n', 'e', // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK") 0x02, 0xc2, 0xbf, 0x0a, 'L', 'u', 0xc2, 0xbf, 'c', 'e', 0xc2, 0xbf, 'n', 'e', // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES") 0x03, 0xe2, 0x98, 0xa0, 0x0c, 'L', 'u', 0xe2, 0x98, 0xa0, 'c', 'e', 0xe2, 0x98, 0xa0, 'n', 'e', // surrogate pairs // (U+1D11E "MUSICAL SYMBOL G CLEF") // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE") 0x04, 0xf0, 0x9d, 0x84, 0x9e, 0x08, 0xf0, 0x9d, 0x84, 0x9e, 0xf0, 0x9d, 0x85, 0xa0, 0x0e, 'L', 'u', 0xf0, 0x9d, 0x84, 0x9e, 'c', 'e', 0xf0, 0x9d, 0x85, 0xa0, 'n', 'e', // null bytes 0x01, 0x00, 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e'}; std::memcpy(inputBytes.get(), input, 88); IndexInputPtr is = newLucene(inputBytes); BOOST_CHECK_EQUAL(is->readVInt(), 128); BOOST_CHECK_EQUAL(is->readVInt(), 16383); BOOST_CHECK_EQUAL(is->readVInt(), 16384); BOOST_CHECK_EQUAL(is->readVInt(), 16385); BOOST_CHECK_EQUAL(is->readString(), L"Lucene"); const uint8_t question[] = {0xc2, 0xbf}; BOOST_CHECK_EQUAL(is->readString(), UTF8_TO_STRING(question)); const uint8_t skull[] = {0x4c, 0x75, 0xc2, 0xbf, 0x63, 0x65, 0xc2, 0xbf, 0x6e, 0x65}; BOOST_CHECK_EQUAL(is->readString(), UTF8_TO_STRING(skull)); const uint8_t gclef[] = {0xe2, 0x98, 0xa0}; BOOST_CHECK_EQUAL(is->readString(), UTF8_TO_STRING(gclef)); const uint8_t eighthnote[] = {0x4c, 0x75, 0xe2, 0x98, 0xa0, 0x63, 0x65, 0xe2, 0x98, 0xa0, 0x6e, 0x65}; BOOST_CHECK_EQUAL(is->readString(), UTF8_TO_STRING(eighthnote)); String readString(is->readString()); #ifdef LPP_UNICODE_CHAR_SIZE_2 BOOST_CHECK_EQUAL(readString[0], 55348); BOOST_CHECK_EQUAL(readString[1], 56606); #else BOOST_CHECK_EQUAL(readString[0], 119070); #endif readString = is->readString(); #ifdef LPP_UNICODE_CHAR_SIZE_2 BOOST_CHECK_EQUAL(readString[0], 55348); BOOST_CHECK_EQUAL(readString[1], 56606); BOOST_CHECK_EQUAL(readString[2], 55348); BOOST_CHECK_EQUAL(readString[3], 56672); #else BOOST_CHECK_EQUAL(readString[0], 119070); BOOST_CHECK_EQUAL(readString[1], 119136); #endif 
readString = is->readString(); #ifdef LPP_UNICODE_CHAR_SIZE_2 BOOST_CHECK_EQUAL(readString[0], L'L'); BOOST_CHECK_EQUAL(readString[1], L'u'); BOOST_CHECK_EQUAL(readString[2], 55348); BOOST_CHECK_EQUAL(readString[3], 56606); BOOST_CHECK_EQUAL(readString[4], L'c'); BOOST_CHECK_EQUAL(readString[5], L'e'); BOOST_CHECK_EQUAL(readString[6], 55348); BOOST_CHECK_EQUAL(readString[7], 56672); BOOST_CHECK_EQUAL(readString[8], L'n'); BOOST_CHECK_EQUAL(readString[9], L'e'); #else BOOST_CHECK_EQUAL(readString[0], L'L'); BOOST_CHECK_EQUAL(readString[1], L'u'); BOOST_CHECK_EQUAL(readString[2], 119070); BOOST_CHECK_EQUAL(readString[3], L'c'); BOOST_CHECK_EQUAL(readString[4], L'e'); BOOST_CHECK_EQUAL(readString[5], 119136); BOOST_CHECK_EQUAL(readString[6], L'n'); BOOST_CHECK_EQUAL(readString[7], L'e'); #endif readString = is->readString(); BOOST_CHECK_EQUAL(readString[0], 0); readString = is->readString(); BOOST_CHECK_EQUAL(readString[0], L'L'); BOOST_CHECK_EQUAL(readString[1], L'u'); BOOST_CHECK_EQUAL(readString[2], 0); BOOST_CHECK_EQUAL(readString[3], L'c'); BOOST_CHECK_EQUAL(readString[4], L'e'); BOOST_CHECK_EQUAL(readString[5], 0); BOOST_CHECK_EQUAL(readString[6], L'n'); BOOST_CHECK_EQUAL(readString[7], L'e'); } BOOST_AUTO_TEST_CASE(testSkipChars) { ByteArray inputBytes(ByteArray::newInstance(100)); uint8_t input[17] = {0x80, 0x01, 0xff, 0x7f, 0x80, 0x80, 0x01, 0x81, 0x80, 0x01, 0x06, 'L', 'u', 'c', 'e', 'n', 'e'}; std::memcpy(inputBytes.get(), input, 17); IndexInputPtr is = newLucene(inputBytes); BOOST_CHECK_EQUAL(is->readVInt(), 128); BOOST_CHECK_EQUAL(is->readVInt(), 16383); BOOST_CHECK_EQUAL(is->readVInt(), 16384); BOOST_CHECK_EQUAL(is->readVInt(), 16385); BOOST_CHECK_EQUAL(is->readVInt(), 6); is->skipChars(3); ByteArray remainingBytes(ByteArray::newInstance(4 * sizeof(wchar_t))); is->readChars((wchar_t*)remainingBytes.get(), 0, 3); BOOST_CHECK_EQUAL(String((wchar_t*)remainingBytes.get(), 3), L"ene"); } struct lessKey { inline bool operator()(const 
MapStringString::key_value& first, const MapStringString::key_value& second) const { return (first.first < second.first); } }; BOOST_AUTO_TEST_CASE(testReadStringMap) { ByteArray inputBytes(ByteArray::newInstance(100)); uint8_t input[34] = { 0, 0, 0, 3, 4, 'k', 'e', 'y', '1', 4, 'v', 'a', 'l', '1', 4, 'k', 'e', 'y', '2', 4, 'v', 'a', 'l', '2', 4, 'k', 'e', 'y', '3', 4, 'v', 'a', 'l', '3' }; std::memcpy(inputBytes.get(), input, 34); IndexInputPtr is = newLucene(inputBytes); MapStringString map = is->readStringStringMap(); BOOST_CHECK_EQUAL(map.size(), 3); Collection orderedMap(Collection::newInstance(map.begin(), map.end())); // order map by key std::sort(orderedMap.begin(), orderedMap.end(), lessKey()); int32_t count = 1; for (Collection::iterator mapEntry = orderedMap.begin(); mapEntry != orderedMap.end(); ++mapEntry, ++count) { BOOST_CHECK_EQUAL(mapEntry->first, L"key" + StringUtils::toString(count)); BOOST_CHECK_EQUAL(mapEntry->second, L"val" + StringUtils::toString(count)); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexReaderCloneNorms.cpp000066400000000000000000000000001217574114600250540ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/index/IndexReaderCloneNormsTest.cpp000066400000000000000000000277141217574114600257410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "DefaultSimilarity.h" #include "StandardAnalyzer.h" #include "FSDirectory.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "MockRAMDirectory.h" #include "LogDocMergePolicy.h" #include "WhitespaceAnalyzer.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "FileUtils.h" using namespace Lucene; class SimilarityOne : public DefaultSimilarity { public: virtual ~SimilarityOne() { } public: virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } }; class IndexReaderCloneNormsTestFixture : public LuceneTestFixture { public: IndexReaderCloneNormsTestFixture() { similarityOne = newLucene(); anlzr = newLucene(LuceneVersion::LUCENE_CURRENT); numDocNorms = 0; lastNorm = 0.0; normDelta = 0.001; } virtual ~IndexReaderCloneNormsTestFixture() { } protected: static const int32_t NUM_FIELDS; SimilarityPtr similarityOne; AnalyzerPtr anlzr; int32_t numDocNorms; Collection norms; Collection modifiedNorms; double lastNorm; double normDelta; public: void createIndex(DirectoryPtr dir) { IndexWriterPtr iw = newLucene(dir, anlzr, true, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->setSimilarity(similarityOne); iw->setUseCompoundFile(true); iw->close(); } void createIndex(DirectoryPtr dir, bool multiSegment) { IndexWriter::unlock(dir); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->setMergePolicy(newLucene(w)); for (int32_t i = 0; i < 100; ++i) { w->addDocument(createDocument(i, 4)); if (multiSegment && (i % 10) == 0) w->commit(); } if (!multiSegment) w->optimize(); w->close(); IndexReaderPtr r = IndexReader::open(dir, false); if (multiSegment) BOOST_CHECK(r->getSequentialSubReaders().size() > 1); else BOOST_CHECK_EQUAL(r->getSequentialSubReaders().size(), 1); r->close(); } 
DocumentPtr createDocument(int32_t n, int32_t numFields) { StringStream sb; DocumentPtr doc = newLucene(); sb << L"a" << n; doc->add(newLucene(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"fielda", sb.str(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); doc->add(newLucene(L"fieldb", sb.str(), Field::STORE_YES, Field::INDEX_NO)); sb << L" b" << n; for (int32_t i = 1; i < numFields; ++i) doc->add(newLucene(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED)); return doc; } /// try cloning and reopening the norms void doTestNorms(DirectoryPtr dir) { addDocs(dir, 12, true); IndexReaderPtr ir = IndexReader::open(dir, false); verifyIndex(ir); modifyNormsForF1(ir); IndexReaderPtr irc = boost::dynamic_pointer_cast(ir->clone()); verifyIndex(irc); modifyNormsForF1(irc); IndexReaderPtr irc3 = boost::dynamic_pointer_cast(irc->clone()); verifyIndex(irc3); modifyNormsForF1(irc3); verifyIndex(irc3); irc3->flush(); irc3->close(); } void modifyNormsForF1(DirectoryPtr dir) { IndexReaderPtr ir = IndexReader::open(dir, false); modifyNormsForF1(ir); } void modifyNormsForF1(IndexReaderPtr ir) { int32_t n = ir->maxDoc(); for (int32_t i = 0; i < n; i += 3) // modify for every third doc { int32_t k = (i * 3) % modifiedNorms.size(); double origNorm = modifiedNorms[i]; double newNorm = modifiedNorms[k]; modifiedNorms[i] = newNorm; modifiedNorms[k] = origNorm; ir->setNorm(i, L"f1", newNorm); ir->setNorm(k, L"f1", origNorm); } } void addDocs(DirectoryPtr dir, int32_t ndocs, bool compound) { IndexWriterPtr iw = newLucene(dir, anlzr, false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->setSimilarity(similarityOne); iw->setUseCompoundFile(compound); for (int32_t i = 0; i < ndocs; ++i) iw->addDocument(newDoc()); iw->close(); } DocumentPtr newDoc() { DocumentPtr d = newLucene(); double boost = nextNorm(); for (int32_t i = 0; i < 10; ++i) { FieldPtr f = newLucene(L"f" 
+ StringUtils::toString(i), L"v" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED); f->setBoost(boost); d->add(f); } return d; } double nextNorm() { double norm = lastNorm + normDelta; do { double norm1 = Similarity::decodeNorm(Similarity::encodeNorm(norm)); if (norm1 > lastNorm) { norm = norm1; break; } norm += normDelta; } while (true); norms.add(numDocNorms, norm); modifiedNorms.add(numDocNorms, norm); ++numDocNorms; // there's a limit to how many distinct values can be stored in a single byte lastNorm = (norm > 10 ? 0 : norm); return norm; } void verifyIndex(DirectoryPtr dir) { IndexReaderPtr ir = IndexReader::open(dir, false); verifyIndex(ir); ir->close(); } void verifyIndex(IndexReaderPtr ir) { for (int32_t i = 0; i < NUM_FIELDS; ++i) { String field = L"f" + StringUtils::toString(i); ByteArray b = ir->norms(field); BOOST_CHECK_EQUAL(numDocNorms, b.size()); Collection storedNorms = (i == 1 ? modifiedNorms : norms); for (int32_t j = 0; j < b.size(); ++j) { double norm = Similarity::decodeNorm(b[j]); double norm1 = storedNorms[j]; BOOST_CHECK_EQUAL(norm, norm1); // 0.000001 ?? } } } }; const int32_t IndexReaderCloneNormsTestFixture::NUM_FIELDS = 10; /// Tests cloning IndexReader norms BOOST_FIXTURE_TEST_SUITE(IndexReaderCloneNormsTest, IndexReaderCloneNormsTestFixture) /// Test that norms values are preserved as the index is maintained. Including separate norms. /// Including merging indexes with separate norms. Including optimize. 
BOOST_AUTO_TEST_CASE(testNorms) { // test with a single index: index1 String indexDir1(FileUtils::joinPath(getTempDir(), L"lucenetestindex1")); DirectoryPtr dir1 = FSDirectory::open(indexDir1); IndexWriter::unlock(dir1); norms = Collection::newInstance(); modifiedNorms = Collection::newInstance(); createIndex(dir1); doTestNorms(dir1); // test with a single index: index2 Collection norms1 = norms; Collection modifiedNorms1 = modifiedNorms; int32_t numDocNorms1 = numDocNorms; norms = Collection::newInstance(); modifiedNorms = Collection::newInstance(); numDocNorms = 0; String indexDir2(FileUtils::joinPath(getTempDir(), L"lucenetestindex2")); DirectoryPtr dir2 = FSDirectory::open(indexDir2); createIndex(dir2); doTestNorms(dir2); // add index1 and index2 to a third index: index3 String indexDir3(FileUtils::joinPath(getTempDir(), L"lucenetestindex3")); DirectoryPtr dir3 = FSDirectory::open(indexDir3); createIndex(dir3); IndexWriterPtr iw = newLucene(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->addIndexesNoOptimize(newCollection(dir1, dir2)); iw->optimize(); iw->close(); norms1.addAll(norms.begin(), norms.end()); norms = norms1; modifiedNorms1.addAll(modifiedNorms.begin(), modifiedNorms.end()); modifiedNorms = modifiedNorms1; numDocNorms += numDocNorms1; // test with index3 verifyIndex(dir3); doTestNorms(dir3); // now with optimize iw = newLucene(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->optimize(); iw->close(); verifyIndex(dir3); dir1->close(); dir2->close(); dir3->close(); } BOOST_AUTO_TEST_CASE(testNormsClose) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); SegmentReaderPtr reader1 = SegmentReader::getOnlySegmentReader(dir1); reader1->norms(L"field1"); NormPtr r1norm = reader1->_norms.get(L"field1"); SegmentReaderRefPtr r1BytesRef = r1norm->bytesRef(); SegmentReaderPtr reader2 = 
boost::dynamic_pointer_cast(reader1->clone()); BOOST_CHECK_EQUAL(2, r1norm->bytesRef()->refCount()); reader1->close(); BOOST_CHECK_EQUAL(1, r1BytesRef->refCount()); reader2->norms(L"field1"); reader2->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testNormsRefCounting) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); IndexReaderPtr reader1 = IndexReader::open(dir1, false); IndexReaderPtr reader2C = boost::dynamic_pointer_cast(reader1->clone()); SegmentReaderPtr segmentReader2C = SegmentReader::getOnlySegmentReader(reader2C); segmentReader2C->norms(L"field1"); // load the norms for the field NormPtr reader2CNorm = segmentReader2C->_norms.get(L"field1"); BOOST_CHECK_EQUAL(2, reader2CNorm->bytesRef()->refCount()); IndexReaderPtr reader3C = boost::dynamic_pointer_cast(reader2C->clone()); SegmentReaderPtr segmentReader3C = SegmentReader::getOnlySegmentReader(reader3C); NormPtr reader3CCNorm = segmentReader3C->_norms.get(L"field1"); BOOST_CHECK_EQUAL(3, reader3CCNorm->bytesRef()->refCount()); // edit a norm and the refcount should be 1 IndexReaderPtr reader4C = boost::dynamic_pointer_cast(reader3C->clone()); SegmentReaderPtr segmentReader4C = SegmentReader::getOnlySegmentReader(reader4C); BOOST_CHECK_EQUAL(4, reader3CCNorm->bytesRef()->refCount()); reader4C->setNorm(5, L"field1", 0.33); // generate a cannot update exception in reader1 BOOST_CHECK_EXCEPTION(reader3C->setNorm(1, L"field1", 0.99), LockObtainFailedException, check_exception(LuceneException::LockObtainFailed)); // norm values should be different BOOST_CHECK_NE(Similarity::decodeNorm(segmentReader3C->norms(L"field1")[5]), Similarity::decodeNorm(segmentReader4C->norms(L"field1")[5])); NormPtr reader4CCNorm = segmentReader4C->_norms.get(L"field1"); BOOST_CHECK_EQUAL(3, reader3CCNorm->bytesRef()->refCount()); BOOST_CHECK_EQUAL(1, reader4CCNorm->bytesRef()->refCount()); IndexReaderPtr reader5C = boost::dynamic_pointer_cast(reader4C->clone()); SegmentReaderPtr segmentReader5C = 
SegmentReader::getOnlySegmentReader(reader5C); NormPtr reader5CCNorm = segmentReader5C->_norms.get(L"field1"); reader5C->setNorm(5, L"field1", 0.7); BOOST_CHECK_EQUAL(1, reader5CCNorm->bytesRef()->refCount()); reader5C->close(); reader4C->close(); reader3C->close(); reader2C->close(); reader1->close(); dir1->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexReaderCloneTest.cpp000066400000000000000000000464071217574114600247220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexReader.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "SimpleAnalyzer.h" #include "LogDocMergePolicy.h" #include "Document.h" #include "Field.h" #include "ReadOnlySegmentReader.h" #include "ReadOnlyDirectoryReader.h" #include "ParallelReader.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "Similarity.h" #include "Term.h" #include "MultiReader.h" #include "MiscUtils.h" using namespace Lucene; /// Tests cloning multiple types of readers, modifying the deletedDocs and norms and verifies copy on write semantics /// of the deletedDocs and norms is implemented properly BOOST_FIXTURE_TEST_SUITE(IndexReaderCloneTest, LuceneTestFixture) static DocumentPtr createDocument(int32_t n, int32_t numFields) { StringStream sb; DocumentPtr doc = newLucene(); sb << L"a" << n; doc->add(newLucene(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"fielda", sb.str(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); doc->add(newLucene(L"fieldb", sb.str(), Field::STORE_YES, 
Field::INDEX_NO)); sb << L" b" << n; for (int32_t i = 1; i < numFields; ++i) doc->add(newLucene(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED)); return doc; } static void createIndex(DirectoryPtr dir, bool multiSegment) { IndexWriter::unlock(dir); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->setMergePolicy(newLucene(w)); for (int32_t i = 0; i < 100; ++i) { w->addDocument(createDocument(i, 4)); if (multiSegment && (i % 10) == 0) w->commit(); } if (!multiSegment) w->optimize(); w->close(); IndexReaderPtr r = IndexReader::open(dir, false); if (multiSegment) BOOST_CHECK(r->getSequentialSubReaders().size() > 1); else BOOST_CHECK_EQUAL(r->getSequentialSubReaders().size(), 1); r->close(); } static bool isReadOnly(IndexReaderPtr r) { return (MiscUtils::typeOf(r) || MiscUtils::typeOf(r)); } static bool deleteWorked(int32_t doc, IndexReaderPtr r) { bool exception = false; try { // trying to delete from the original reader should throw an exception r->deleteDocument(doc); } catch (...) { exception = true; } return !exception; } /// 1. Get a norm from the original reader /// 2. Clone the original reader /// 3. Delete a document and set the norm of the cloned reader /// 4. Verify the norms are not the same on each reader /// 5. Verify the doc deleted is only in the cloned reader /// 6. 
Try to delete a document in the original reader, an exception should be thrown static void performDefaultTests(IndexReaderPtr r1) { double norm1 = Similarity::decodeNorm(r1->norms(L"field1")[4]); IndexReaderPtr pr1Clone = boost::dynamic_pointer_cast(r1->clone()); pr1Clone->deleteDocument(10); pr1Clone->setNorm(4, L"field1", 0.5); BOOST_CHECK(Similarity::decodeNorm(r1->norms(L"field1")[4]) == norm1); BOOST_CHECK_NE(Similarity::decodeNorm(pr1Clone->norms(L"field1")[4]), norm1); BOOST_CHECK(!r1->isDeleted(10)); BOOST_CHECK(pr1Clone->isDeleted(10)); // try to update the original reader, which should throw an exception BOOST_CHECK_EXCEPTION(r1->deleteDocument(11), LuceneException, check_exception(LuceneException::Null)); pr1Clone->close(); } static void modifyIndex(int32_t i, DirectoryPtr dir) { switch (i) { case 0: { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->deleteDocuments(newLucene(L"field2", L"a11")); w->deleteDocuments(newLucene(L"field2", L"b30")); w->close(); break; } case 1: { IndexReaderPtr reader = IndexReader::open(dir, false); reader->setNorm(4, L"field1", (uint8_t)123); reader->setNorm(44, L"field2", (uint8_t)222); reader->setNorm(44, L"field4", (uint8_t)22); reader->close(); break; } case 2: { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->optimize(); w->close(); break; } case 3: { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->addDocument(createDocument(101, 4)); w->optimize(); w->addDocument(createDocument(102, 4)); w->addDocument(createDocument(103, 4)); w->close(); break; } case 4: { IndexReaderPtr reader = IndexReader::open(dir, false); reader->setNorm(5, L"field1", (uint8_t)123); reader->setNorm(55, L"field2", (uint8_t)222); reader->close(); break; } case 5: { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->addDocument(createDocument(101, 4)); w->close(); break; } } } static void 
checkDelDocsRefCountEquals(int32_t refCount, SegmentReaderPtr reader) { BOOST_CHECK_EQUAL(refCount, reader->deletedDocsRef->refCount()); } static void checkDocDeleted(SegmentReaderPtr reader, SegmentReaderPtr reader2, int32_t doc) { BOOST_CHECK_EQUAL(reader->isDeleted(doc), reader2->isDeleted(doc)); } BOOST_AUTO_TEST_CASE(testCloneReadOnlySegmentReader) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); IndexReaderPtr reader = IndexReader::open(dir1, false); IndexReaderPtr readOnlyReader = boost::dynamic_pointer_cast(reader->clone(true)); BOOST_CHECK(isReadOnly(readOnlyReader)); BOOST_CHECK(!deleteWorked(1, readOnlyReader)); reader->close(); readOnlyReader->close(); dir1->close(); } /// Open non-readOnly reader1, clone to non-readOnly reader2, make sure we can change reader2 BOOST_AUTO_TEST_CASE(testCloneNoChangesStillReadOnly) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr r1 = IndexReader::open(dir1, false); IndexReaderPtr r2 = boost::dynamic_pointer_cast(r1->clone(false)); BOOST_CHECK(deleteWorked(1, r2)); r1->close(); r2->close(); dir1->close(); } /// Open non-readOnly reader1, clone to non-readOnly reader2, make sure we can change reader1 BOOST_AUTO_TEST_CASE(testCloneWriteToOrig) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr r1 = IndexReader::open(dir1, false); IndexReaderPtr r2 = boost::dynamic_pointer_cast(r1->clone(false)); BOOST_CHECK(deleteWorked(1, r1)); r1->close(); r2->close(); dir1->close(); } /// Open non-readOnly reader1, clone to non-readOnly reader2, make sure we can change reader2 BOOST_AUTO_TEST_CASE(testCloneWriteToClone) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr r1 = IndexReader::open(dir1, false); IndexReaderPtr r2 = boost::dynamic_pointer_cast(r1->clone(false)); BOOST_CHECK(deleteWorked(1, r2)); // should fail because reader1 holds the write lock BOOST_CHECK(!deleteWorked(1, r1)); r2->close(); // should fail because we are now stale (reader1 
committed changes) BOOST_CHECK(!deleteWorked(1, r1)); r1->close(); dir1->close(); } /// Create single-segment index, open non-readOnly SegmentReader, add docs, reopen to multireader, then do delete BOOST_AUTO_TEST_CASE(testReopenSegmentReaderToMultiReader) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); IndexReaderPtr reader1 = IndexReader::open(dir1, false); modifyIndex(5, dir1); IndexReaderPtr reader2 = reader1->reopen(); BOOST_CHECK_NE(reader1, reader2); BOOST_CHECK(deleteWorked(1, reader2)); reader1->close(); reader2->close(); dir1->close(); } /// Open non-readOnly reader1, clone to readOnly reader2 BOOST_AUTO_TEST_CASE(testCloneWriteableToReadOnly) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr reader = IndexReader::open(dir1, false); IndexReaderPtr readOnlyReader = boost::dynamic_pointer_cast(reader->clone(true)); BOOST_CHECK(isReadOnly(readOnlyReader)); BOOST_CHECK(!deleteWorked(1, readOnlyReader)); BOOST_CHECK(!readOnlyReader->hasChanges()); reader->close(); readOnlyReader->close(); dir1->close(); } /// Open non-readOnly reader1, reopen to readOnly reader2 BOOST_AUTO_TEST_CASE(testReopenWriteableToReadOnly) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr reader = IndexReader::open(dir1, false); int32_t docCount = reader->numDocs(); BOOST_CHECK(deleteWorked(1, reader)); BOOST_CHECK_EQUAL(docCount - 1, reader->numDocs()); IndexReaderPtr readOnlyReader = reader->reopen(true); BOOST_CHECK(isReadOnly(readOnlyReader)); BOOST_CHECK(!deleteWorked(1, readOnlyReader)); BOOST_CHECK_EQUAL(docCount - 1, readOnlyReader->numDocs()); reader->close(); readOnlyReader->close(); dir1->close(); } /// Open readOnly reader1, clone to non-readOnly reader2 BOOST_AUTO_TEST_CASE(testCloneReadOnlyToWriteable) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr reader1 = IndexReader::open(dir1, true); IndexReaderPtr reader2 = boost::dynamic_pointer_cast(reader1->clone(false)); 
BOOST_CHECK(!isReadOnly(reader2)); BOOST_CHECK(!deleteWorked(1, reader1)); // this readonly reader shouldn't yet have a write lock BOOST_CHECK(!reader2->hasChanges()); BOOST_CHECK(deleteWorked(1, reader2)); reader1->close(); reader2->close(); dir1->close(); } /// Open non-readOnly reader1 on multi-segment index, then optimize the index, then clone to readOnly reader2 BOOST_AUTO_TEST_CASE(testReadOnlyCloneAfterOptimize) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr reader1 = IndexReader::open(dir1, false); IndexWriterPtr w = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->optimize(); w->close(); IndexReaderPtr reader2 = boost::dynamic_pointer_cast(reader1->clone(true)); BOOST_CHECK(isReadOnly(reader2)); reader1->close(); reader2->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testCloneReadOnlyDirectoryReader) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr reader = IndexReader::open(dir1, false); IndexReaderPtr readOnlyReader = boost::dynamic_pointer_cast(reader->clone(true)); BOOST_CHECK(isReadOnly(readOnlyReader)); reader->close(); readOnlyReader->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testParallelReader) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); DirectoryPtr dir2 = newLucene(); createIndex(dir2, true); IndexReaderPtr r1 = IndexReader::open(dir1, false); IndexReaderPtr r2 = IndexReader::open(dir2, false); ParallelReaderPtr pr1 = newLucene(); pr1->add(r1); pr1->add(r2); performDefaultTests(pr1); pr1->close(); dir1->close(); dir2->close(); } BOOST_AUTO_TEST_CASE(testMixedReaders) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); DirectoryPtr dir2 = newLucene(); createIndex(dir2, true); IndexReaderPtr r1 = IndexReader::open(dir1, false); IndexReaderPtr r2 = IndexReader::open(dir2, false); Collection multiReaders = newCollection(r1, r2); MultiReaderPtr multiReader = newLucene(multiReaders); performDefaultTests(multiReader); multiReader->close(); 
dir1->close(); dir2->close(); } BOOST_AUTO_TEST_CASE(testSegmentReaderUndeleteall) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); SegmentReaderPtr origSegmentReader = SegmentReader::getOnlySegmentReader(dir1); origSegmentReader->deleteDocument(10); checkDelDocsRefCountEquals(1, origSegmentReader); origSegmentReader->undeleteAll(); BOOST_CHECK(!origSegmentReader->deletedDocsRef); origSegmentReader->close(); // need to test norms? dir1->close(); } BOOST_AUTO_TEST_CASE(testSegmentReaderCloseReferencing) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); SegmentReaderPtr origSegmentReader = SegmentReader::getOnlySegmentReader(dir1); origSegmentReader->deleteDocument(1); origSegmentReader->setNorm(4, L"field1", 0.5); SegmentReaderPtr clonedSegmentReader = boost::dynamic_pointer_cast(origSegmentReader->clone()); checkDelDocsRefCountEquals(2, origSegmentReader); origSegmentReader->close(); checkDelDocsRefCountEquals(1, origSegmentReader); // check the norm refs NormPtr norm = clonedSegmentReader->_norms.get(L"field1"); BOOST_CHECK_EQUAL(1, norm->bytesRef()->refCount()); clonedSegmentReader->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testSegmentReaderDelDocsReferenceCounting) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); IndexReaderPtr origReader = IndexReader::open(dir1, false); SegmentReaderPtr origSegmentReader = SegmentReader::getOnlySegmentReader(origReader); // deletedDocsRef should be null because nothing has updated yet BOOST_CHECK(!origSegmentReader->deletedDocsRef); // we deleted a document, so there is now a deletedDocs bitvector and a reference to it origReader->deleteDocument(1); checkDelDocsRefCountEquals(1, origSegmentReader); // the cloned segmentreader should have 2 references, 1 to itself, and 1 to the original segmentreader IndexReaderPtr clonedReader = boost::dynamic_pointer_cast(origReader->clone()); SegmentReaderPtr clonedSegmentReader = SegmentReader::getOnlySegmentReader(clonedReader); 
checkDelDocsRefCountEquals(2, origSegmentReader); // deleting a document creates a new deletedDocs bitvector, the refs goes to 1 clonedReader->deleteDocument(2); checkDelDocsRefCountEquals(1, origSegmentReader); checkDelDocsRefCountEquals(1, clonedSegmentReader); // make sure the deletedocs objects are different (copy on write) BOOST_CHECK_NE(origSegmentReader->deletedDocs, clonedSegmentReader->deletedDocs); checkDocDeleted(origSegmentReader, clonedSegmentReader, 1); BOOST_CHECK(!origSegmentReader->isDeleted(2)); // doc 2 should not be deleted in original segmentreader BOOST_CHECK(clonedSegmentReader->isDeleted(2)); // doc 2 should be deleted in cloned segmentreader BOOST_CHECK_EXCEPTION(origReader->deleteDocument(4), LockObtainFailedException, check_exception(LuceneException::LockObtainFailed)); origReader->close(); // try closing the original segment reader to see if it affects the clonedSegmentReader clonedReader->deleteDocument(3); clonedReader->flush(); checkDelDocsRefCountEquals(1, clonedSegmentReader); // test a reopened reader IndexReaderPtr reopenedReader = clonedReader->reopen(); IndexReaderPtr cloneReader2 = boost::dynamic_pointer_cast(reopenedReader->clone()); SegmentReaderPtr cloneSegmentReader2 = SegmentReader::getOnlySegmentReader(cloneReader2); checkDelDocsRefCountEquals(2, cloneSegmentReader2); clonedReader->close(); reopenedReader->close(); cloneReader2->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testCloneWithDeletes) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); IndexReaderPtr origReader = IndexReader::open(dir1, false); origReader->deleteDocument(1); IndexReaderPtr clonedReader = boost::dynamic_pointer_cast(origReader->clone()); origReader->close(); clonedReader->close(); IndexReaderPtr r = IndexReader::open(dir1, false); BOOST_CHECK(r->isDeleted(1)); r->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testCloneWithSetNorm) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); IndexReaderPtr orig = IndexReader::open(dir1, 
false); orig->setNorm(1, L"field1", 17.0); uint8_t encoded = Similarity::encodeNorm(17.0); BOOST_CHECK_EQUAL(encoded, orig->norms(L"field1")[1]); // the cloned segmentreader should have 2 references, 1 to itself, and 1 to the original segmentreader IndexReaderPtr clonedReader = boost::dynamic_pointer_cast(orig->clone()); orig->close(); clonedReader->close(); IndexReaderPtr r = IndexReader::open(dir1, false); BOOST_CHECK_EQUAL(encoded, r->norms(L"field1")[1]); r->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testCloneSubreaders) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr reader = IndexReader::open(dir1, false); reader->deleteDocument(1); // acquire write lock Collection subs = reader->getSequentialSubReaders(); BOOST_CHECK(subs.size() > 1); Collection clones = Collection::newInstance(subs.size()); for (int32_t x = 0; x < subs.size(); ++x) clones[x] = boost::dynamic_pointer_cast(subs[x]->clone()); reader->close(); for (int32_t x = 0; x < subs.size(); ++x) clones[x]->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testIncDecRef) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); IndexReaderPtr r1 = IndexReader::open(dir1, false); r1->incRef(); IndexReaderPtr r2 = boost::dynamic_pointer_cast(r1->clone(false)); r1->deleteDocument(5); r1->decRef(); r1->incRef(); r2->close(); r1->decRef(); r1->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testCloseStoredFields) { DirectoryPtr dir = newLucene(); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); w->setUseCompoundFile(false); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"yes it's stored", Field::STORE_YES, Field::INDEX_ANALYZED)); w->addDocument(doc); w->close(); IndexReaderPtr r1 = IndexReader::open(dir, false); IndexReaderPtr r2 = boost::dynamic_pointer_cast(r1->clone(false)); r1->close(); r2->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/index/IndexReaderReopenTest.cpp000066400000000000000000001400131217574114600250760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexReader.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Term.h" #include "MockRAMDirectory.h" #include "LogDocMergePolicy.h" #include "Document.h" #include "Field.h" #include "TermEnum.h" #include "TermPositions.h" #include "SegmentReader.h" #include "DirectoryReader.h" #include "MultiReader.h" #include "ParallelReader.h" #include "FilterIndexReader.h" #include "FSDirectory.h" #include "KeywordAnalyzer.h" #include "SerialMergeScheduler.h" #include "StandardAnalyzer.h" #include "LuceneThread.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "BitVector.h" #include "IndexDeletionPolicy.h" #include "Random.h" #include "MiscUtils.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexReaderReopenTest, LuceneTestFixture) namespace TestReopen { DECLARE_SHARED_PTR(TestableReopen) DECLARE_SHARED_PTR(ReaderCouple) DECLARE_SHARED_PTR(ReaderThread) DECLARE_SHARED_PTR(ReaderThreadTask) class TestableReopen { public: virtual ~TestableReopen() { } public: virtual IndexReaderPtr openReader() = 0; virtual void modifyIndex(int32_t i) = 0; }; class ReaderCouple { public: ReaderCouple(IndexReaderPtr r1, IndexReaderPtr r2) { newReader = r1; refreshedReader = r2; } virtual ~ReaderCouple() { } public: IndexReaderPtr newReader; IndexReaderPtr refreshedReader; }; class ReaderThreadTask : public LuceneObject { public: 
ReaderThreadTask() { stopped = false; } virtual ~ReaderThreadTask() { } LUCENE_CLASS(ReaderThreadTask); protected: bool stopped; public: void stop() { stopped = true; } virtual void run() = 0; }; class ReaderThread : public LuceneThread { public: ReaderThread(ReaderThreadTaskPtr task) { this->task = task; } virtual ~ReaderThread() { } LUCENE_CLASS(ReaderThread); protected: ReaderThreadTaskPtr task; public: void stopThread() { task->stop(); } virtual void run() { try { task->run(); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; } static DocumentPtr createDocument(int32_t n, int32_t numFields) { StringStream sb; DocumentPtr doc = newLucene(); sb << L"a" << n; doc->add(newLucene(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"fielda", sb.str(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); doc->add(newLucene(L"fieldb", sb.str(), Field::STORE_YES, Field::INDEX_NO)); sb << L" b" << n; for (int32_t i = 1; i < numFields; ++i) doc->add(newLucene(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED)); return doc; } static void createIndex(DirectoryPtr dir, bool multiSegment) { IndexWriter::unlock(dir); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->setMergePolicy(newLucene(w)); for (int32_t i = 0; i < 100; ++i) { w->addDocument(createDocument(i, 4)); if (multiSegment && (i % 10) == 0) w->commit(); } if (!multiSegment) w->optimize(); w->close(); IndexReaderPtr r = IndexReader::open(dir, false); if (multiSegment) BOOST_CHECK(r->getSequentialSubReaders().size() > 1); else BOOST_CHECK_EQUAL(r->getSequentialSubReaders().size(), 1); r->close(); } static void _modifyIndex(int32_t i, DirectoryPtr dir) { switch (i) { case 0: { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->deleteDocuments(newLucene(L"field2", L"a11")); w->deleteDocuments(newLucene(L"field2", L"b30")); 
w->close(); break; } case 1: { IndexReaderPtr reader = IndexReader::open(dir, false); reader->setNorm(4, L"field1", (uint8_t)123); reader->setNorm(44, L"field2", (uint8_t)222); reader->setNorm(44, L"field4", (uint8_t)22); reader->close(); break; } case 2: { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->optimize(); w->close(); break; } case 3: { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->addDocument(createDocument(101, 4)); w->optimize(); w->addDocument(createDocument(102, 4)); w->addDocument(createDocument(103, 4)); w->close(); break; } case 4: { IndexReaderPtr reader = IndexReader::open(dir, false); reader->setNorm(5, L"field1", (uint8_t)123); reader->setNorm(55, L"field2", (uint8_t)222); reader->close(); break; } case 5: { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); w->addDocument(createDocument(101, 4)); w->close(); break; } } } static void checkIndexEquals(IndexReaderPtr index1, IndexReaderPtr index2) { BOOST_CHECK_EQUAL(index1->numDocs(), index2->numDocs()); BOOST_CHECK_EQUAL(index1->maxDoc(), index2->maxDoc()); BOOST_CHECK_EQUAL(index1->hasDeletions(), index2->hasDeletions()); BOOST_CHECK_EQUAL(index1->isOptimized(), index2->isOptimized()); // check field names HashSet _fields1 = index1->getFieldNames(IndexReader::FIELD_OPTION_ALL); Collection fields1 = Collection::newInstance(_fields1.begin(), _fields1.end()); std::sort(fields1.begin(), fields1.end()); HashSet _fields2 = index1->getFieldNames(IndexReader::FIELD_OPTION_ALL); Collection fields2 = Collection::newInstance(_fields2.begin(), _fields2.end()); std::sort(fields2.begin(), fields2.end()); BOOST_CHECK_EQUAL(fields1.size(), fields2.size()); for (int32_t i = 0; i < fields1.size(); ++i) BOOST_CHECK_EQUAL(fields1[i], fields2[i]); // check norms for (int32_t i = 0; i < fields1.size(); ++i) { String curField = fields1[i]; ByteArray norms1 = index1->norms(curField); ByteArray norms2 = 
index2->norms(curField); if (norms1 && norms2) { BOOST_CHECK(norms1.equals(norms2)); } else BOOST_CHECK(norms1 == norms2); } // check deletions for (int32_t i = 0; i < index1->maxDoc(); ++i) BOOST_CHECK_EQUAL(index1->isDeleted(i), index2->isDeleted(i)); // check stored fields for (int32_t i = 0; i < index1->maxDoc(); ++i) { if (!index1->isDeleted(i)) { DocumentPtr doc1 = index1->document(i); DocumentPtr doc2 = index2->document(i); Collection storedFields1 = doc1->getFields(); Collection storedFields2 = doc2->getFields(); BOOST_CHECK_EQUAL(storedFields1.size(), storedFields2.size()); for (int32_t j = 0; j < storedFields1.size(); ++j) { BOOST_CHECK_EQUAL(storedFields1[j]->name(), storedFields2[j]->name()); BOOST_CHECK_EQUAL(storedFields1[j]->stringValue(), storedFields2[j]->stringValue()); } } } // check dictionary and posting lists TermEnumPtr enum1 = index1->terms(); TermEnumPtr enum2 = index2->terms(); TermPositionsPtr tp1 = index1->termPositions(); TermPositionsPtr tp2 = index2->termPositions(); while (enum1->next()) { BOOST_CHECK(enum2->next()); BOOST_CHECK(enum1->term()->equals(enum2->term())); tp1->seek(enum1->term()); tp2->seek(enum1->term()); while (tp1->next()) { BOOST_CHECK(tp2->next()); BOOST_CHECK_EQUAL(tp1->doc(), tp2->doc()); BOOST_CHECK_EQUAL(tp1->freq(), tp2->freq()); for (int32_t i = 0; i < tp1->freq(); ++i) BOOST_CHECK_EQUAL(tp1->nextPosition(), tp2->nextPosition()); } } } static void checkReaderClosed(IndexReaderPtr reader, bool checkSubReaders, bool checkNormsClosed) { BOOST_CHECK_EQUAL(0, reader->getRefCount()); if (checkNormsClosed && MiscUtils::typeOf(reader)) BOOST_CHECK(boost::dynamic_pointer_cast(reader)->normsClosed()); if (checkSubReaders) { if (MiscUtils::typeOf(reader)) { Collection subReaders = reader->getSequentialSubReaders(); for (int32_t i = 0; i < subReaders.size(); ++i) checkReaderClosed(subReaders[i], checkSubReaders, checkNormsClosed); } if (MiscUtils::typeOf(reader)) { Collection subReaders = reader->getSequentialSubReaders(); 
for (int32_t i = 0; i < subReaders.size(); ++i) checkReaderClosed(subReaders[i], checkSubReaders, checkNormsClosed); } if (MiscUtils::typeOf(reader)) { Collection subReaders = boost::dynamic_pointer_cast(reader)->getSubReaders(); for (int32_t i = 0; i < subReaders.size(); ++i) checkReaderClosed(subReaders[i], checkSubReaders, checkNormsClosed); } } } static TestReopen::ReaderCouplePtr refreshReader(IndexReaderPtr reader, TestReopen::TestableReopenPtr test, int32_t modify, bool hasChanges) { static SynchronizePtr createReaderMutex = newInstance(); SyncLock readersLock(createReaderMutex); IndexReaderPtr r; if (test) { test->modifyIndex(modify); r = test->openReader(); } IndexReaderPtr refreshed; LuceneException finally; try { refreshed = reader->reopen(); } catch (LuceneException& e) { finally = e; } if (!refreshed && r) { // Hit exception - close opened reader r->close(); } finally.throwException(); if (hasChanges) { if (refreshed == reader) BOOST_FAIL("No new IndexReader instance created during refresh."); } else { if (refreshed != reader) BOOST_FAIL("New IndexReader instance created during refresh even though index had no changes."); } return newInstance(r, refreshed); } static TestReopen::ReaderCouplePtr refreshReader(IndexReaderPtr reader, bool hasChanges) { return refreshReader(reader, TestReopen::TestableReopenPtr(), -1, hasChanges); } static void performDefaultTests(TestReopen::TestableReopenPtr test) { IndexReaderPtr index1 = test->openReader(); IndexReaderPtr index2 = test->openReader(); checkIndexEquals(index1, index2); // verify that reopen() does not return a new reader instance in case the index has no changes TestReopen::ReaderCouplePtr couple = refreshReader(index2, false); BOOST_CHECK_EQUAL(couple->refreshedReader, index2); couple = refreshReader(index2, test, 0, true); index1->close(); index1 = couple->newReader; IndexReaderPtr index2_refreshed = couple->refreshedReader; index2->close(); // test if refreshed reader and newly opened reader return 
equal results checkIndexEquals(index1, index2_refreshed); index2_refreshed->close(); checkReaderClosed(index2, true, true); checkReaderClosed(index2_refreshed, true, true); index2 = test->openReader(); for (int32_t i = 1; i < 4; ++i) { index1->close(); couple = refreshReader(index2, test, i, true); // refresh IndexReader index2->close(); index2 = couple->refreshedReader; index1 = couple->newReader; checkIndexEquals(index1, index2); } index1->close(); index2->close(); checkReaderClosed(index1, true, true); checkReaderClosed(index2, true, true); } static void performTestsWithExceptionInReopen(TestReopen::TestableReopenPtr test) { IndexReaderPtr index1 = test->openReader(); IndexReaderPtr index2 = test->openReader(); checkIndexEquals(index1, index2); BOOST_CHECK_EXCEPTION(refreshReader(index1, test, 0, true), LuceneException, check_exception(LuceneException::Null)); // index2 should still be usable and unaffected by the failed reopen() call checkIndexEquals(index1, index2); index1->close(); index2->close(); } static void checkRefCountEquals(int32_t refCount, IndexReaderPtr reader) { BOOST_CHECK_EQUAL(refCount, reader->getRefCount()); } namespace TestReopen { class BasicReopen : public TestableReopen { public: BasicReopen(DirectoryPtr dir) { this->dir = dir; } virtual ~BasicReopen() { } protected: DirectoryPtr dir; public: virtual IndexReaderPtr openReader() { return IndexReader::open(dir, false); } virtual void modifyIndex(int32_t i) { _modifyIndex(i, dir); } }; } BOOST_AUTO_TEST_CASE(testReopen) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); performDefaultTests(newInstance(dir1)); dir1->close(); DirectoryPtr dir2 = newLucene(); createIndex(dir2, true); performDefaultTests(newInstance(dir2)); dir2->close(); } namespace TestParallelReaderReopen { class FirstReopen : public TestReopen::TestableReopen { public: FirstReopen(DirectoryPtr dir1, DirectoryPtr dir2) { this->dir1 = dir1; this->dir2 = dir2; } virtual ~FirstReopen() { } protected: DirectoryPtr dir1; 
DirectoryPtr dir2; public: virtual IndexReaderPtr openReader() { ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); return pr; } virtual void modifyIndex(int32_t i) { _modifyIndex(i, dir1); _modifyIndex(i, dir2); } }; class SecondReopen : public TestReopen::TestableReopen { public: SecondReopen(DirectoryPtr dir3, DirectoryPtr dir4) { this->dir3 = dir3; this->dir4 = dir4; } virtual ~SecondReopen() { } protected: DirectoryPtr dir3; DirectoryPtr dir4; public: virtual IndexReaderPtr openReader() { ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir3, false)); pr->add(IndexReader::open(dir4, false)); // Does not implement reopen, so hits exception pr->add(newLucene(IndexReader::open(dir3, false))); return pr; } virtual void modifyIndex(int32_t i) { _modifyIndex(i, dir3); _modifyIndex(i, dir4); } }; } BOOST_AUTO_TEST_CASE(testParallelReaderReopen) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); DirectoryPtr dir2 = newLucene(); createIndex(dir2, true); performDefaultTests(newInstance(dir1, dir2)); dir1->close(); dir2->close(); DirectoryPtr dir3 = newLucene(); createIndex(dir3, true); DirectoryPtr dir4 = newLucene(); createIndex(dir4, true); performTestsWithExceptionInReopen(newInstance(dir3, dir4)); dir3->close(); dir4->close(); } static void doTestReopenWithCommit(DirectoryPtr dir, bool withReopen) { IndexWriterPtr iwriter = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); iwriter->setMergeScheduler(newLucene()); IndexReaderPtr reader = IndexReader::open(dir, false); LuceneException finally; try { int32_t M = 3; for (int32_t i = 0; i < 4; ++i) { for (int32_t j = 0; j < M; ++j) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i) + L"_" + StringUtils::toString(j), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"id2", StringUtils::toString(i) + L"_" + StringUtils::toString(j), Field::STORE_YES, 
Field::INDEX_NOT_ANALYZED_NO_NORMS)); doc->add(newLucene(L"id3", StringUtils::toString(i) + L"_" + StringUtils::toString(j), Field::STORE_YES, Field::INDEX_NO)); iwriter->addDocument(doc); if (i > 0) { int32_t k = i - 1; int32_t n = j + k * M; DocumentPtr prevItereationDoc = reader->document(n); BOOST_CHECK(prevItereationDoc); String id = prevItereationDoc->get(L"id"); BOOST_CHECK_EQUAL(StringUtils::toString(k) + L"_" + StringUtils::toString(j), id); } } iwriter->commit(); if (withReopen) { // reopen IndexReaderPtr r2 = reader->reopen(); if (reader != r2) { reader->close(); reader = r2; } } else { // recreate reader->close(); reader = IndexReader::open(dir, false); } } } catch (LuceneException& e) { finally = e; } iwriter->close(); reader->close(); finally.throwException(); } /// IndexWriter.commit() does not update the index version populate an index in iterations. /// At the end of every iteration, commit the index and reopen/recreate the reader. /// In each iteration verify the work of previous iteration. /// Try this once with reopen once recreate, on both RAMDir and FSDir. 
BOOST_AUTO_TEST_CASE(testCommitReopenFS) { String indexDir(FileUtils::joinPath(getTempDir(), L"IndexReaderReopen")); DirectoryPtr dir = FSDirectory::open(indexDir); doTestReopenWithCommit(dir, true); dir->close(); } BOOST_AUTO_TEST_CASE(testCommitRecreateFS) { String indexDir(FileUtils::joinPath(getTempDir(), L"IndexReaderReopen")); DirectoryPtr dir = FSDirectory::open(indexDir); doTestReopenWithCommit(dir, false); dir->close(); } BOOST_AUTO_TEST_CASE(testCommitRecreateRAM) { DirectoryPtr dir = newLucene(); doTestReopenWithCommit(dir, false); dir->close(); } namespace TestMultiReaderReopen { class FirstReopen : public TestReopen::TestableReopen { public: FirstReopen(DirectoryPtr dir1, DirectoryPtr dir2) { this->dir1 = dir1; this->dir2 = dir2; } protected: DirectoryPtr dir1; DirectoryPtr dir2; public: virtual IndexReaderPtr openReader() { Collection readers = newCollection(IndexReader::open(dir1, false), IndexReader::open(dir2, false)); return newLucene(readers); } virtual void modifyIndex(int32_t i) { _modifyIndex(i, dir1); _modifyIndex(i, dir2); } }; class SecondReopen : public TestReopen::TestableReopen { public: SecondReopen(DirectoryPtr dir3, DirectoryPtr dir4) { this->dir3 = dir3; this->dir4 = dir4; } protected: DirectoryPtr dir3; DirectoryPtr dir4; public: virtual IndexReaderPtr openReader() { Collection readers = Collection::newInstance(3); readers[0] = IndexReader::open(dir3, false); readers[1] = IndexReader::open(dir4, false); // Does not implement reopen, so hits exception readers[2] = newLucene(IndexReader::open(dir3, false)); return newLucene(readers); } virtual void modifyIndex(int32_t i) { _modifyIndex(i, dir3); _modifyIndex(i, dir4); } }; } BOOST_AUTO_TEST_CASE(testMultiReaderReopen) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); DirectoryPtr dir2 = newLucene(); createIndex(dir2, true); performDefaultTests(newInstance(dir1, dir2)); dir1->close(); dir2->close(); DirectoryPtr dir3 = newLucene(); createIndex(dir3, true); DirectoryPtr dir4 = 
newLucene(); createIndex(dir4, true); performTestsWithExceptionInReopen(newInstance(dir3, dir4)); dir3->close(); dir4->close(); } namespace TestMixedReaders { class MixedReopen : public TestReopen::TestableReopen { public: MixedReopen(DirectoryPtr dir1, DirectoryPtr dir2, DirectoryPtr dir3, DirectoryPtr dir4, DirectoryPtr dir5) { this->dir1 = dir1; this->dir2 = dir2; this->dir3 = dir3; this->dir4 = dir4; this->dir5 = dir5; } protected: DirectoryPtr dir1; DirectoryPtr dir2; DirectoryPtr dir3; DirectoryPtr dir4; DirectoryPtr dir5; public: virtual IndexReaderPtr openReader() { ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); Collection readers = newCollection(IndexReader::open(dir3, false), IndexReader::open(dir4, false)); MultiReaderPtr mr = newLucene(readers); Collection mixedReaders = newCollection(pr, mr, IndexReader::open(dir5, false)); return newLucene(mixedReaders); } virtual void modifyIndex(int32_t i) { // only change norms in this index to maintain the same number of docs // for each of ParallelReader's subreaders if (i == 1) _modifyIndex(i, dir1); _modifyIndex(i, dir4); _modifyIndex(i, dir5); } }; } BOOST_AUTO_TEST_CASE(testMixedReaders) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); DirectoryPtr dir2 = newLucene(); createIndex(dir2, true); DirectoryPtr dir3 = newLucene(); createIndex(dir3, false); DirectoryPtr dir4 = newLucene(); createIndex(dir4, true); DirectoryPtr dir5 = newLucene(); createIndex(dir5, false); performDefaultTests(newInstance(dir1, dir2, dir3, dir4, dir5)); dir1->close(); dir2->close(); dir3->close(); dir4->close(); dir5->close(); } BOOST_AUTO_TEST_CASE(testReferenceCounting) { for (int32_t mode = 0; mode < 4; ++mode) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, true); IndexReaderPtr reader0 = IndexReader::open(dir1, false); checkRefCountEquals(1, reader0); BOOST_CHECK(MiscUtils::typeOf(reader0)); Collection subReaders0 = 
reader0->getSequentialSubReaders(); for (int32_t i = 0; i < subReaders0.size(); ++i) checkRefCountEquals(1, subReaders0[i]); // delete first document, so that only one of the subReaders have to be re-opened IndexReaderPtr modifier = IndexReader::open(dir1, false); modifier->deleteDocument(0); modifier->close(); IndexReaderPtr reader1 = refreshReader(reader0, true)->refreshedReader; BOOST_CHECK(MiscUtils::typeOf(reader1)); Collection subReaders1 = reader1->getSequentialSubReaders(); BOOST_CHECK_EQUAL(subReaders0.size(), subReaders1.size()); for (int32_t i = 0; i < subReaders0.size(); ++i) { if (subReaders0[i] != subReaders1[i]) { checkRefCountEquals(1, subReaders0[i]); checkRefCountEquals(1, subReaders1[i]); } else checkRefCountEquals(2, subReaders0[i]); } // delete first document, so that only one of the subReaders have to be re-opened modifier = IndexReader::open(dir1, false); modifier->deleteDocument(1); modifier->close(); IndexReaderPtr reader2 = refreshReader(reader1, true)->refreshedReader; BOOST_CHECK(MiscUtils::typeOf(reader2)); Collection subReaders2 = reader2->getSequentialSubReaders(); BOOST_CHECK_EQUAL(subReaders1.size(), subReaders2.size()); for (int32_t i = 0; i < subReaders2.size(); ++i) { if (subReaders2[i] == subReaders1[i]) { if (subReaders1[i] == subReaders0[i]) checkRefCountEquals(3, subReaders2[i]); else checkRefCountEquals(2, subReaders2[i]); } else { checkRefCountEquals(1, subReaders2[i]); if (subReaders0[i] == subReaders1[i]) { checkRefCountEquals(2, subReaders2[i]); checkRefCountEquals(2, subReaders0[i]); } else { checkRefCountEquals(1, subReaders0[i]); checkRefCountEquals(1, subReaders1[i]); } } } IndexReaderPtr reader3 = refreshReader(reader0, true)->refreshedReader; BOOST_CHECK(MiscUtils::typeOf(reader3)); Collection subReaders3 = reader3->getSequentialSubReaders(); BOOST_CHECK_EQUAL(subReaders3.size(), subReaders0.size()); // try some permutations switch (mode) { case 0: reader0->close(); reader1->close(); reader2->close(); 
reader3->close(); break; case 1: reader3->close(); reader2->close(); reader1->close(); reader0->close(); break; case 2: reader2->close(); reader3->close(); reader0->close(); reader1->close(); break; case 3: reader1->close(); reader3->close(); reader2->close(); reader0->close(); break; } checkReaderClosed(reader0, true, true); checkReaderClosed(reader1, true, true); checkReaderClosed(reader2, true, true); checkReaderClosed(reader3, true, true); dir1->close(); } } BOOST_AUTO_TEST_CASE(testReferenceCountingMultiReader) { for (int32_t mode = 0; mode <= 1; ++mode) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); DirectoryPtr dir2 = newLucene(); createIndex(dir2, true); IndexReaderPtr reader1 = IndexReader::open(dir1, false); checkRefCountEquals(1, reader1); IndexReaderPtr initReader2 = IndexReader::open(dir2, false); Collection readers = newCollection(reader1, initReader2); IndexReaderPtr multiReader1 = newLucene(readers, (mode == 0)); _modifyIndex(0, dir2); checkRefCountEquals(1 + mode, reader1); IndexReaderPtr multiReader2 = multiReader1->reopen(); // index1 hasn't changed, so multiReader2 should share reader1 now with multiReader1 checkRefCountEquals(2 + mode, reader1); _modifyIndex(0, dir1); IndexReaderPtr reader2 = reader1->reopen(); checkRefCountEquals(2 + mode, reader1); if (mode == 1) initReader2->close(); _modifyIndex(1, dir1); IndexReaderPtr reader3 = reader2->reopen(); checkRefCountEquals(2 + mode, reader1); checkRefCountEquals(1, reader2); multiReader1->close(); checkRefCountEquals(1 + mode, reader1); multiReader1->close(); checkRefCountEquals(1 + mode, reader1); if (mode == 1) initReader2->close(); reader1->close(); checkRefCountEquals(1, reader1); multiReader2->close(); checkRefCountEquals(0, reader1); multiReader2->close(); checkRefCountEquals(0, reader1); reader3->close(); checkRefCountEquals(0, reader1); checkReaderClosed(reader1, true, false); reader2->close(); checkRefCountEquals(0, reader1); checkReaderClosed(reader1, true, false); 
reader2->close(); checkRefCountEquals(0, reader1); reader3->close(); checkRefCountEquals(0, reader1); checkReaderClosed(reader1, true, true); dir1->close(); dir2->close(); } } BOOST_AUTO_TEST_CASE(testReferenceCountingParallelReader) { for (int32_t mode = 0; mode <= 1; ++mode) { DirectoryPtr dir1 = newLucene(); createIndex(dir1, false); DirectoryPtr dir2 = newLucene(); createIndex(dir2, true); IndexReaderPtr reader1 = IndexReader::open(dir1, false); checkRefCountEquals(1, reader1); ParallelReaderPtr parallelReader1 = newLucene(mode == 0); parallelReader1->add(reader1); IndexReaderPtr initReader2 = IndexReader::open(dir2, false); parallelReader1->add(initReader2); _modifyIndex(1, dir2); checkRefCountEquals(1 + mode, reader1); IndexReaderPtr parallelReader2 = parallelReader1->reopen(); // index1 hasn't changed, so parallelReader2 should share reader1 now with parallelReader2 checkRefCountEquals(2 + mode, reader1); _modifyIndex(0, dir1); _modifyIndex(0, dir2); IndexReaderPtr reader2 = reader1->reopen(); checkRefCountEquals(2 + mode, reader1); if (mode == 1) initReader2->close(); _modifyIndex(4, dir1); IndexReaderPtr reader3 = reader2->reopen(); checkRefCountEquals(2 + mode, reader1); checkRefCountEquals(1, reader2); parallelReader1->close(); checkRefCountEquals(1 + mode, reader1); parallelReader1->close(); checkRefCountEquals(1 + mode, reader1); if (mode == 1) initReader2->close(); reader1->close(); checkRefCountEquals(1, reader1); parallelReader2->close(); checkRefCountEquals(0, reader1); parallelReader2->close(); checkRefCountEquals(0, reader1); reader3->close(); checkRefCountEquals(0, reader1); checkReaderClosed(reader1, true, false); reader2->close(); checkRefCountEquals(0, reader1); checkReaderClosed(reader1, true, false); reader2->close(); checkRefCountEquals(0, reader1); reader3->close(); checkRefCountEquals(0, reader1); checkReaderClosed(reader1, true, true); dir1->close(); dir2->close(); } } BOOST_AUTO_TEST_CASE(testNormsRefCounting) { DirectoryPtr dir1 = 
newLucene(); createIndex(dir1, false); IndexReaderPtr reader1 = IndexReader::open(dir1, false); SegmentReaderPtr segmentReader1 = SegmentReader::getOnlySegmentReader(reader1); IndexReaderPtr modifier = IndexReader::open(dir1, false); modifier->deleteDocument(0); modifier->close(); IndexReaderPtr reader2 = reader1->reopen(); modifier = IndexReader::open(dir1, false); modifier->setNorm(1, L"field1", (uint8_t)50); modifier->setNorm(1, L"field2", (uint8_t)50); modifier->close(); IndexReaderPtr reader3 = reader2->reopen(); SegmentReaderPtr segmentReader3 = SegmentReader::getOnlySegmentReader(reader3); modifier = IndexReader::open(dir1, false); modifier->deleteDocument(2); modifier->close(); IndexReaderPtr reader4 = reader3->reopen(); modifier = IndexReader::open(dir1, false); modifier->deleteDocument(3); modifier->close(); IndexReaderPtr reader5 = reader3->reopen(); // Now reader2-reader5 references reader1. reader1 and reader2 // share the same norms. reader3, reader4, reader5 also share norms. 
checkRefCountEquals(1, reader1); BOOST_CHECK(!segmentReader1->normsClosed()); reader1->close(); checkRefCountEquals(0, reader1); BOOST_CHECK(!segmentReader1->normsClosed()); reader2->close(); checkRefCountEquals(0, reader1); // now the norms for field1 and field2 should be closed BOOST_CHECK(segmentReader1->normsClosed(L"field1")); BOOST_CHECK(segmentReader1->normsClosed(L"field2")); // but the norms for field3 and field4 should still be open BOOST_CHECK(!segmentReader1->normsClosed(L"field3")); BOOST_CHECK(!segmentReader1->normsClosed(L"field4")); reader3->close(); checkRefCountEquals(0, reader1); BOOST_CHECK(!segmentReader3->normsClosed()); reader5->close(); checkRefCountEquals(0, reader1); BOOST_CHECK(!segmentReader3->normsClosed()); reader4->close(); checkRefCountEquals(0, reader1); // and now all norms that reader1 used should be closed BOOST_CHECK(segmentReader1->normsClosed()); // now that reader3, reader4 and reader5 are closed, the norms that those three // readers shared should be closed as well BOOST_CHECK(segmentReader3->normsClosed()); dir1->close(); } namespace TestReopen { class ThreadReopen : public TestableReopen { public: ThreadReopen(DirectoryPtr dir, int32_t n) { this->dir = dir; this->n = n; } protected: DirectoryPtr dir; int32_t n; public: virtual IndexReaderPtr openReader() { return IndexReader::open(dir, false); } virtual void modifyIndex(int32_t i) { if (i % 3 == 0) { IndexReaderPtr modifier = IndexReader::open(dir, false); modifier->setNorm(i, L"field1", (uint8_t)50); modifier->close(); } else if (i % 3 == 1) { IndexReaderPtr modifier = IndexReader::open(dir, false); modifier->deleteDocument(i % modifier->maxDoc()); modifier->close(); } else { IndexWriterPtr modifier = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); modifier->addDocument(createDocument(n + i, 6)); modifier->close(); } } }; class FirstThreadTask : public ReaderThreadTask { public: FirstThreadTask(IndexReaderPtr r, 
TestableReopenPtr test, int32_t index, HashSet readersToClose, Collection readers) { this->r = r; this->test = test; this->index = index; this->readersToClose = readersToClose; this->readers = readers; this->rnd = newLucene(); } virtual ~FirstThreadTask() { } protected: IndexReaderPtr r; TestableReopenPtr test; int32_t index; HashSet readersToClose; Collection readers; RandomPtr rnd; public: virtual void run() { while (!stopped) { if (index % 2 == 0) { // refresh reader synchronized ReaderCouplePtr c = refreshReader(r, test, index, true); { SyncLock readersLock(&readersToClose); readersToClose.add(c->newReader); readersToClose.add(c->refreshedReader); } { SyncLock readersLock(&readers); readers.add(c); } // prevent too many readers break; } else { // not synchronized IndexReaderPtr refreshed = r->reopen(); IndexSearcherPtr searcher = newLucene(refreshed); Collection hits = searcher->search(newLucene(newLucene(L"field1", L"a" + StringUtils::toString(rnd->nextInt(refreshed->maxDoc())))), FilterPtr(), 1000)->scoreDocs; if (!hits.empty()) searcher->doc(hits[0]->doc); // r might have changed because this is not a synchronized method. However we don't want to make it synchronized to test // thread-safety of IndexReader.close(). That's why we add refreshed also to readersToClose, because double closing is fine. 
if (refreshed != r) refreshed->close(); { SyncLock readersLock(&readersToClose); readersToClose.add(refreshed); } } LuceneThread::threadSleep(1000); } } }; class SecondThreadTask : public ReaderThreadTask { public: SecondThreadTask(IndexReaderPtr r, TestableReopenPtr test, int32_t index, HashSet readersToClose, Collection readers) { this->r = r; this->test = test; this->index = index; this->readersToClose = readersToClose; this->readers = readers; this->rnd = newLucene(); } virtual ~SecondThreadTask() { } protected: IndexReaderPtr r; TestableReopenPtr test; int32_t index; HashSet readersToClose; Collection readers; RandomPtr rnd; public: virtual void run() { while (!stopped) { int32_t numReaders = 0; ReaderCouplePtr c; { SyncLock readersLock(&readers); numReaders = readers.size(); if (numReaders > 0) c = readers[rnd->nextInt(numReaders)]; } if (c) { static SynchronizePtr checkIndexMutex = newInstance(); SyncLock readersLock(checkIndexMutex); checkIndexEquals(c->newReader, c->refreshedReader); } LuceneThread::threadSleep(100); } } }; } BOOST_AUTO_TEST_CASE(testThreadSafety) { DirectoryPtr dir = newLucene(); int32_t n = 150; IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < n; ++i) writer->addDocument(createDocument(i, 3)); writer->optimize(); writer->close(); TestReopen::TestableReopenPtr test = newInstance(dir, n); Collection readers = Collection::newInstance(); IndexReaderPtr firstReader = IndexReader::open(dir, false); IndexReaderPtr reader = firstReader; RandomPtr rnd = newLucene(); Collection threads = Collection::newInstance(n); HashSet readersToClose = HashSet::newInstance(); for (int32_t i = 0; i < n; ++i) { if (i % 10 == 0) { IndexReaderPtr refreshed = reader->reopen(); if (refreshed != reader) { SyncLock readersLock(&readersToClose); readersToClose.add(reader); } reader = refreshed; } IndexReaderPtr r = reader; int32_t index = i; TestReopen::ReaderThreadTaskPtr 
task; if (i < 20 ||( i >= 50 && i < 70) || i > 90) task = newLucene(r, test, index, readersToClose, readers); else task = newLucene(r, test, index, readersToClose, readers); threads[i] = newLucene(task); threads[i]->start(); } LuceneThread::threadSleep(15000); for (int32_t i = 0; i < n; ++i) { if (threads[i]) threads[i]->stopThread(); } for (int32_t i = 0; i < n; ++i) { if (threads[i]) threads[i]->join(); } { SyncLock readersLock(&readersToClose); for (HashSet::iterator reader = readersToClose.begin(); reader != readersToClose.end(); ++reader) (*reader)->close(); } firstReader->close(); reader->close(); { SyncLock readersLock(&readersToClose); for (HashSet::iterator reader = readersToClose.begin(); reader != readersToClose.end(); ++reader) checkReaderClosed(*reader, true, true); } checkReaderClosed(reader, true, true); checkReaderClosed(firstReader, true, true); dir->close(); } BOOST_AUTO_TEST_CASE(testCloseOrig) { DirectoryPtr dir = newLucene(); createIndex(dir, false); IndexReaderPtr r1 = IndexReader::open(dir, false); IndexReaderPtr r2 = IndexReader::open(dir, false); r2->deleteDocument(0); r2->close(); IndexReaderPtr r3 = r1->reopen(); BOOST_CHECK_NE(r1, r3); r1->close(); BOOST_CHECK_EXCEPTION(r1->document(2), AlreadyClosedException, check_exception(LuceneException::AlreadyClosed)); r3->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testDeletes) { DirectoryPtr dir = newLucene(); createIndex(dir, false); // Create an index with a bunch of docs (1 segment) _modifyIndex(0, dir); // Get delete bitVector on 1st segment _modifyIndex(5, dir); // Add a doc (2 segments) IndexReaderPtr r1 = IndexReader::open(dir, false); _modifyIndex(5, dir); // Add another doc (3 segments) IndexReaderPtr r2 = r1->reopen(); BOOST_CHECK_NE(r1, r2); // Get SRs for the first segment from original SegmentReaderPtr sr1 = boost::dynamic_pointer_cast(r1->getSequentialSubReaders()[0]); // and reopened IRs SegmentReaderPtr sr2 = boost::dynamic_pointer_cast(r2->getSequentialSubReaders()[0]); // At 
this point they share the same BitVector BOOST_CHECK_EQUAL(sr1->deletedDocs, sr2->deletedDocs); r2->deleteDocument(0); // r1 should not see the delete BOOST_CHECK(!r1->isDeleted(0)); // Now r2 should have made a private copy of deleted docs: BOOST_CHECK_NE(sr1->deletedDocs, sr2->deletedDocs); r1->close(); r2->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testDeletes2) { DirectoryPtr dir = newLucene(); createIndex(dir, false); // Get delete bitVector _modifyIndex(0, dir); IndexReaderPtr r1 = IndexReader::open(dir, false); // Add doc _modifyIndex(5, dir); IndexReaderPtr r2 = r1->reopen(); BOOST_CHECK_NE(r1, r2); Collection rs2 = r2->getSequentialSubReaders(); SegmentReaderPtr sr1 = SegmentReader::getOnlySegmentReader(r1); SegmentReaderPtr sr2 = boost::dynamic_pointer_cast(rs2[0]); // At this point they share the same BitVector BOOST_CHECK_EQUAL(sr1->deletedDocs, sr2->deletedDocs); BitVectorPtr delDocs = sr1->deletedDocs; r1->close(); r2->deleteDocument(0); BOOST_CHECK_EQUAL(delDocs, sr2->deletedDocs); r2->close(); dir->close(); } namespace TestReopen { class KeepAllCommits : public IndexDeletionPolicy { public: virtual ~KeepAllCommits() { } LUCENE_CLASS(KeepAllCommits); public: virtual void onInit(Collection commits) { } virtual void onCommit(Collection commits) { } }; } BOOST_AUTO_TEST_CASE(testReopenOnCommit) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), (IndexDeletionPolicyPtr)newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < 4; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); MapStringString data = MapStringString::newInstance(); data.put(L"index", StringUtils::toString(i)); writer->commit(data); } for (int32_t i = 0; i < 4; ++i) { writer->deleteDocuments(newLucene(L"id", StringUtils::toString(i))); MapStringString data = MapStringString::newInstance(); data.put(L"index", 
StringUtils::toString(4 + i)); writer->commit(data); } writer->close(); IndexReaderPtr r = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(0, r->numDocs()); BOOST_CHECK_EQUAL(4, r->maxDoc()); Collection commits = IndexReader::listCommits(dir); for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) { IndexReaderPtr r2 = r->reopen(*commit); BOOST_CHECK_NE(r2, r); BOOST_CHECK_EXCEPTION(r2->deleteDocument(0), UnsupportedOperationException, check_exception(LuceneException::UnsupportedOperation)); MapStringString s = (*commit)->getUserData(); int32_t v = s.empty() ? -1 : StringUtils::toInt(s.get(L"index")); if (v < 4) BOOST_CHECK_EQUAL(1 + v, r2->numDocs()); else BOOST_CHECK_EQUAL(7 - v, r2->numDocs()); r->close(); r = r2; } r->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexReaderTest.cpp000066400000000000000000001641051217574114600237350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "Document.h" #include "Field.h" #include "DateTools.h" #include "FileReader.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "WhitespaceAnalyzer.h" #include "IndexReader.h" #include "SegmentInfos.h" #include "IndexCommit.h" #include "FieldSortedTermVectorMapper.h" #include "TermVectorEntryFreqSortedComparator.h" #include "Term.h" #include "TermDocs.h" #include "SetBasedFieldSelector.h" #include "FieldSelector.h" #include "FSDirectory.h" #include "IndexFileDeleter.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "TermQuery.h" #include "SegmentReader.h" #include "FieldCache.h" #include "ReadOnlyDirectoryReader.h" #include "ReadOnlySegmentReader.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexReaderTest, LuceneTestFixture) static void addDocumentWithFields(IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"keyword", L"test1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"text", L"test1", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"unindexed", L"test1", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"unstored", L"test1", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } static void addDocumentWithDifferentFields(IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"keyword2", L"test1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"text2", L"test1", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"unindexed2", L"test1", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"unstored2", L"test1", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } static void 
addDocumentWithTermVectorFields(IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"tvnot", L"tvnot", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO)); doc->add(newLucene(L"termvector", L"termvector", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); doc->add(newLucene(L"tvoffset", L"tvoffset", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_OFFSETS)); doc->add(newLucene(L"tvposition", L"tvposition", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS)); doc->add(newLucene(L"tvpositionoffset", L"tvpositionoffset", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); } static void addDoc(IndexWriterPtr writer, const String& value) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", value, Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } static void checkTermDocsCount(IndexReaderPtr reader, TermPtr term, int32_t expected) { TermDocsPtr tdocs; LuceneException finally; try { tdocs = reader->termDocs(term); BOOST_CHECK(tdocs); int32_t count = 0; while (tdocs->next()) ++count; BOOST_CHECK_EQUAL(expected, count); } catch (LuceneException& e) { finally = e; } if (tdocs) tdocs->close(); finally.throwException(); } static DirectoryPtr getDirectory() { return FSDirectory::open(FileUtils::joinPath(getTempDir(), L"testIndex")); } static DocumentPtr createDocument(const String& id) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", id, Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); return doc; } BOOST_AUTO_TEST_CASE(testCommitUserData) { RAMDirectoryPtr d = newLucene(); MapStringString commitUserData = MapStringString::newInstance(); commitUserData.put(L"foo", L"fighters"); // set up writer IndexWriterPtr writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); for (int32_t i = 0; i < 27; ++i) 
addDocumentWithFields(writer); writer->close(); IndexReaderPtr r = IndexReader::open(d, false); r->deleteDocument(5); r->flush(commitUserData); r->close(); SegmentInfosPtr sis = newLucene(); sis->read(d); IndexReaderPtr r2 = IndexReader::open(d, false); IndexCommitPtr c = r->getIndexCommit(); MapStringString expectedData = c->getUserData(); BOOST_CHECK_EQUAL(expectedData.size(), commitUserData.size()); for (MapStringString::iterator expected = expectedData.begin(); expected != expectedData.end(); ++expected) BOOST_CHECK(commitUserData.find(expected->first) != commitUserData.end()); for (MapStringString::iterator commit = commitUserData.begin(); commit != commitUserData.end(); ++commit) BOOST_CHECK(expectedData.find(commit->first) != expectedData.end()); BOOST_CHECK_EQUAL(sis->getCurrentSegmentFileName(), c->getSegmentsFileName()); BOOST_CHECK(c->equals(r->getIndexCommit())); // Change the index writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); for (int32_t i = 0; i < 7; ++i) addDocumentWithFields(writer); writer->close(); IndexReaderPtr r3 = r2->reopen(); BOOST_CHECK(!c->equals(r3->getIndexCommit())); BOOST_CHECK(!r2->getIndexCommit()->isOptimized()); r3->close(); writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); writer->optimize(); writer->close(); r3 = r2->reopen(); BOOST_CHECK(r3->getIndexCommit()->isOptimized()); r2->close(); r3->close(); d->close(); } BOOST_AUTO_TEST_CASE(testIsCurrent) { RAMDirectoryPtr d = newLucene(); IndexWriterPtr writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); addDocumentWithFields(writer); writer->close(); // set up reader IndexReaderPtr reader = IndexReader::open(d, false); BOOST_CHECK(reader->isCurrent()); // modify index by adding another document writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, 
IndexWriter::MaxFieldLengthLIMITED); addDocumentWithFields(writer); writer->close(); BOOST_CHECK(!reader->isCurrent()); // re-create index writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); addDocumentWithFields(writer); writer->close(); BOOST_CHECK(!reader->isCurrent()); reader->close(); d->close(); } BOOST_AUTO_TEST_CASE(testGetFieldNames) { RAMDirectoryPtr d = newLucene(); // set up writer IndexWriterPtr writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); addDocumentWithFields(writer); writer->close(); // set up reader IndexReaderPtr reader = IndexReader::open(d, false); BOOST_CHECK(reader->isCurrent()); HashSet fieldNames = reader->getFieldNames(IndexReader::FIELD_OPTION_ALL); BOOST_CHECK(fieldNames.contains(L"keyword")); BOOST_CHECK(fieldNames.contains(L"text")); BOOST_CHECK(fieldNames.contains(L"unindexed")); BOOST_CHECK(fieldNames.contains(L"unstored")); reader->close(); // add more documents writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); // want to get some more segments here for (int32_t i = 0; i < 5 * writer->getMergeFactor(); ++i) addDocumentWithFields(writer); // new fields are in some different segments (we hope) for (int32_t i = 0; i < 5 * writer->getMergeFactor(); ++i) addDocumentWithDifferentFields(writer); // new termvector fields for (int32_t i = 0; i < 5 * writer->getMergeFactor(); ++i) addDocumentWithTermVectorFields(writer); writer->close(); // verify fields again reader = IndexReader::open(d, false); fieldNames = reader->getFieldNames(IndexReader::FIELD_OPTION_ALL); BOOST_CHECK_EQUAL(13, fieldNames.size()); // the following fields BOOST_CHECK(fieldNames.contains(L"keyword")); BOOST_CHECK(fieldNames.contains(L"text")); BOOST_CHECK(fieldNames.contains(L"unindexed")); BOOST_CHECK(fieldNames.contains(L"unstored")); BOOST_CHECK(fieldNames.contains(L"keyword2")); 
BOOST_CHECK(fieldNames.contains(L"text2")); BOOST_CHECK(fieldNames.contains(L"unindexed2")); BOOST_CHECK(fieldNames.contains(L"unstored2")); BOOST_CHECK(fieldNames.contains(L"tvnot")); BOOST_CHECK(fieldNames.contains(L"termvector")); BOOST_CHECK(fieldNames.contains(L"tvposition")); BOOST_CHECK(fieldNames.contains(L"tvoffset")); BOOST_CHECK(fieldNames.contains(L"tvpositionoffset")); // verify that only indexed fields were returned fieldNames = reader->getFieldNames(IndexReader::FIELD_OPTION_INDEXED); BOOST_CHECK_EQUAL(11, fieldNames.size()); // 6 original + the 5 termvector fields BOOST_CHECK(fieldNames.contains(L"keyword")); BOOST_CHECK(fieldNames.contains(L"text")); BOOST_CHECK(fieldNames.contains(L"unstored")); BOOST_CHECK(fieldNames.contains(L"keyword2")); BOOST_CHECK(fieldNames.contains(L"text2")); BOOST_CHECK(fieldNames.contains(L"unstored2")); BOOST_CHECK(fieldNames.contains(L"tvnot")); BOOST_CHECK(fieldNames.contains(L"termvector")); BOOST_CHECK(fieldNames.contains(L"tvposition")); BOOST_CHECK(fieldNames.contains(L"tvoffset")); BOOST_CHECK(fieldNames.contains(L"tvpositionoffset")); // verify that only unindexed fields were returned fieldNames = reader->getFieldNames(IndexReader::FIELD_OPTION_UNINDEXED); BOOST_CHECK_EQUAL(2, fieldNames.size()); // the following fields BOOST_CHECK(fieldNames.contains(L"unindexed")); BOOST_CHECK(fieldNames.contains(L"unindexed2")); // verify index term vector fields fieldNames = reader->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR); BOOST_CHECK_EQUAL(1, fieldNames.size()); // 1 field has term vector only BOOST_CHECK(fieldNames.contains(L"termvector")); fieldNames = reader->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION); BOOST_CHECK_EQUAL(1, fieldNames.size()); // 4 fields are indexed with term vectors BOOST_CHECK(fieldNames.contains(L"tvposition")); fieldNames = reader->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_OFFSET); BOOST_CHECK_EQUAL(1, fieldNames.size()); // 4 fields are indexed with 
term vectors BOOST_CHECK(fieldNames.contains(L"tvoffset")); fieldNames = reader->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET); BOOST_CHECK_EQUAL(1, fieldNames.size()); // 4 fields are indexed with term vectors BOOST_CHECK(fieldNames.contains(L"tvpositionoffset")); reader->close(); d->close(); } BOOST_AUTO_TEST_CASE(testTermVectors) { RAMDirectoryPtr d = newLucene(); // set up writer IndexWriterPtr writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); // want to get some more segments here // new termvector fields for (int32_t i = 0; i < 5 * writer->getMergeFactor(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"tvnot", L"one two two three three three", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO)); doc->add(newLucene(L"termvector", L"one two two three three three", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); doc->add(newLucene(L"tvoffset", L"one two two three three three", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_OFFSETS)); doc->add(newLucene(L"tvposition", L"one two two three three three", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS)); doc->add(newLucene(L"tvpositionoffset", L"one two two three three three", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); } writer->close(); IndexReaderPtr reader = IndexReader::open(d, false); FieldSortedTermVectorMapperPtr mapper = newLucene(TermVectorEntryFreqSortedComparator::compare); reader->getTermFreqVector(0, mapper); MapStringCollectionTermVectorEntry map = mapper->getFieldToTerms(); BOOST_CHECK(map); BOOST_CHECK_EQUAL(map.size(), 4); Collection set = map.get(L"termvector"); for (Collection::iterator entry = set.begin(); entry != set.end(); ++entry) BOOST_CHECK(*entry); } BOOST_AUTO_TEST_CASE(testBasicDelete) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = 
newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); TermPtr searchTerm = newLucene(L"content", L"aaa"); // add 100 documents with term : aaa for (int32_t i = 0; i < 100; ++i) addDoc(writer, searchTerm->text()); writer->close(); // open reader at this point - this should fix the view of the // index at the point of having 100 "aaa" documents and 0 "bbb" IndexReaderPtr reader = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm)); checkTermDocsCount(reader, searchTerm, 100); reader->close(); // delete documents containing term: aaa int32_t deleted = 0; reader = IndexReader::open(dir, false); deleted = reader->deleteDocuments(searchTerm); BOOST_CHECK_EQUAL(100, deleted); BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm)); checkTermDocsCount(reader, searchTerm, 0); // open a 2nd reader to make sure first reader can commit its changes (.del) // while second reader is open IndexReaderPtr reader2 = IndexReader::open(dir, false); reader->close(); // create a new reader and re-test reader = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm)); checkTermDocsCount(reader, searchTerm, 0); reader->close(); reader2->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testBinaryFields) { DirectoryPtr dir = newLucene(); uint8_t _bin[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; ByteArray bin(ByteArray::newInstance(10)); std::memcpy(bin.get(), _bin, 10); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < 10; ++i) { addDoc(writer, L"document number " + StringUtils::toString(i + 1)); addDocumentWithFields(writer); addDocumentWithDifferentFields(writer); addDocumentWithTermVectorFields(writer); } writer->close(); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"bin1", bin, Field::STORE_YES)); doc->add(newLucene(L"junk", L"junk text", Field::STORE_NO, 
Field::INDEX_ANALYZED));
writer->addDocument(doc);
writer->close();
IndexReaderPtr reader = IndexReader::open(dir, false);
doc = reader->document(reader->maxDoc() - 1);
Collection fields = doc->getFields(L"bin1");
BOOST_CHECK(fields);
BOOST_CHECK_EQUAL(1, fields.size());
FieldPtr b1 = fields[0];
BOOST_CHECK(b1->isBinary());
ByteArray data1 = b1->getBinaryValue();
BOOST_CHECK_EQUAL(bin.size(), b1->getBinaryLength());
BOOST_CHECK(std::memcmp(bin.get(), data1.get() + b1->getBinaryOffset(), bin.size()) == 0);
// fetch the same doc again with "bin1" selected as a lazy field and re-check the bytes
HashSet lazyFields = HashSet::newInstance();
lazyFields.add(L"bin1");
FieldSelectorPtr sel = newLucene(HashSet::newInstance(), lazyFields);
doc = reader->document(reader->maxDoc() - 1, sel);
Collection fieldables = doc->getFieldables(L"bin1");
BOOST_CHECK(fieldables);
BOOST_CHECK_EQUAL(1, fieldables.size());
FieldablePtr fb1 = fieldables[0];
BOOST_CHECK(fb1->isBinary());
BOOST_CHECK_EQUAL(bin.size(), fb1->getBinaryLength());
data1 = fb1->getBinaryValue();
BOOST_CHECK_EQUAL(bin.size(), fb1->getBinaryLength());
BOOST_CHECK(std::memcmp(bin.get(), data1.get() + fb1->getBinaryOffset(), bin.size()) == 0);
reader->close();
// force optimize
writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED);
writer->optimize();
writer->close();
reader = IndexReader::open(dir, false);
doc = reader->document(reader->maxDoc() - 1);
fields = doc->getFields(L"bin1");
BOOST_CHECK(fields);
BOOST_CHECK_EQUAL(1, fields.size());
b1 = fields[0];
BOOST_CHECK(b1->isBinary());
data1 = b1->getBinaryValue();
BOOST_CHECK_EQUAL(bin.size(), b1->getBinaryLength());
BOOST_CHECK(std::memcmp(bin.get(), data1.get() + b1->getBinaryOffset(), bin.size()) == 0);
reader->close();
}

/// Make sure attempts to make changes after reader is closed throws IOException
BOOST_AUTO_TEST_CASE(testChangesAfterClose)
{
    DirectoryPtr dir = newLucene();
    TermPtr searchTerm = newLucene(L"content", L"aaa");
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    // add 11 documents with term : aaa
    for (int32_t i = 0; i < 11; ++i)
        addDoc(writer, searchTerm->text());
    writer->close();
    IndexReaderPtr reader = IndexReader::open(dir, false);
    // Close reader
    reader->close();
    // Then, try to make changes
    BOOST_CHECK_EXCEPTION(reader->deleteDocument(4), AlreadyClosedException, check_exception(LuceneException::AlreadyClosed));
    BOOST_CHECK_EXCEPTION(reader->setNorm(5, L"aaa", 2.0), AlreadyClosedException, check_exception(LuceneException::AlreadyClosed));
    BOOST_CHECK_EXCEPTION(reader->undeleteAll(), AlreadyClosedException, check_exception(LuceneException::AlreadyClosed));
}

/// Make sure we get lock obtain failed exception with 2 writers
BOOST_AUTO_TEST_CASE(testLockObtainFailed)
{
    DirectoryPtr dir = newLucene();
    TermPtr searchTerm = newLucene(L"content", L"aaa");
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    // add 11 documents with term : aaa
    for (int32_t i = 0; i < 11; ++i)
        addDoc(writer, searchTerm->text());
    // writer is intentionally left open, so the reader cannot take the write lock
    IndexReaderPtr reader = IndexReader::open(dir, false);
    // Try to make changes
    BOOST_CHECK_EXCEPTION(reader->deleteDocument(4), LockObtainFailedException, check_exception(LuceneException::LockObtainFailed));
    BOOST_CHECK_EXCEPTION(reader->setNorm(5, L"aaa", 2.0), LockObtainFailedException, check_exception(LuceneException::LockObtainFailed));
    BOOST_CHECK_EXCEPTION(reader->undeleteAll(), LockObtainFailedException, check_exception(LuceneException::LockObtainFailed));
    writer->close();
    reader->close();
}

/// setNorm via a reader must take the write lock and release it on commit/close.
BOOST_AUTO_TEST_CASE(testWritingNorms)
{
    String indexDir(FileUtils::joinPath(getTempDir(), L"lucenetestnormwriter"));
    DirectoryPtr dir = FSDirectory::open(indexDir);
    TermPtr searchTerm = newLucene(L"content", L"aaa");
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDoc(writer, searchTerm->text());
    writer->close();
    // now open reader & set norm for doc 0
    IndexReaderPtr reader = IndexReader::open(dir, false);
    reader->setNorm(0, L"content",
2.0);
// we should be holding the write lock now
BOOST_CHECK(IndexWriter::isLocked(dir));
reader->commit(MapStringString());
// we should not be holding the write lock now
BOOST_CHECK(!IndexWriter::isLocked(dir));
// open a 2nd reader
IndexReaderPtr reader2 = IndexReader::open(dir, false);
// set norm again for doc 0
reader->setNorm(0, L"content", 3.0);
BOOST_CHECK(IndexWriter::isLocked(dir));
reader->close();
// we should not be holding the write lock now
BOOST_CHECK(!IndexWriter::isLocked(dir));
reader2->close();
dir->close();
FileUtils::removeDirectory(indexDir);
}

/// Make sure you can set norms and commit, and there are no extra norms files left
BOOST_AUTO_TEST_CASE(testWritingNormsNoReader)
{
    RAMDirectoryPtr dir = newLucene();
    // add 1 documents with term : aaa
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    TermPtr searchTerm = newLucene(L"content", L"aaa");
    writer->setUseCompoundFile(false);
    addDoc(writer, searchTerm->text());
    writer->close();
    // now open reader & set norm for doc 0 (writes to _0_1.s0)
    IndexReaderPtr reader = IndexReader::open(dir, false);
    reader->setNorm(0, L"content", 2.0);
    reader->close();
    // now open reader again & set norm for doc 0 (writes to _0_2.s0)
    reader = IndexReader::open(dir, false);
    reader->setNorm(0, L"content", 2.0);
    reader->close();
    // the superseded norms generation file must be gone
    BOOST_CHECK(!dir->fileExists(L"_0_1.s0"));
    dir->close();
}

/// Shared body for the two testDeleteReaderWriterConflict* cases: a reader
/// opened before a writer adds (and optionally optimizes) new segments becomes
/// stale and must refuse deletes until it is reopened.
static void deleteReaderWriterConflict(bool optimize)
{
    DirectoryPtr dir = getDirectory();
    TermPtr searchTerm = newLucene(L"content", L"aaa");
    TermPtr searchTerm2 = newLucene(L"content", L"bbb");
    // add 100 documents with term : aaa
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 100; ++i)
        addDoc(writer, searchTerm->text());
    writer->close();
    // open reader at this point - this should fix the view of the index at the point of
    // having 100 "aaa" documents and 0 "bbb"
    IndexReaderPtr reader = IndexReader::open(dir, false);
    BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm));
    BOOST_CHECK_EQUAL(0, reader->docFreq(searchTerm2));
    checkTermDocsCount(reader, searchTerm, 100);
    checkTermDocsCount(reader, searchTerm2, 0);
    // add 100 documents with term : bbb
    writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 100; ++i)
        addDoc(writer, searchTerm2->text());
    // request optimization
    // This causes a new segment to become current for all subsequent
    // searchers. Because of this, deletions made via a previously open
    // reader, which would be applied to that reader's segment, are lost
    // for subsequent searchers/readers
    if (optimize)
        writer->optimize();
    writer->close();
    // The reader should not see the new data
    BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm));
    BOOST_CHECK_EQUAL(0, reader->docFreq(searchTerm2));
    checkTermDocsCount(reader, searchTerm, 100);
    checkTermDocsCount(reader, searchTerm2, 0);
    // delete documents containing term: aaa
    // NOTE: the reader was created when only "aaa" documents were in
    int32_t deleted = 0;
    BOOST_CHECK_EXCEPTION(deleted = reader->deleteDocuments(searchTerm), StaleReaderException, check_exception(LuceneException::StaleReader));
    // Re-open index reader and try again. This time it should see the new data.
    reader->close();
    reader = IndexReader::open(dir, false);
    BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm));
    BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm2));
    checkTermDocsCount(reader, searchTerm, 100);
    checkTermDocsCount(reader, searchTerm2, 100);
    deleted = reader->deleteDocuments(searchTerm);
    BOOST_CHECK_EQUAL(100, deleted);
    BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm));
    BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm2));
    checkTermDocsCount(reader, searchTerm, 0);
    checkTermDocsCount(reader, searchTerm2, 100);
    reader->close();
    // create a new reader and re-test
    reader = IndexReader::open(dir, false);
    BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm));
    BOOST_CHECK_EQUAL(100, reader->docFreq(searchTerm2));
    checkTermDocsCount(reader, searchTerm, 0);
    checkTermDocsCount(reader, searchTerm2, 100);
    reader->close();
}

BOOST_AUTO_TEST_CASE(testDeleteReaderWriterConflictUnoptimized)
{
    deleteReaderWriterConflict(false);
}

BOOST_AUTO_TEST_CASE(testDeleteReaderWriterConflictOptimized)
{
    deleteReaderWriterConflict(true);
}

/// Both writer and reader must close every file they open so the index
/// directory can be removed afterwards.
BOOST_AUTO_TEST_CASE(testFilesOpenClose)
{
    // Create initial data set
    String dirFile = FileUtils::joinPath(getTempDir(), L"testIndex");
    DirectoryPtr dir = getDirectory();
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDoc(writer, L"test");
    writer->close();
    dir->close();
    // Try to erase the data - this ensures that the writer closed all files
    FileUtils::removeDirectory(dirFile);
    dir = getDirectory();
    // Now create the data set again, just as before
    writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDoc(writer, L"test");
    writer->close();
    dir->close();
    // Now open existing directory and test that reader closes all files
    dir = getDirectory();
    IndexReaderPtr reader1 = IndexReader::open(dir, false);
    reader1->close();
    dir->close();
    // The following will fail if reader did not close all files
    BOOST_CHECK(FileUtils::removeDirectory(dirFile));
}
/// lastModified must be stable across calls and must not decrease after the
/// index is rewritten; iteration 0 uses a RAMDirectory, iteration 1 getDirectory().
BOOST_AUTO_TEST_CASE(testLastModified)
{
    String fileDir = FileUtils::joinPath(getTempDir(), L"testIndex");
    for (int32_t i = 0; i < 2; ++i)
    {
        LuceneException finally;
        try
        {
            DirectoryPtr dir = i == 0 ? newLucene() : getDirectory();
            BOOST_CHECK(!IndexReader::indexExists(dir));
            IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
            addDocumentWithFields(writer);
            BOOST_CHECK(IndexWriter::isLocked(dir)); // writer open, so dir is locked
            writer->close();
            BOOST_CHECK(IndexReader::indexExists(dir));
            IndexReaderPtr reader = IndexReader::open(dir, false);
            BOOST_CHECK(!IndexWriter::isLocked(dir)); // reader only, no lock
            int64_t version = IndexReader::lastModified(dir);
            if (i == 1)
            {
                int64_t version2 = IndexReader::lastModified(dir);
                BOOST_CHECK_EQUAL(version, version2);
            }
            reader->close();
            // modify index and check version has been incremented
            LuceneThread::threadSleep(1000); // let the filesystem timestamp advance
            writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
            addDocumentWithFields(writer);
            writer->close();
            reader = IndexReader::open(dir, false);
            BOOST_CHECK(version <= IndexReader::lastModified(dir));
            reader->close();
            dir->close();
        }
        catch (LuceneException& e)
        {
            // remember the failure so the cleanup below still runs (Java-style "finally")
            finally = e;
        }
        if (i == 1)
            BOOST_CHECK(FileUtils::removeDirectory(fileDir));
        finally.throwException();
    }
}

/// getCurrentVersion must strictly increase when the index is rewritten.
BOOST_AUTO_TEST_CASE(testVersion)
{
    DirectoryPtr dir = newLucene();
    BOOST_CHECK(!IndexReader::indexExists(dir));
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDocumentWithFields(writer);
    BOOST_CHECK(IndexWriter::isLocked(dir)); // writer open, so dir is locked
    writer->close();
    BOOST_CHECK(IndexReader::indexExists(dir));
    IndexReaderPtr reader = IndexReader::open(dir, false);
    BOOST_CHECK(!IndexWriter::isLocked(dir)); // reader only, no lock
    int64_t version = IndexReader::getCurrentVersion(dir);
    reader->close();
    // modify index and check version has been incremented
    writer = newLucene(dir, newLucene(), true,
                       IndexWriter::MaxFieldLengthLIMITED);
    addDocumentWithFields(writer);
    writer->close();
    reader = IndexReader::open(dir, false);
    BOOST_CHECK(version < IndexReader::getCurrentVersion(dir));
    reader->close();
    dir->close();
}

/// IndexWriter::unlock forcibly breaks the write lock so a reader can delete
/// even while a writer is open (dangerous - test only).
BOOST_AUTO_TEST_CASE(testLock)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDocumentWithFields(writer);
    writer->close();
    writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED);
    IndexReaderPtr reader = IndexReader::open(dir, false);
    BOOST_CHECK_EXCEPTION(reader->deleteDocument(0), LockObtainFailedException, check_exception(LuceneException::LockObtainFailed));
    IndexWriter::unlock(dir); // this should not be done in the real world!
    reader->deleteDocument(0);
    reader->close();
    writer->close();
    dir->close();
}

/// undeleteAll before the reader commits must roll back the pending deletes.
BOOST_AUTO_TEST_CASE(testUndeleteAll)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDocumentWithFields(writer);
    addDocumentWithFields(writer);
    writer->close();
    IndexReaderPtr reader = IndexReader::open(dir, false);
    reader->deleteDocument(0);
    reader->deleteDocument(1);
    reader->undeleteAll();
    reader->close();
    reader = IndexReader::open(dir, false);
    BOOST_CHECK_EQUAL(2, reader->numDocs()); // nothing has really been deleted thanks to undeleteAll()
    reader->close();
    dir->close();
}

/// undeleteAll from a *new* reader must also recover deletes committed by a
/// previously closed reader.
BOOST_AUTO_TEST_CASE(testUndeleteAllAfterClose)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDocumentWithFields(writer);
    addDocumentWithFields(writer);
    writer->close();
    IndexReaderPtr reader = IndexReader::open(dir, false);
    reader->deleteDocument(0);
    reader->deleteDocument(1);
    reader->close();
    reader = IndexReader::open(dir, false);
    reader->undeleteAll();
    BOOST_CHECK_EQUAL(2, reader->numDocs()); // nothing has really been deleted thanks to undeleteAll()
    reader->close();
    dir->close();
}
/// Like testUndeleteAllAfterClose, but with an extra close/reopen between the
/// undeleteAll and the final count.
BOOST_AUTO_TEST_CASE(testUndeleteAllAfterCloseThenReopen)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDocumentWithFields(writer);
    addDocumentWithFields(writer);
    writer->close();
    IndexReaderPtr reader = IndexReader::open(dir, false);
    reader->deleteDocument(0);
    reader->deleteDocument(1);
    reader->close();
    reader = IndexReader::open(dir, false);
    reader->undeleteAll();
    reader->close();
    reader = IndexReader::open(dir, false);
    BOOST_CHECK_EQUAL(2, reader->numDocs()); // nothing has really been deleted thanks to undeleteAll()
    reader->close();
    dir->close();
}

/// Shared body for the two testDeleteReaderReaderConflict* cases: two readers
/// over the same commit; once one commits deletes, the other is stale.
static void deleteReaderReaderConflict(bool optimize)
{
    DirectoryPtr dir = getDirectory();
    TermPtr searchTerm1 = newLucene(L"content", L"aaa");
    TermPtr searchTerm2 = newLucene(L"content", L"bbb");
    TermPtr searchTerm3 = newLucene(L"content", L"ccc");
    // add 100 documents with term : aaa
    // add 100 documents with term : bbb
    // add 100 documents with term : ccc
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 100; ++i)
    {
        addDoc(writer, searchTerm1->text());
        addDoc(writer, searchTerm2->text());
        addDoc(writer, searchTerm3->text());
    }
    if (optimize)
        writer->optimize();
    writer->close();
    // open two readers
    // both readers get segment info as exists at this time
    IndexReaderPtr reader1 = IndexReader::open(dir, false);
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm1));
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm2));
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm3));
    checkTermDocsCount(reader1, searchTerm1, 100);
    checkTermDocsCount(reader1, searchTerm2, 100);
    checkTermDocsCount(reader1, searchTerm3, 100);
    IndexReaderPtr reader2 = IndexReader::open(dir, false);
    BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm1));
    BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm2));
    BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm3));
    checkTermDocsCount(reader2, searchTerm1, 100);
    checkTermDocsCount(reader2, searchTerm2, 100);
    checkTermDocsCount(reader2, searchTerm3, 100);
    // delete docs from reader 2 and close it
    // delete documents containing term: aaa
    // when the reader is closed, the segment info is updated and
    // the first reader is now stale
    reader2->deleteDocuments(searchTerm1);
    BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm1));
    BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm2));
    BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm3));
    checkTermDocsCount(reader2, searchTerm1, 0);
    checkTermDocsCount(reader2, searchTerm2, 100);
    checkTermDocsCount(reader2, searchTerm3, 100);
    reader2->close();
    // Make sure reader 1 is unchanged since it was open earlier
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm1));
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm2));
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm3));
    checkTermDocsCount(reader1, searchTerm1, 100);
    checkTermDocsCount(reader1, searchTerm2, 100);
    checkTermDocsCount(reader1, searchTerm3, 100);
    // attempt to delete from stale reader
    // delete documents containing term: bbb
    BOOST_CHECK_EXCEPTION(reader1->deleteDocuments(searchTerm2), StaleReaderException, check_exception(LuceneException::StaleReader));
    // recreate reader and try again
    reader1->close();
    reader1 = IndexReader::open(dir, false);
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm1));
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm2));
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm3));
    checkTermDocsCount(reader1, searchTerm1, 0);
    checkTermDocsCount(reader1, searchTerm2, 100);
    checkTermDocsCount(reader1, searchTerm3, 100);
    reader1->deleteDocuments(searchTerm2);
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm1));
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm2));
    BOOST_CHECK_EQUAL(100, reader1->docFreq(searchTerm3));
    checkTermDocsCount(reader1, searchTerm1, 0);
    checkTermDocsCount(reader1, searchTerm2, 0);
    checkTermDocsCount(reader1, searchTerm3, 100);
    reader1->close();
    // Open
// another reader to confirm that everything is deleted
reader2 = IndexReader::open(dir, false);
BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm1));
BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm2));
BOOST_CHECK_EQUAL(100, reader2->docFreq(searchTerm3));
checkTermDocsCount(reader2, searchTerm1, 0);
checkTermDocsCount(reader2, searchTerm2, 0);
checkTermDocsCount(reader2, searchTerm3, 100);
reader2->close();
dir->close();
}

BOOST_AUTO_TEST_CASE(testDeleteReaderReaderConflictUnoptimized)
{
    deleteReaderReaderConflict(false);
}

BOOST_AUTO_TEST_CASE(testDeleteReaderReaderConflictOptimized)
{
    deleteReaderReaderConflict(true);
}

/// Make sure if reader tries to commit but hits disk full that reader remains consistent and usable.
BOOST_AUTO_TEST_CASE(testDiskFull)
{
    TermPtr searchTerm = newLucene(L"content", L"aaa");
    int32_t START_COUNT = 157;
    int32_t END_COUNT = 144; // 157 docs minus the 13 deleted below
    // First build up a starting index
    RAMDirectoryPtr startDir = newLucene();
    IndexWriterPtr writer = newLucene(startDir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 157; ++i)
    {
        DocumentPtr doc = newLucene();
        doc->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene(L"content", L"aaa " + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();
    int64_t diskUsage = startDir->sizeInBytes();
    int64_t diskFree = diskUsage + 100;
    LuceneException err;
    bool done = false;
    // Iterate with ever increasing free disk space
    while (!done)
    {
        MockRAMDirectoryPtr dir = newLucene(startDir);
        // If IndexReader hits disk full, it can write to the same files again.
        dir->setPreventDoubleWrite(false);
        IndexReaderPtr reader = IndexReader::open(dir, false);
        // For each disk size, first try to commit against dir that will hit random IOExceptions and
        // disk full; after, give it infinite disk space and turn off random IOExceptions and
        // retry with same reader
        bool success = false;
        for (int32_t x = 0; x < 2; ++x)
        {
            double rate = 0.05;
            double diskRatio = ((double)diskFree) / (double)diskUsage;
            int64_t thisDiskFree = 0;
            String testName;
            if (x == 0)
            {
                thisDiskFree = diskFree;
                // the more headroom there is, the less random-IOException noise we inject
                if (diskRatio >= 2.0)
                    rate /= 2;
                if (diskRatio >= 4.0)
                    rate /= 2;
                if (diskRatio >= 6.0)
                    rate = 0.0;
                testName = L"disk full during reader.close() @ " + StringUtils::toString(thisDiskFree) + L" bytes";
            }
            else
            {
                thisDiskFree = 0;
                rate = 0.0;
                testName = L"reader re-use after disk full";
            }
            dir->setMaxSizeInBytes(thisDiskFree);
            dir->setRandomIOExceptionRate(rate, diskFree);
            try
            {
                if (x == 0)
                {
                    int32_t docId = 12;
                    for (int32_t i = 0; i < 13; ++i)
                    {
                        reader->deleteDocument(docId);
                        // NOTE(review): field name L"contents" does not match the indexed
                        // field L"content"; this mirrors the upstream Java test - confirm intent
                        reader->setNorm(docId, L"contents", 2.0);
                        docId += 12;
                    }
                }
                reader->close();
                success = true;
                if (x == 0)
                    done = true;
            }
            catch (IOException& e)
            {
                err = e;
                if (x == 1)
                    BOOST_FAIL(testName << " hit IOException after disk space was freed up");
            }
            // Whether we succeeded or failed, check that all un-referenced files were in fact deleted (ie,
            // we did not create garbage). Just create a new IndexFileDeleter, have it delete unreferenced
            // files, then verify that in fact no files were deleted
            HashSet _startFiles = dir->listAll();
            SegmentInfosPtr infos = newLucene();
            infos->read(dir);
            IndexFileDeleterPtr deleter = newLucene(dir, newLucene(), infos, InfoStreamPtr(), DocumentsWriterPtr(), HashSet());
            HashSet _endFiles = dir->listAll();
            Collection startFiles = Collection::newInstance(_startFiles.begin(), _startFiles.end());
            Collection endFiles = Collection::newInstance(_endFiles.begin(), _endFiles.end());
            std::sort(startFiles.begin(), startFiles.end());
            std::sort(endFiles.begin(), endFiles.end());
            if (!startFiles.equals(endFiles))
            {
                String successStr = success ? L"success" : L"IOException";
                BOOST_FAIL("reader.close() failed to delete unreferenced files after " << successStr << " (" << diskFree << " bytes)");
            }
            // Finally, verify index is not corrupt, and, if we succeeded, we see all docs changed, and if
            // we failed, we see either all docs or no docs changed (transactional semantics)
            IndexReaderPtr newReader;
            BOOST_CHECK_NO_THROW(newReader = IndexReader::open(dir, false));
            IndexSearcherPtr searcher = newLucene(newReader);
            Collection hits;
            BOOST_CHECK_NO_THROW(hits = searcher->search(newLucene(searchTerm), FilterPtr(), 1000)->scoreDocs);
            int32_t result2 = hits.size();
            if (success)
            {
                if (result2 != END_COUNT)
                    BOOST_FAIL(testName << ": method did not throw exception but hits.size() for search on term 'aaa' is " << result2 << " instead of expected " << END_COUNT);
            }
            else
            {
                // On hitting exception we still may have added all docs
                if (result2 != START_COUNT && result2 != END_COUNT)
                    BOOST_FAIL(testName << ": method did throw exception but hits.size() for search on term 'aaa' is " << result2 << " instead of expected " << END_COUNT);
            }
            searcher->close();
            newReader->close();
            if (result2 == END_COUNT)
                break;
        }
        dir->close();
        // Try again with 10 more bytes of free space
        diskFree += 10;
    }
    startDir->close();
}

BOOST_AUTO_TEST_CASE(testDocsOutOfOrder)
{
// body of testDocsOutOfOrder (the test header is on the previous chunk line):
// deleting a docId just past maxDoc must throw, and the index stays usable.
DirectoryPtr dir = newLucene();
IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
for (int32_t i = 0; i < 11; ++i)
    addDoc(writer, L"aaa");
writer->close();
IndexReaderPtr reader = IndexReader::open(dir, false);
// Try to delete an invalid docId, yet, within range of the final bits of the BitVector
BOOST_CHECK_EXCEPTION(reader->deleteDocument(11), IndexOutOfBoundsException, check_exception(LuceneException::IndexOutOfBounds));
reader->close();
writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED);
// We must add more docs to get a new segment written
for (int32_t i = 0; i < 11; ++i)
    addDoc(writer, L"aaa");
BOOST_CHECK_NO_THROW(writer->optimize());
writer->close();
dir->close();
}

/// A failed delete/setNorm (out-of-bounds docId) must still release the write lock.
BOOST_AUTO_TEST_CASE(testExceptionReleaseWriteLock)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDoc(writer, L"aaa");
    writer->close();
    IndexReaderPtr reader = IndexReader::open(dir, false);
    BOOST_CHECK_EXCEPTION(reader->deleteDocument(1), IndexOutOfBoundsException, check_exception(LuceneException::IndexOutOfBounds));
    reader->close();
    BOOST_CHECK(!IndexWriter::isLocked(dir));
    reader = IndexReader::open(dir, false);
    BOOST_CHECK_EXCEPTION(reader->setNorm(1, L"content", 2.0), IndexOutOfBoundsException, check_exception(LuceneException::IndexOutOfBounds));
    reader->close();
    BOOST_CHECK(!IndexWriter::isLocked(dir));
    dir->close();
}

/// Opening a reader on a never-created (then removed) directory must throw
/// NoSuchDirectory both before and after the removal attempt.
BOOST_AUTO_TEST_CASE(testOpenReaderAfterDelete)
{
    String indexDir(FileUtils::joinPath(getTempDir(), L"deletetest"));
    DirectoryPtr dir = FSDirectory::open(indexDir);
    BOOST_CHECK_EXCEPTION(IndexReader::open(dir, false), NoSuchDirectoryException, check_exception(LuceneException::NoSuchDirectory));
    FileUtils::removeDirectory(indexDir);
    BOOST_CHECK_EXCEPTION(IndexReader::open(dir, false), NoSuchDirectoryException, check_exception(LuceneException::NoSuchDirectory));
    dir->close();
}

/// getIndexCommit must match the current segments file and track the
/// optimized state across reopens.
BOOST_AUTO_TEST_CASE(testGetIndexCommit)
{
    RAMDirectoryPtr d = newLucene();
    // set up writer
    IndexWriterPtr writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMaxBufferedDocs(2);
    for (int32_t i = 0; i < 27; ++i)
        addDocumentWithFields(writer);
    writer->close();
    SegmentInfosPtr sis = newLucene();
    sis->read(d);
    IndexReaderPtr r = IndexReader::open(d, false);
    IndexCommitPtr c = r->getIndexCommit();
    BOOST_CHECK_EQUAL(sis->getCurrentSegmentFileName(), c->getSegmentsFileName());
    BOOST_CHECK(c->equals(r->getIndexCommit()));
    // Change the index
    writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMaxBufferedDocs(2);
    for (int32_t i = 0; i < 7; ++i)
        addDocumentWithFields(writer);
    writer->close();
    IndexReaderPtr r2 = r->reopen();
    BOOST_CHECK(!c->equals(r2->getIndexCommit()));
    BOOST_CHECK(!r2->getIndexCommit()->isOptimized());
    r2->close();
    writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED);
    writer->optimize();
    writer->close();
    r2 = r->reopen();
    BOOST_CHECK(r2->getIndexCommit()->isOptimized());
    r->close();
    r2->close();
    d->close();
}

/// Read-only readers must reject deletes, and reopen must stay read-only.
BOOST_AUTO_TEST_CASE(testReadOnly)
{
    RAMDirectoryPtr d = newLucene();
    IndexWriterPtr writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    addDocumentWithFields(writer);
    writer->commit();
    addDocumentWithFields(writer);
    writer->close();
    IndexReaderPtr r = IndexReader::open(d, true);
    BOOST_CHECK_EXCEPTION(r->deleteDocument(0), UnsupportedOperationException, check_exception(LuceneException::UnsupportedOperation));
    writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED);
    addDocumentWithFields(writer);
    writer->close();
    // Make sure reopen is still readonly
    IndexReaderPtr r2 = r->reopen();
    r->close();
    BOOST_CHECK_NE(r, r2);
    BOOST_CHECK_EXCEPTION(r2->deleteDocument(0), UnsupportedOperationException,
check_exception(LuceneException::UnsupportedOperation));
writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED);
writer->optimize();
writer->close();
// Make sure reopen to a single segment is still readonly
IndexReaderPtr r3 = r2->reopen();
r2->close();
// NOTE(review): this re-checks r vs r2; comparing r2 vs r3 was probably intended - confirm
BOOST_CHECK_NE(r, r2);
BOOST_CHECK_EXCEPTION(r3->deleteDocument(0), UnsupportedOperationException, check_exception(LuceneException::UnsupportedOperation));
// Make sure write lock isn't held
writer = newLucene(d, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED);
writer->close();
r3->close();
}

/// Basic smoke test: delete via a reader, flush mid-way, then reopen read-only.
BOOST_AUTO_TEST_CASE(testIndexReader)
{
    RAMDirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthUNLIMITED);
    writer->addDocument(createDocument(L"a"));
    writer->addDocument(createDocument(L"b"));
    writer->addDocument(createDocument(L"c"));
    writer->close();
    IndexReaderPtr reader = IndexReader::open(dir, false);
    reader->deleteDocuments(newLucene(L"id", L"a"));
    reader->flush();
    reader->deleteDocuments(newLucene(L"id", L"b"));
    reader->close();
    IndexReader::open(dir, true)->close();
}

/// undeleteAll interleaved with flushed deletes must leave the index readable.
BOOST_AUTO_TEST_CASE(testIndexReaderUnDeleteAll)
{
    MockRAMDirectoryPtr dir = newLucene();
    dir->setPreventDoubleWrite(false);
    IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthUNLIMITED);
    writer->addDocument(createDocument(L"a"));
    writer->addDocument(createDocument(L"b"));
    writer->addDocument(createDocument(L"c"));
    writer->close();
    IndexReaderPtr reader = IndexReader::open(dir, false);
    reader->deleteDocuments(newLucene(L"id", L"a"));
    reader->flush();
    reader->deleteDocuments(newLucene(L"id", L"b"));
    reader->undeleteAll();
    reader->deleteDocuments(newLucene(L"id", L"b"));
    reader->close();
    IndexReader::open(dir, true)->close();
    dir->close();
}

/// Make sure on attempting to open an IndexReader on a non-existent directory, you get
/// a good exception
BOOST_AUTO_TEST_CASE(testNoDir)
{
    String indexDir(FileUtils::joinPath(getTempDir(), L"doesnotexist"));
    DirectoryPtr dir = FSDirectory::open(indexDir);
    BOOST_CHECK_EXCEPTION(IndexReader::open(dir, true), NoSuchDirectoryException, check_exception(LuceneException::NoSuchDirectory));
    dir->close();
}

/// Each commit must list every file name at most once.
BOOST_AUTO_TEST_CASE(testNoDupCommitFileNames)
{
    MockRAMDirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED);
    writer->setMaxBufferedDocs(2);
    writer->addDocument(createDocument(L"a"));
    writer->addDocument(createDocument(L"a"));
    writer->addDocument(createDocument(L"a"));
    writer->close();
    Collection commits = IndexReader::listCommits(dir);
    for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit)
    {
        HashSet files = (*commit)->getFileNames();
        HashSet seen = HashSet::newInstance();
        for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName)
        {
            BOOST_CHECK(!seen.contains(*fileName));
            seen.add(*fileName);
        }
    }
    dir->close();
}

/// Ensure that on a cloned reader, segments reuse the doc values arrays in FieldCache
BOOST_AUTO_TEST_CASE(testFieldCacheReuseAfterClone)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr doc = newLucene();
    doc->add(newLucene(L"number", L"17", Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
    writer->addDocument(doc);
    writer->close();
    // Open reader
    IndexReaderPtr r = SegmentReader::getOnlySegmentReader(dir);
    Collection ints = FieldCache::DEFAULT()->getInts(r, L"number");
    BOOST_CHECK_EQUAL(1, ints.size());
    BOOST_CHECK_EQUAL(17, ints[0]);
    // Clone reader
    IndexReaderPtr r2 = boost::dynamic_pointer_cast(r->clone());
    r->close();
    BOOST_CHECK_NE(r2, r);
    Collection ints2 = FieldCache::DEFAULT()->getInts(r2, L"number");
    r2->close();
    BOOST_CHECK_EQUAL(1, ints2.size());
    BOOST_CHECK_EQUAL(17, ints2[0]);
BOOST_CHECK(ints.equals(ints2));
dir->close();
}

/// Ensure that on a reopened reader, that any shared segments reuse the doc values arrays in FieldCache
BOOST_AUTO_TEST_CASE(testFieldCacheReuseAfterReopen)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr doc = newLucene();
    doc->add(newLucene(L"number", L"17", Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
    writer->addDocument(doc);
    writer->commit();
    // Open reader1
    IndexReaderPtr r = IndexReader::open(dir, false);
    IndexReaderPtr r1 = SegmentReader::getOnlySegmentReader(r);
    Collection ints = FieldCache::DEFAULT()->getInts(r1, L"number");
    BOOST_CHECK_EQUAL(1, ints.size());
    BOOST_CHECK_EQUAL(17, ints[0]);
    // Add new segment
    writer->addDocument(doc);
    writer->commit();
    // Reopen reader1 --> reader2
    IndexReaderPtr r2 = r->reopen();
    r->close();
    IndexReaderPtr sub0 = r2->getSequentialSubReaders()[0];
    Collection ints2 = FieldCache::DEFAULT()->getInts(sub0, L"number");
    r2->close();
    BOOST_CHECK(ints.equals(ints2));
    dir->close();
}

/// Make sure all SegmentReaders are new when reopen switches readOnly
BOOST_AUTO_TEST_CASE(testReopenChangeReadonly)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr doc = newLucene();
    doc->add(newLucene(L"number", L"17", Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
    writer->addDocument(doc);
    writer->commit();
    // Open reader1
    IndexReaderPtr r = IndexReader::open(dir, false);
    BOOST_CHECK(boost::dynamic_pointer_cast(r));
    IndexReaderPtr r1 = SegmentReader::getOnlySegmentReader(r);
    Collection ints = FieldCache::DEFAULT()->getInts(r1, L"number");
    BOOST_CHECK_EQUAL(1, ints.size());
    BOOST_CHECK_EQUAL(17, ints[0]);
    // Reopen to readonly with no changes
    IndexReaderPtr r3 = r->reopen(true);
    BOOST_CHECK(boost::dynamic_pointer_cast(r3));
    r3->close();
    // Add new segment
    writer->addDocument(doc);
    writer->commit();
    // Reopen reader1 --> reader2
    IndexReaderPtr r2 = r->reopen(true);
    r->close();
    BOOST_CHECK(boost::dynamic_pointer_cast(r2));
    Collection subs = r2->getSequentialSubReaders();
    Collection ints2 = FieldCache::DEFAULT()->getInts(subs[0], L"number");
    r2->close();
    BOOST_CHECK(boost::dynamic_pointer_cast(subs[0]));
    BOOST_CHECK(boost::dynamic_pointer_cast(subs[1]));
    BOOST_CHECK(ints.equals(ints2));
    dir->close();
}

/// getUniqueTermCount works on a single segment; a multi-segment reader throws,
/// but its sub-readers can still answer.
BOOST_AUTO_TEST_CASE(testUniqueTermCount)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED);
    DocumentPtr doc = newLucene();
    doc->add(newLucene(L"field", L"a b c d e f g h i j k l m n o p q r s t u v w x y z", Field::STORE_NO, Field::INDEX_ANALYZED));
    doc->add(newLucene(L"number", L"0 1 2 3 4 5 6 7 8 9", Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
    writer->addDocument(doc);
    writer->commit();
    IndexReaderPtr r = IndexReader::open(dir, false);
    IndexReaderPtr r1 = SegmentReader::getOnlySegmentReader(r);
    BOOST_CHECK_EQUAL(36, r1->getUniqueTermCount()); // 26 letters + 10 digits
    writer->addDocument(doc);
    writer->commit();
    IndexReaderPtr r2 = r->reopen();
    r->close();
    BOOST_CHECK_EXCEPTION(r2->getUniqueTermCount(), UnsupportedOperationException, check_exception(LuceneException::UnsupportedOperation));
    Collection subs = r2->getSequentialSubReaders();
    for (Collection::iterator sub = subs.begin(); sub != subs.end(); ++sub)
        BOOST_CHECK_EQUAL(36, (*sub)->getUniqueTermCount());
    r2->close();
    writer->close();
    dir->close();
}

/// don't load terms index
BOOST_AUTO_TEST_CASE(testNoTermsIndex)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED);
    DocumentPtr doc = newLucene();
    doc->add(newLucene(L"field", L"a b c d e f g h i j k l m n o p q r s t u v w x y z", Field::STORE_NO, Field::INDEX_ANALYZED));
    doc->add(newLucene(L"number", L"0 1 2 3 4 5 6 7 8 9", Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
    writer->addDocument(doc);
    writer->close();
IndexReaderPtr r = IndexReader::open(dir, IndexDeletionPolicyPtr(), true, -1); BOOST_CHECK_EXCEPTION(r->docFreq(newLucene(L"field", L"f")), IllegalStateException, check_exception(LuceneException::IllegalState)); BOOST_CHECK(!boost::dynamic_pointer_cast(r->getSequentialSubReaders()[0])->termsIndexLoaded()); BOOST_CHECK_EQUAL(-1, boost::dynamic_pointer_cast(r->getSequentialSubReaders()[0])->getTermInfosIndexDivisor()); writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); writer->addDocument(doc); writer->close(); // ensure re-open carries over no terms index IndexReaderPtr r2 = r->reopen(); r->close(); Collection subReaders = r2->getSequentialSubReaders(); BOOST_CHECK_EQUAL(2, subReaders.size()); for (Collection::iterator sub = subReaders.begin(); sub != subReaders.end(); ++sub) { SegmentReaderPtr subReader = boost::dynamic_pointer_cast(*sub); BOOST_CHECK(!subReader->termsIndexLoaded()); } r2->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testPrepareCommitIsCurrent) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); writer->addDocument(doc); IndexReaderPtr r = IndexReader::open(dir, true); BOOST_CHECK(r->isCurrent()); writer->addDocument(doc); writer->prepareCommit(); BOOST_CHECK(r->isCurrent()); IndexReaderPtr r2 = r->reopen(); BOOST_CHECK(r == r2); writer->commit(); BOOST_CHECK(!r->isCurrent()); writer->close(); r->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexWriterDeleteTest.cpp000066400000000000000000000633101217574114600251260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "Term.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "TopDocs.h" #include "IndexReader.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "IndexFileDeleter.h" #include "TestPoint.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexWriterDeleteTest, LuceneTestFixture) DECLARE_SHARED_PTR(FailOnlyOnDeleteFlush) class FailOnlyOnDeleteFlush : public MockDirectoryFailure { public: FailOnlyOnDeleteFlush() { sawMaybe = false; failed = false; } virtual ~FailOnlyOnDeleteFlush() { } public: bool sawMaybe; bool failed; public: virtual MockDirectoryFailurePtr reset() { sawMaybe = false; failed = false; return shared_from_this(); } virtual void eval(MockRAMDirectoryPtr dir) { if (sawMaybe && !failed) { if (!TestPoint::getTestPoint(L"applyDeletes")) { // Only fail once we are no longer in applyDeletes failed = true; boost::throw_exception(IOException(L"fail after applyDeletes")); } } if (!failed) { if (TestPoint::getTestPoint(L"applyDeletes")) sawMaybe = true; } } }; DECLARE_SHARED_PTR(FailOnlyOnAdd) class FailOnlyOnAdd : public MockDirectoryFailure { public: FailOnlyOnAdd() { failed = false; } virtual ~FailOnlyOnAdd() { } public: bool failed; public: virtual MockDirectoryFailurePtr reset() { failed = false; return shared_from_this(); } virtual void eval(MockRAMDirectoryPtr dir) { if (!failed) { failed = true; boost::throw_exception(IOException(L"fail in add doc")); } } }; static int32_t getHitCount(DirectoryPtr dir, TermPtr term) { IndexSearcherPtr searcher = newLucene(dir, true); int32_t hitCount = searcher->search(newLucene(term), FilterPtr(), 1000)->totalHits; searcher->close(); return hitCount; } static void addDoc(IndexWriterPtr modifier, int32_t id, int32_t 
value) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"id", StringUtils::toString(id), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"value", StringUtils::toString(value), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); modifier->addDocument(doc); } static void updateDoc(IndexWriterPtr modifier, int32_t id, int32_t value) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"id", StringUtils::toString(id), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"value", StringUtils::toString(value), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); modifier->updateDocument(newLucene(L"id", StringUtils::toString(id)), doc); } static void checkNoUnreferencedFiles(DirectoryPtr dir) { HashSet _startFiles = dir->listAll(); SegmentInfosPtr infos = newLucene(); infos->read(dir); IndexFileDeleterPtr deleter = newLucene(dir, newLucene(), infos, InfoStreamPtr(), DocumentsWriterPtr(), HashSet()); HashSet _endFiles = dir->listAll(); Collection startFiles = Collection::newInstance(_startFiles.begin(), _startFiles.end()); Collection endFiles = Collection::newInstance(_endFiles.begin(), _endFiles.end()); std::sort(startFiles.begin(), startFiles.end()); std::sort(endFiles.begin(), endFiles.end()); BOOST_CHECK(startFiles.equals(endFiles)); } BOOST_AUTO_TEST_CASE(testSimpleCase) { Collection keywords = newCollection(L"1", L"2"); Collection unindexed = newCollection(L"Netherlands", L"Italy"); Collection unstored = newCollection(L"Amsterdam has lots of bridges", L"Venice has lots of canals"); Collection text = newCollection(L"Amsterdam", L"Venice"); DirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setUseCompoundFile(true); modifier->setMaxBufferedDeleteTerms(1); for (int32_t i = 0; i < keywords.size(); 
++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", keywords[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"country", unindexed[i], Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"contents", unstored[i], Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"city", text[i], Field::STORE_YES, Field::INDEX_ANALYZED)); modifier->addDocument(doc); } modifier->optimize(); modifier->commit(); TermPtr term = newLucene(L"city", L"Amsterdam"); int32_t hitCount = getHitCount(dir, term); BOOST_CHECK_EQUAL(1, hitCount); modifier->deleteDocuments(term); modifier->commit(); hitCount = getHitCount(dir, term); BOOST_CHECK_EQUAL(0, hitCount); modifier->close(); dir->close(); } /// test when delete terms only apply to disk segments BOOST_AUTO_TEST_CASE(testNonRAMDelete) { DirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(2); modifier->setMaxBufferedDeleteTerms(2); int32_t id = 0; int32_t value = 100; for (int32_t i = 0; i < 7; ++i) addDoc(modifier, ++id, value); modifier->commit(); BOOST_CHECK_EQUAL(0, modifier->getNumBufferedDocuments()); BOOST_CHECK(0 < modifier->getSegmentCount()); modifier->commit(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(7, reader->numDocs()); reader->close(); modifier->deleteDocuments(newLucene(L"value", StringUtils::toString(value))); modifier->commit(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->numDocs()); reader->close(); modifier->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testMaxBufferedDeletes) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDeleteTerms(1); writer->deleteDocuments(newLucene(L"foobar", L"1")); writer->deleteDocuments(newLucene(L"foobar", L"1")); writer->deleteDocuments(newLucene(L"foobar", L"1")); 
BOOST_CHECK_EQUAL(3, writer->getFlushDeletesCount()); writer->close(); dir->close(); } /// test when delete terms only apply to ram segments BOOST_AUTO_TEST_CASE(testRAMDeletes) { for (int32_t t = 0; t < 2; ++t) { DirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(4); modifier->setMaxBufferedDeleteTerms(4); int32_t id = 0; int32_t value = 100; addDoc(modifier, ++id, value); if (t == 0) modifier->deleteDocuments(newLucene(L"value", StringUtils::toString(value))); else modifier->deleteDocuments(newLucene(newLucene(L"value", StringUtils::toString(value)))); addDoc(modifier, ++id, value); if (t == 0) { modifier->deleteDocuments(newLucene(L"value", StringUtils::toString(value))); BOOST_CHECK_EQUAL(2, modifier->getNumBufferedDeleteTerms()); BOOST_CHECK_EQUAL(1, modifier->getBufferedDeleteTermsSize()); } else modifier->deleteDocuments(newLucene(newLucene(L"value", StringUtils::toString(value)))); addDoc(modifier, ++id, value); BOOST_CHECK_EQUAL(0, modifier->getSegmentCount()); modifier->commit(); modifier->commit(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(1, reader->numDocs()); int32_t hitCount = getHitCount(dir, newLucene(L"id", StringUtils::toString(id))); BOOST_CHECK_EQUAL(1, hitCount); reader->close(); modifier->close(); dir->close(); } } /// test when delete terms apply to both disk and ram segments BOOST_AUTO_TEST_CASE(testBothDeletes) { DirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(100); modifier->setMaxBufferedDeleteTerms(100); int32_t id = 0; int32_t value = 100; for (int32_t i = 0; i < 5; ++i) addDoc(modifier, ++id, value); value = 200; for (int32_t i = 0; i < 5; ++i) addDoc(modifier, ++id, value); modifier->commit(); for (int32_t i = 0; i < 5; ++i) addDoc(modifier, ++id, value); 
modifier->deleteDocuments(newLucene(L"value", StringUtils::toString(value))); modifier->commit(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(5, reader->numDocs()); modifier->close(); reader->close(); } /// test that batched delete terms are flushed together BOOST_AUTO_TEST_CASE(testBatchDeletes) { DirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(100); modifier->setMaxBufferedDeleteTerms(100); int32_t id = 0; int32_t value = 100; for (int32_t i = 0; i < 7; ++i) addDoc(modifier, ++id, value); modifier->commit(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(7, reader->numDocs()); reader->close(); id = 0; modifier->deleteDocuments(newLucene(L"id", StringUtils::toString(++id))); modifier->deleteDocuments(newLucene(L"id", StringUtils::toString(++id))); modifier->commit(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(5, reader->numDocs()); reader->close(); Collection terms = Collection::newInstance(3); for (int32_t i = 0; i < terms.size(); ++i) terms[i] = newLucene(L"id", StringUtils::toString(++id)); modifier->deleteDocuments(terms); modifier->commit(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(2, reader->numDocs()); reader->close(); modifier->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testDeleteAll) { DirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(2); modifier->setMaxBufferedDeleteTerms(2); int32_t id = 0; int32_t value = 100; for (int32_t i = 0; i < 7; ++i) addDoc(modifier, ++id, value); modifier->commit(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(7, reader->numDocs()); reader->close(); // Add 1 doc (so we will have something buffered) addDoc(modifier, 99, value); // Delete all modifier->deleteAll(); // Delete all shouldn't be on disk 
yet reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(7, reader->numDocs()); reader->close(); // Add a doc and update a doc (after the deleteAll, before the commit) addDoc(modifier, 101, value); updateDoc(modifier, 102, value); // commit the delete all modifier->commit(); // Validate there are no docs left reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(2, reader->numDocs()); reader->close(); modifier->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testDeleteAllRollback) { DirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(2); modifier->setMaxBufferedDeleteTerms(2); int32_t id = 0; int32_t value = 100; for (int32_t i = 0; i < 7; ++i) addDoc(modifier, ++id, value); modifier->commit(); addDoc(modifier, 99, value); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(7, reader->numDocs()); reader->close(); // Delete all modifier->deleteAll(); // Roll it back modifier->rollback(); modifier->close(); // Validate that the docs are still there reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(7, reader->numDocs()); reader->close(); dir->close(); } /// test deleteAll() with near real-time reader BOOST_AUTO_TEST_CASE(testDeleteAllNRT) { DirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(2); modifier->setMaxBufferedDeleteTerms(2); int32_t id = 0; int32_t value = 100; for (int32_t i = 0; i < 7; ++i) addDoc(modifier, ++id, value); modifier->commit(); IndexReaderPtr reader = modifier->getReader(); BOOST_CHECK_EQUAL(7, reader->numDocs()); reader->close(); addDoc(modifier, ++id, value); addDoc(modifier, ++id, value); // Delete all modifier->deleteAll(); reader = modifier->getReader(); BOOST_CHECK_EQUAL(0, reader->numDocs()); reader->close(); // Roll it back modifier->rollback(); modifier->close(); // Validate that the docs 
are still there reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(7, reader->numDocs()); reader->close(); dir->close(); } /// Make sure if modifier tries to commit but hits disk full that modifier /// remains consistent and usable. static void testOperationsOnDiskFull(bool updates) { TermPtr searchTerm = newLucene(L"content", L"aaa"); int32_t START_COUNT = 157; int32_t END_COUNT = 144; // First build up a starting index RAMDirectoryPtr startDir = newLucene(); IndexWriterPtr writer = newLucene(startDir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < 157; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"content", L"aaa " + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); int64_t diskUsage = startDir->sizeInBytes(); int64_t diskFree = diskUsage + 10; LuceneException err; bool done = false; // Iterate with ever increasing free disk space while (!done) { MockRAMDirectoryPtr dir = newLucene(startDir); // If IndexReader hits disk full, it can write to the same files again. 
dir->setPreventDoubleWrite(false); IndexWriterPtr modifier = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(1000); // use flush or close modifier->setMaxBufferedDeleteTerms(1000); // use flush or close // For each disk size, first try to commit against dir that will hit random IOExceptions and // disk full; after, give it infinite disk space and turn off random IOExceptions and // retry with same reader bool success = false; for (int32_t x = 0; x < 2; ++x) { double rate = 0.1; double diskRatio = ((double)diskFree) / (double)diskUsage; int64_t thisDiskFree = 0; String testName; if (x == 0) { thisDiskFree = diskFree; if (diskRatio >= 2.0) rate /= 2; if (diskRatio >= 4.0) rate /= 2; if (diskRatio >= 6.0) rate = 0.0; testName = L"disk full during reader.close() @ " + StringUtils::toString(thisDiskFree) + L" bytes"; } else { thisDiskFree = 0; rate = 0.0; testName = L"reader re-use after disk full"; } dir->setMaxSizeInBytes(thisDiskFree); dir->setRandomIOExceptionRate(rate, diskFree); try { if (x == 0) { int32_t docId = 12; for (int32_t i = 0; i < 13; ++i) { if (updates) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"content", L"bbb " + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_ANALYZED)); modifier->updateDocument(newLucene(L"id", StringUtils::toString(docId)), doc); } else // deletes modifier->deleteDocuments(newLucene(L"id", StringUtils::toString(docId))); docId += 12; } } modifier->close(); success = true; if (x == 0) done = true; } catch (IOException& e) { err = e; if (x == 1) BOOST_FAIL(testName << " hit IOException after disk space was freed up"); } // If the close() succeeded, make sure there are no unreferenced files. 
if (success) { checkIndex(dir); checkNoUnreferencedFiles(dir); } // Finally, verify index is not corrupt, and, if we succeeded, we see all docs changed, and if // we failed, we see either all docs or no docs changed (transactional semantics): IndexReaderPtr newReader; try { newReader = IndexReader::open(dir, true); } catch (IOException& e) { BOOST_FAIL(testName << ":exception when creating IndexReader after disk full during close:" << e.getError()); } IndexSearcherPtr searcher = newLucene(newReader); Collection hits; BOOST_CHECK_NO_THROW(hits = searcher->search(newLucene(searchTerm), FilterPtr(), 1000)->scoreDocs); int32_t result2 = hits.size(); if (success) { if (x == 0 && result2 != END_COUNT) BOOST_FAIL(testName << ": method did not throw exception but hits.size() for search on term 'aaa' is " << result2 << " instead of expected " << END_COUNT); else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT) { // It's possible that the first exception was "recoverable" wrt pending deletes, in which // case the pending deletes are retained and then re-flushing (with plenty of disk space) // will succeed in flushing the deletes BOOST_FAIL(testName << ": method did not throw exception but hits.size() for search on term 'aaa' is " << result2 << " instead of expected " << START_COUNT << " or " << END_COUNT); } } else { // On hitting exception we still may have added all docs if (result2 != START_COUNT && result2 != END_COUNT) BOOST_FAIL(testName << ": method did throw exception but hits.size() for search on term 'aaa' is " << result2 << " instead of expected " << START_COUNT << " or " << END_COUNT); } searcher->close(); newReader->close(); if (result2 == END_COUNT) break; } dir->close(); // Try again with 10 more bytes of free space diskFree += 10; } startDir->close(); } BOOST_AUTO_TEST_CASE(testDeletesOnDiskFull) { testOperationsOnDiskFull(false); } BOOST_AUTO_TEST_CASE(testUpdatesOnDiskFull) { testOperationsOnDiskFull(true); } /// This test tests that buffered 
deletes are cleared when an Exception is hit during flush. BOOST_AUTO_TEST_CASE(testErrorAfterApplyDeletes) { Collection keywords = newCollection(L"1", L"2"); Collection unindexed = newCollection(L"Netherlands", L"Italy"); Collection unstored = newCollection(L"Amsterdam has lots of bridges", L"Venice has lots of canals"); Collection text = newCollection(L"Amsterdam", L"Venice"); MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setUseCompoundFile(true); modifier->setMaxBufferedDeleteTerms(2); FailOnlyOnDeleteFlushPtr failure = newLucene(); dir->failOn(failure->reset()); for (int32_t i = 0; i < keywords.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", keywords[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"country", unindexed[i], Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"contents", unstored[i], Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"city", text[i], Field::STORE_YES, Field::INDEX_ANALYZED)); modifier->addDocument(doc); } modifier->optimize(); modifier->commit(); TermPtr term = newLucene(L"city", L"Amsterdam"); int32_t hitCount = getHitCount(dir, term); BOOST_CHECK_EQUAL(1, hitCount); modifier->deleteDocuments(term); DocumentPtr doc = newLucene(); modifier->addDocument(doc); // The failure object will fail on the first write after the del file gets created when // processing the buffered delete // In the ac case, this will be when writing the new segments files so we really don't // need the new doc, but it's harmless // In the !ac case, a new segments file won't be created but in this case, creation of // the cfs file happens next so we need the doc (to test that it's okay that we don't // lose deletes if failing while creating the cfs file) BOOST_CHECK_EXCEPTION(modifier->commit(), IOException, check_exception(LuceneException::IO)); // The commit above failed, so we need to retry it 
(which will succeed, because the // failure is a one-shot) modifier->commit(); hitCount = getHitCount(dir, term); // Make sure the delete was successfully flushed BOOST_CHECK_EQUAL(0, hitCount); modifier->close(); dir->close(); } /// This test tests that the files created by the docs writer before a segment is written are /// cleaned up if there's an i/o error BOOST_AUTO_TEST_CASE(testErrorInDocsWriterAdd) { Collection keywords = newCollection(L"1", L"2"); Collection unindexed = newCollection(L"Netherlands", L"Italy"); Collection unstored = newCollection(L"Amsterdam has lots of bridges", L"Venice has lots of canals"); Collection text = newCollection(L"Amsterdam", L"Venice"); MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr modifier = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); FailOnlyOnAddPtr failure = newLucene(); dir->failOn(failure->reset()); for (int32_t i = 0; i < keywords.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", keywords[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"country", unindexed[i], Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"contents", unstored[i], Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"city", text[i], Field::STORE_YES, Field::INDEX_ANALYZED)); try { modifier->addDocument(doc); } catch (IOException&) { break; } } checkNoUnreferencedFiles(dir); modifier->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexWriterExceptionsTest.cpp000066400000000000000000000171671217574114600260560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexWriter.h" #include "LuceneThread.h" #include "Document.h" #include "Field.h" #include "CloseableThreadLocal.h" #include "Term.h" #include "MockRAMDirectory.h" #include "WhitespaceAnalyzer.h" #include "IndexReader.h" #include "ConcurrentMergeScheduler.h" #include "Random.h" #include "MiscUtils.h" using namespace Lucene; class IndexWriterExceptionsTestFixture : public LuceneTestFixture { public: IndexWriterExceptionsTestFixture() { random = newLucene(); tvSettings = newCollection( Field::TERM_VECTOR_NO, Field::TERM_VECTOR_YES, Field::TERM_VECTOR_WITH_OFFSETS, Field::TERM_VECTOR_WITH_POSITIONS, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS ); } virtual ~IndexWriterExceptionsTestFixture() { } protected: RandomPtr random; Collection tvSettings; public: Field::TermVector randomTVSetting() { return tvSettings[random->nextInt(tvSettings.size())]; } }; BOOST_FIXTURE_TEST_SUITE(IndexWriterExceptionsTest, IndexWriterExceptionsTestFixture) static CloseableThreadLocal doFail; DECLARE_SHARED_PTR(IndexerThread) class IndexerThread : public LuceneThread { public: IndexerThread(IndexWriterPtr writer, IndexWriterExceptionsTestFixture* fixture) { this->writer = writer; this->fixture = fixture; this->r = newLucene(47); } virtual ~IndexerThread() { } LUCENE_CLASS(IndexerThread); public: IndexWriterPtr writer; IndexWriterExceptionsTestFixture* fixture; LuceneException failure; RandomPtr r; public: virtual void run() { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content1", L"aaa bbb ccc ddd", Field::STORE_YES, Field::INDEX_ANALYZED, fixture->randomTVSetting())); doc->add(newLucene(L"content6", L"aaa bbb ccc ddd", Field::STORE_NO, Field::INDEX_ANALYZED, fixture->randomTVSetting())); doc->add(newLucene(L"content2", L"aaa bbb ccc ddd", Field::STORE_YES, Field::INDEX_NOT_ANALYZED, fixture->randomTVSetting())); 
doc->add(newLucene(L"content3", L"aaa bbb ccc ddd", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"content4", L"aaa bbb ccc ddd", Field::STORE_NO, Field::INDEX_ANALYZED, fixture->randomTVSetting())); doc->add(newLucene(L"content5", L"aaa bbb ccc ddd", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, fixture->randomTVSetting())); doc->add(newLucene(L"content7", L"aaa bbb ccc ddd", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, fixture->randomTVSetting())); FieldPtr idField = newLucene(L"id", L"", Field::STORE_YES, Field::INDEX_NOT_ANALYZED, fixture->randomTVSetting()); doc->add(idField); int64_t stopTime = MiscUtils::currentTimeMillis() + 3000; while ((int64_t)MiscUtils::currentTimeMillis() < stopTime) { doFail.set(shared_from_this()); String id = StringUtils::toString(r->nextInt(50)); idField->setValue(id); TermPtr idTerm = newLucene(L"id", id); try { writer->updateDocument(idTerm, doc); } catch (RuntimeException&) { try { checkIndex(writer->getDirectory()); } catch (IOException& ioe) { failure = ioe; break; } } catch (LuceneException& e) { failure = e; break; } doFail.set(LuceneThreadPtr()); // After a possible exception (above) I should be able to add a new document // without hitting an exception try { writer->updateDocument(idTerm, doc); } catch (LuceneException& e) { failure = e; break; } } } }; class MockIndexWriter : public IndexWriter { public: MockIndexWriter(DirectoryPtr dir, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(dir, a, create, mfl) { this->r = newLucene(17); } virtual ~MockIndexWriter() { } protected: RandomPtr r; public: virtual bool testPoint(const String& name) { if (doFail.get() && name != L"startDoFlush" && r->nextInt(20) == 17) boost::throw_exception(RuntimeException(L"intentionally failing at " + name)); return true; } }; BOOST_AUTO_TEST_CASE(testRandomExceptions) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); 
boost::dynamic_pointer_cast(writer->getMergeScheduler())->setSuppressExceptions(); writer->setRAMBufferSizeMB(0.1); IndexerThreadPtr thread = newLucene(writer, this); thread->run(); if (!thread->failure.isNull()) BOOST_FAIL("thread hit unexpected failure"); writer->commit(); try { writer->close(); } catch (LuceneException&) { writer->rollback(); } // Confirm that when doc hits exception partway through tokenization, it's deleted IndexReaderPtr r2 = IndexReader::open(dir, true); int32_t count = r2->docFreq(newLucene(L"content4", L"aaa")); int32_t count2 = r2->docFreq(newLucene(L"content4", L"ddd")); BOOST_CHECK_EQUAL(count, count2); r2->close(); checkIndex(dir); } BOOST_AUTO_TEST_CASE(testRandomExceptionsThreads) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); boost::dynamic_pointer_cast(writer->getMergeScheduler())->setSuppressExceptions(); writer->setRAMBufferSizeMB(0.2); int32_t NUM_THREADS = 4; Collection threads = Collection::newInstance(NUM_THREADS); for (int32_t i = 0; i < NUM_THREADS; ++i) { threads[i] = newLucene(writer, this); threads[i]->start(); } for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->join(); for (int32_t i = 0; i < NUM_THREADS; ++i) { if (!threads[i]->failure.isNull()) BOOST_FAIL("thread hit unexpected failure: " << threads[i]->failure.getError()); } writer->commit(); try { writer->close(); } catch (LuceneException&) { writer->rollback(); } // Confirm that when doc hits exception partway through tokenization, it's deleted IndexReaderPtr r2 = IndexReader::open(dir, true); int32_t count = r2->docFreq(newLucene(L"content4", L"aaa")); int32_t count2 = r2->docFreq(newLucene(L"content4", L"ddd")); BOOST_CHECK_EQUAL(count, count2); r2->close(); checkIndex(dir); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/index/IndexWriterLockReleaseTest.cpp000066400000000000000000000026131217574114600261140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexWriter.h" #include "FSDirectory.h" #include "StandardAnalyzer.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexWriterLockReleaseTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testIndexWriterLockRelease) { String testDir(getTempDir(L"testIndexWriter")); FileUtils::createDirectory(testDir); DirectoryPtr dir = FSDirectory::open(testDir); IndexWriterPtr im; BOOST_CHECK_EXCEPTION(im = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED), FileNotFoundException, check_exception(LuceneException::FileNotFound)); BOOST_CHECK_EXCEPTION(im = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED), FileNotFoundException, check_exception(LuceneException::FileNotFound)); dir->close(); FileUtils::removeDirectory(testDir); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexWriterMergePolicyTest.cpp000066400000000000000000000174041217574114600261460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexWriter.h" #include "RAMDirectory.h" #include "WhitespaceAnalyzer.h" #include "LogDocMergePolicy.h" #include "Document.h" #include "Field.h" #include "ConcurrentMergeScheduler.h" #include "IndexReader.h" #include "Term.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexWriterMergePolicyTest, LuceneTestFixture) static void addDoc(IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } static void checkInvariants(IndexWriterPtr writer) { writer->waitForMerges(); int32_t maxBufferedDocs = writer->getMaxBufferedDocs(); int32_t mergeFactor = writer->getMergeFactor(); int32_t maxMergeDocs = writer->getMaxMergeDocs(); int32_t ramSegmentCount = writer->getNumBufferedDocuments(); BOOST_CHECK(ramSegmentCount < maxBufferedDocs); int32_t lowerBound = -1; int32_t upperBound = maxBufferedDocs; int32_t numSegments = 0; int32_t segmentCount = writer->getSegmentCount(); for (int32_t i = segmentCount - 1; i >= 0; --i) { int32_t docCount = writer->getDocCount(i); BOOST_CHECK(docCount > lowerBound); if (docCount <= upperBound) ++numSegments; else { if (upperBound * mergeFactor <= maxMergeDocs) BOOST_CHECK(numSegments < mergeFactor); do { lowerBound = upperBound; upperBound *= mergeFactor; } while (docCount > upperBound); numSegments = 1; } } if (upperBound * mergeFactor <= maxMergeDocs) BOOST_CHECK(numSegments < mergeFactor); } BOOST_AUTO_TEST_CASE(testNormalCase) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setMergeFactor(10); writer->setMergePolicy(newLucene(writer)); for (int32_t i = 0; i < 100; ++i) { addDoc(writer); checkInvariants(writer); } writer->close(); } 
BOOST_AUTO_TEST_CASE(testNoOverMerge) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setMergeFactor(10); writer->setMergePolicy(newLucene(writer)); bool noOverMerge = false; for (int32_t i = 0; i < 100; ++i) { addDoc(writer); checkInvariants(writer); if (writer->getNumBufferedDocuments() + writer->getSegmentCount() >= 18) noOverMerge = true; } BOOST_CHECK(noOverMerge); writer->close(); } /// Test the case where flush is forced after every addDoc BOOST_AUTO_TEST_CASE(testForceFlush) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setMergeFactor(10); LogDocMergePolicyPtr mp = newLucene(writer); mp->setMinMergeDocs(100); writer->setMergePolicy(mp); for (int32_t i = 0; i < 100; ++i) { addDoc(writer); writer->close(); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setMergePolicy(mp); mp->setMinMergeDocs(100); writer->setMergeFactor(10); checkInvariants(writer); } writer->close(); } /// Test the case where mergeFactor changes BOOST_AUTO_TEST_CASE(testMergeFactorChange) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setMergeFactor(100); writer->setMergePolicy(newLucene(writer)); for (int32_t i = 0; i < 250; ++i) { addDoc(writer); checkInvariants(writer); } writer->setMergeFactor(5); // merge policy only fixes segments on levels where merges have been triggered, so check invariants after all adds for (int32_t i = 0; i < 10; ++i) addDoc(writer); checkInvariants(writer); writer->close(); } /// Test the case where both mergeFactor and maxBufferedDocs change BOOST_AUTO_TEST_CASE(testMaxBufferedDocsChange) { DirectoryPtr dir = newLucene(); 
IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(101); writer->setMergeFactor(101); writer->setMergePolicy(newLucene(writer)); // leftmost* segment has 1 doc // rightmost* segment has 100 docs for (int32_t i = 1; i <= 100; ++i) { for (int32_t j = 0; j < i; ++j) { addDoc(writer); checkInvariants(writer); } writer->close(); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(101); writer->setMergeFactor(101); writer->setMergePolicy(newLucene(writer)); } writer->setMaxBufferedDocs(10); writer->setMergeFactor(10); // merge policy only fixes segments on levels where merges have been triggered, so check invariants after all adds for (int32_t i = 0; i < 100; ++i) addDoc(writer); checkInvariants(writer); for (int32_t i = 100; i < 1000; ++i) addDoc(writer); writer->commit(); boost::dynamic_pointer_cast(writer->getMergeScheduler())->sync(); writer->commit(); checkInvariants(writer); writer->close(); } /// Test the case where a merge results in no doc at all BOOST_AUTO_TEST_CASE(testMergeDocCount0) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMergePolicy(newLucene(writer)); writer->setMaxBufferedDocs(10); writer->setMergeFactor(100); for (int32_t i = 0; i < 250; ++i) { addDoc(writer); checkInvariants(writer); } writer->close(); IndexReaderPtr reader = IndexReader::open(dir, false); reader->deleteDocuments(newLucene(L"content", L"aaa")); reader->close(); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMergePolicy(newLucene(writer)); writer->setMaxBufferedDocs(10); writer->setMergeFactor(5); // merge factor is changed, so check invariants after all adds for (int32_t i = 0; i < 10; ++i) addDoc(writer); writer->commit(); boost::dynamic_pointer_cast(writer->getMergeScheduler())->sync(); writer->commit(); 
checkInvariants(writer); BOOST_CHECK_EQUAL(10, writer->maxDoc()); writer->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexWriterMergingTest.cpp000066400000000000000000000051061217574114600253130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexWriterMergingTest, LuceneTestFixture) static bool verifyIndex(DirectoryPtr directory, int32_t startAt) { bool fail = false; IndexReaderPtr reader = IndexReader::open(directory, true); int32_t max = reader->maxDoc(); for (int32_t i = 0; i < max; ++i) { DocumentPtr temp = reader->document(i); if (temp->getField(L"count")->stringValue() != StringUtils::toString(i + startAt)) fail = true; } reader->close(); return fail; } static void fillIndex(DirectoryPtr dir, int32_t start, int32_t numDocs) { IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMergeFactor(2); writer->setMaxBufferedDocs(2); for (int32_t i = start; i < (start + numDocs); ++i) { DocumentPtr temp = newLucene(); temp->add(newLucene(L"count", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(temp); } writer->close(); } /// Tests that index merging (specifically addIndexesNoOptimize()) doesn't change the index order of documents. 
BOOST_AUTO_TEST_CASE(testIndexWriterMerging) { int32_t num = 100; DirectoryPtr indexA = newLucene(); DirectoryPtr indexB = newLucene(); fillIndex(indexA, 0, num); BOOST_CHECK(!verifyIndex(indexA, 0)); fillIndex(indexB, num, num); BOOST_CHECK(!verifyIndex(indexB, num)); DirectoryPtr merged = newLucene(); IndexWriterPtr writer = newLucene(merged, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMergeFactor(2); writer->addIndexesNoOptimize(newCollection(indexA, indexB)); writer->optimize(); writer->close(); BOOST_CHECK(!verifyIndex(merged, 0)); merged->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexWriterReaderTest.cpp000066400000000000000000000701041217574114600251250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "TermDocs.h" #include "Term.h" #include "TermQuery.h" #include "LuceneThread.h" #include "ConcurrentMergeScheduler.h" #include "IndexSearcher.h" #include "TopDocs.h" #include "Random.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexWriterReaderTest, LuceneTestFixture) DECLARE_SHARED_PTR(TestableIndexWriter) DECLARE_SHARED_PTR(AddDirectoriesThread) DECLARE_SHARED_PTR(AddDirectoriesThreads) DECLARE_SHARED_PTR(HeavyAtomicInt) static DocumentPtr createDocument(int32_t n, const String& indexName, int32_t numFields) { StringStream sb; DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"indexname", indexName, Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); sb << L"a" << n; doc->add(newLucene(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); sb << L" b" << n; for (int32_t i = 1; i < numFields; ++i) doc->add(newLucene(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); return doc; } static void createIndexNoClose(bool multiSegment, const String& indexName, IndexWriterPtr w) { for (int32_t i = 0; i < 100; ++i) w->addDocument(createDocument(i, indexName, 4)); if (!multiSegment) w->optimize(); } static int32_t count(TermPtr t, IndexReaderPtr r) { int32_t count = 0; TermDocsPtr td = r->termDocs(t); while (td->next()) { td->doc(); ++count; } td->close(); return count; } class TestableIndexWriter : public IndexWriter { public: TestableIndexWriter(DirectoryPtr d, 
AnalyzerPtr a, int32_t mfl) : IndexWriter(d, a, mfl) { } virtual ~TestableIndexWriter() { } LUCENE_CLASS(TestableIndexWriter); public: using IndexWriter::flush; }; class HeavyAtomicInt : public LuceneObject { public: HeavyAtomicInt(int32_t start) { value = start; } virtual ~HeavyAtomicInt() { } protected: int32_t value; public: int32_t addAndGet(int32_t inc) { SyncLock syncLock(this); value += inc; return value; } int32_t incrementAndGet() { SyncLock syncLock(this); return ++value; } int32_t intValue() { SyncLock syncLock(this); return value; } }; class AddDirectoriesThread : public LuceneThread { public: AddDirectoriesThread(AddDirectoriesThreadsPtr addDirectories, int32_t numIter) { this->_addDirectories = addDirectories; this->numIter = numIter; } virtual ~AddDirectoriesThread() { } LUCENE_CLASS(AddDirectoriesThread); protected: AddDirectoriesThreadsWeakPtr _addDirectories; int32_t numIter; public: virtual void run(); }; class AddDirectoriesThreads : public LuceneObject { public: AddDirectoriesThreads(int32_t numDirs, IndexWriterPtr mainWriter) { this->numDirs = numDirs; this->mainWriter = mainWriter; threads = Collection::newInstance(NUM_THREADS); failures = Collection::newInstance(); didClose = false; count = newLucene(0); numAddIndexesNoOptimize = newLucene(0); addDir = newLucene(); IndexWriterPtr writer = newLucene(addDir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); for (int32_t i = 0; i < NUM_INIT_DOCS; ++i) { DocumentPtr doc = createDocument(i, L"addindex", 4); writer->addDocument(doc); } writer->close(); readers = Collection::newInstance(numDirs); for (int32_t i = 0; i < numDirs; ++i) readers[i] = IndexReader::open(addDir, false); } virtual ~AddDirectoriesThreads() { } LUCENE_CLASS(AddDirectoriesThreads); public: static const int32_t NUM_THREADS; static const int32_t NUM_INIT_DOCS; DirectoryPtr addDir; int32_t numDirs; Collection threads; IndexWriterPtr mainWriter; Collection failures; Collection readers; bool 
didClose; HeavyAtomicIntPtr count; HeavyAtomicIntPtr numAddIndexesNoOptimize; public: void joinThreads() { for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->join(); } void close(bool doWait) { didClose = true; mainWriter->close(doWait); } void closeDir() { for (int32_t i = 0; i < numDirs; ++i) readers[i]->close(); addDir->close(); } void handle(const LuceneException& t) { BOOST_FAIL(t.getError()); SyncLock syncLock(&failures); failures.add(t); } void launchThreads(int32_t numIter) { for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i] = newLucene(shared_from_this(), numIter); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->start(); } void doBody(int32_t j, Collection dirs) { switch (j % 4) { case 0: mainWriter->addIndexesNoOptimize(dirs); mainWriter->optimize(); break; case 1: mainWriter->addIndexesNoOptimize(dirs); numAddIndexesNoOptimize->incrementAndGet(); break; case 2: mainWriter->addIndexes(readers); break; case 3: mainWriter->commit(); break; } count->addAndGet(dirs.size() * NUM_INIT_DOCS); } }; const int32_t AddDirectoriesThreads::NUM_THREADS = 5; const int32_t AddDirectoriesThreads::NUM_INIT_DOCS = 100; void AddDirectoriesThread::run() { AddDirectoriesThreadsPtr addDirectories(_addDirectories); try { Collection dirs = Collection::newInstance(addDirectories->numDirs); for (int32_t k = 0; k < addDirectories->numDirs; ++k) dirs[k] = newLucene(addDirectories->addDir); for (int32_t x = 0; x < numIter; ++x) { // only do addIndexesNoOptimize addDirectories->doBody(x, dirs); } } catch (LuceneException& e) { addDirectories->handle(e); } } BOOST_AUTO_TEST_CASE(testUpdateDocument) { bool optimize = true; DirectoryPtr dir1 = newLucene(); IndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); // create the index createIndexNoClose(!optimize, L"index1", writer); // get a reader IndexReaderPtr r1 = writer->getReader(); BOOST_CHECK(r1->isCurrent()); String id10 = r1->document(10)->getField(L"id")->stringValue(); DocumentPtr 
newDoc = r1->document(10); newDoc->removeField(L"id"); newDoc->add(newLucene(L"id", StringUtils::toString(8000), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->updateDocument(newLucene(L"id", id10), newDoc); BOOST_CHECK(!r1->isCurrent()); IndexReaderPtr r2 = writer->getReader(); BOOST_CHECK(r2->isCurrent()); BOOST_CHECK_EQUAL(0, count(newLucene(L"id", id10), r2)); BOOST_CHECK_EQUAL(1, count(newLucene(L"id", StringUtils::toString(8000)), r2)); r1->close(); writer->close(); BOOST_CHECK(r2->isCurrent()); IndexReaderPtr r3 = IndexReader::open(dir1, true); BOOST_CHECK(r3->isCurrent()); BOOST_CHECK(r2->isCurrent()); BOOST_CHECK_EQUAL(0, count(newLucene(L"id", id10), r3)); BOOST_CHECK_EQUAL(1, count(newLucene(L"id", StringUtils::toString(8000)), r3)); writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); BOOST_CHECK(r2->isCurrent()); BOOST_CHECK(r3->isCurrent()); writer->close(); BOOST_CHECK(!r2->isCurrent()); BOOST_CHECK(!r3->isCurrent()); r2->close(); r3->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testAddIndexes) { bool optimize = false; DirectoryPtr dir1 = newLucene(); TestableIndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); // create the index createIndexNoClose(!optimize, L"index1", writer); writer->flush(false, true, true); // create a 2nd index DirectoryPtr dir2 = newLucene(); TestableIndexWriterPtr writer2 = newLucene(dir2, newLucene(), IndexWriter::MaxFieldLengthLIMITED); createIndexNoClose(!optimize, L"index2", writer2); writer2->close(); IndexReaderPtr r0 = writer->getReader(); BOOST_CHECK(r0->isCurrent()); writer->addIndexesNoOptimize(newCollection(dir2)); BOOST_CHECK(!r0->isCurrent()); r0->close(); IndexReaderPtr r1 = writer->getReader(); BOOST_CHECK(r1->isCurrent()); writer->commit(); BOOST_CHECK(!r1->isCurrent()); BOOST_CHECK_EQUAL(200, 
r1->maxDoc()); int32_t index2df = r1->docFreq(newLucene(L"indexname", L"index2")); BOOST_CHECK_EQUAL(100, index2df); // verify the docs are from different indexes DocumentPtr doc5 = r1->document(5); BOOST_CHECK_EQUAL(L"index1", doc5->get(L"indexname")); DocumentPtr doc150 = r1->document(150); BOOST_CHECK_EQUAL(L"index2", doc150->get(L"indexname")); r1->close(); writer->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testAddIndexes2) { bool optimize = false; DirectoryPtr dir1 = newLucene(); IndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DirectoryPtr dir2 = newLucene(); IndexWriterPtr writer2 = newLucene(dir2, newLucene(), IndexWriter::MaxFieldLengthLIMITED); createIndexNoClose(!optimize, L"index2", writer2); writer2->close(); Collection dirs = newCollection(dir2); writer->addIndexesNoOptimize(dirs); writer->addIndexesNoOptimize(dirs); writer->addIndexesNoOptimize(dirs); writer->addIndexesNoOptimize(dirs); writer->addIndexesNoOptimize(dirs); IndexReaderPtr r1 = writer->getReader(); BOOST_CHECK_EQUAL(500, r1->maxDoc()); r1->close(); writer->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testDeleteFromIndexWriter) { bool optimize = true; DirectoryPtr dir1 = newLucene(); TestableIndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setReaderTermsIndexDivisor(2); // create the index createIndexNoClose(!optimize, L"index1", writer); writer->flush(false, true, true); // get a reader IndexReaderPtr r1 = writer->getReader(); String id10 = r1->document(10)->getField(L"id")->stringValue(); // deleted IW docs should not show up in the next getReader writer->deleteDocuments(newLucene(L"id", id10)); IndexReaderPtr r2 = writer->getReader(); BOOST_CHECK_EQUAL(1, count(newLucene(L"id", id10), r1)); BOOST_CHECK_EQUAL(0, count(newLucene(L"id", id10), r2)); String id50 = r1->document(50)->getField(L"id")->stringValue(); BOOST_CHECK_EQUAL(1, count(newLucene(L"id", id50), r1)); 
writer->deleteDocuments(newLucene(L"id", id50)); IndexReaderPtr r3 = writer->getReader(); BOOST_CHECK_EQUAL(0, count(newLucene(L"id", id10), r3)); BOOST_CHECK_EQUAL(0, count(newLucene(L"id", id50), r3)); String id75 = r1->document(75)->getField(L"id")->stringValue(); writer->deleteDocuments(newLucene(newLucene(L"id", id75))); IndexReaderPtr r4 = writer->getReader(); BOOST_CHECK_EQUAL(1, count(newLucene(L"id", id75), r3)); BOOST_CHECK_EQUAL(0, count(newLucene(L"id", id75), r4)); r1->close(); r2->close(); r3->close(); r4->close(); writer->close(); // reopen the writer to verify the delete made it to the directory writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); IndexReaderPtr w2r1 = writer->getReader(); BOOST_CHECK_EQUAL(0, count(newLucene(L"id", id10), w2r1)); w2r1->close(); writer->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testAddIndexesAndDoDeletesThreads) { int32_t numIter = 5; int32_t numDirs = 3; DirectoryPtr mainDir = newLucene(); IndexWriterPtr mainWriter = newLucene(mainDir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); AddDirectoriesThreadsPtr addDirThreads = newLucene(numIter, mainWriter); addDirThreads->launchThreads(numDirs); addDirThreads->joinThreads(); BOOST_CHECK_EQUAL(addDirThreads->count->intValue(), addDirThreads->mainWriter->numDocs()); addDirThreads->close(true); BOOST_CHECK(addDirThreads->failures.empty()); checkIndex(mainDir); IndexReaderPtr reader = IndexReader::open(mainDir, true); BOOST_CHECK_EQUAL(addDirThreads->count->intValue(), reader->numDocs()); reader->close(); addDirThreads->closeDir(); mainDir->close(); } /// Tests creating a segment, then check to insure the segment can be seen via IndexWriter.getReader static void doTestIndexWriterReopenSegment(bool optimize) { DirectoryPtr dir1 = newLucene(); TestableIndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); IndexReaderPtr r1 = writer->getReader(); BOOST_CHECK_EQUAL(0, r1->maxDoc()); createIndexNoClose(false, 
L"index1", writer); writer->flush(!optimize, true, true); IndexReaderPtr iwr1 = writer->getReader(); BOOST_CHECK_EQUAL(100, iwr1->maxDoc()); IndexReaderPtr r2 = writer->getReader(); BOOST_CHECK_EQUAL(100, r2->maxDoc()); // add 100 documents for (int32_t x = 10000; x < 10000 + 100; ++x) { DocumentPtr d = createDocument(x, L"index1", 5); writer->addDocument(d); } writer->flush(false, true, true); // verify the reader was reopened internally IndexReaderPtr iwr2 = writer->getReader(); BOOST_CHECK_NE(iwr2, r1); BOOST_CHECK_EQUAL(200, iwr2->maxDoc()); // should have flushed out a segment IndexReaderPtr r3 = writer->getReader(); BOOST_CHECK_NE(r2, r3); BOOST_CHECK_EQUAL(200, r3->maxDoc()); // dec ref the readers rather than close them because closing flushes changes to the writer r1->close(); iwr1->close(); r2->close(); r3->close(); iwr2->close(); writer->close(); // test whether the changes made it to the directory writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); IndexReaderPtr w2r1 = writer->getReader(); // insure the deletes were actually flushed to the directory BOOST_CHECK_EQUAL(200, w2r1->maxDoc()); w2r1->close(); writer->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testIndexWriterReopenSegmentOptimize) { doTestIndexWriterReopenSegment(true); } BOOST_AUTO_TEST_CASE(testIndexWriterReopenSegment) { doTestIndexWriterReopenSegment(false); } namespace TestMergeWarmer { DECLARE_SHARED_PTR(MyWarmer) class MyWarmer : public IndexReaderWarmer { public: MyWarmer() { warmCount = 0; } virtual ~MyWarmer() { } LUCENE_CLASS(MyWarmer); public: int32_t warmCount; public: virtual void warm(IndexReaderPtr reader) { ++warmCount; } }; } BOOST_AUTO_TEST_CASE(testMergeWarmer) { DirectoryPtr dir1 = newLucene(); IndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); // create the index createIndexNoClose(false, L"test", writer); // get a reader to put writer into near real-time mode IndexReaderPtr r1 = writer->getReader(); // 
Enroll warmer TestMergeWarmer::MyWarmerPtr warmer = newLucene(); writer->setMergedSegmentWarmer(warmer); writer->setMergeFactor(2); writer->setMaxBufferedDocs(2); for (int32_t i = 0; i < 100; ++i) writer->addDocument(createDocument(i, L"test", 4)); boost::dynamic_pointer_cast(writer->getMergeScheduler())->sync(); BOOST_CHECK(warmer->warmCount > 0); int32_t count = warmer->warmCount; writer->addDocument(createDocument(17, L"test", 4)); writer->optimize(); BOOST_CHECK(warmer->warmCount > count); writer->close(); r1->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testAfterCommit) { DirectoryPtr dir1 = newLucene(); IndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); // create the index createIndexNoClose(false, L"test", writer); // get a reader to put writer into near real-time mode IndexReaderPtr r1 = writer->getReader(); checkIndex(dir1); writer->commit(); checkIndex(dir1); BOOST_CHECK_EQUAL(100, r1->numDocs()); for (int32_t i = 0; i < 10; ++i) writer->addDocument(createDocument(i, L"test", 4)); boost::dynamic_pointer_cast(writer->getMergeScheduler())->sync(); IndexReaderPtr r2 = r1->reopen(); if (r2 != r1) { r1->close(); r1 = r2; } BOOST_CHECK_EQUAL(110, r1->numDocs()); writer->close(); r1->close(); dir1->close(); } /// Make sure reader remains usable even if IndexWriter closes BOOST_AUTO_TEST_CASE(testAfterClose) { DirectoryPtr dir1 = newLucene(); IndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); // create the index createIndexNoClose(false, L"test", writer); IndexReaderPtr r = writer->getReader(); writer->close(); checkIndex(dir1); // reader should remain usable even after IndexWriter is closed BOOST_CHECK_EQUAL(100, r->numDocs()); QueryPtr q = newLucene(newLucene(L"indexname", L"test")); BOOST_CHECK_EQUAL(100, newLucene(r)->search(q, 10)->totalHits); BOOST_CHECK_EXCEPTION(r->reopen(), AlreadyClosedException, check_exception(LuceneException::AlreadyClosed)); r->close(); dir1->close(); } 
namespace TestDuringAddIndexes { class AddIndexesThread : public LuceneThread { public: AddIndexesThread(int64_t endTime, IndexWriterPtr writer, Collection dirs) { this->endTime = endTime; this->writer = writer; this->dirs = dirs; } virtual ~AddIndexesThread() { } LUCENE_CLASS(AddIndexesThread); protected: int64_t endTime; IndexWriterPtr writer; Collection dirs; public: virtual void run() { while ((int64_t)MiscUtils::currentTimeMillis() < endTime) { try { writer->addIndexesNoOptimize(dirs); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } } }; } /// Stress test reopen during addIndexes BOOST_AUTO_TEST_CASE(testDuringAddIndexes) { MockRAMDirectoryPtr dir1 = newLucene(); IndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMergeFactor(2); // create the index createIndexNoClose(false, L"test", writer); writer->commit(); Collection dirs = Collection::newInstance(10); for (int32_t i = 0; i < 10; ++i) dirs[i] = newLucene(dir1); IndexReaderPtr r = writer->getReader(); int32_t NUM_THREAD = 5; int32_t SECONDS = 3; int64_t endTime = MiscUtils::currentTimeMillis() + 1000 * SECONDS; Collection threads = Collection::newInstance(NUM_THREAD); for (int32_t i = 0; i < NUM_THREAD; ++i) { threads[i] = newLucene(endTime, writer, dirs); threads[i]->start(); } int32_t lastCount = 0; while ((int64_t)MiscUtils::currentTimeMillis() < endTime) { IndexReaderPtr r2 = r->reopen(); if (r2 != r) { r->close(); r = r2; } QueryPtr q = newLucene(newLucene(L"indexname", L"test")); int32_t count = newLucene(r)->search(q, 10)->totalHits; BOOST_CHECK(count >= lastCount); lastCount = count; } for (int32_t i = 0; i < NUM_THREAD; ++i) threads[i]->join(); writer->close(); r->close(); BOOST_CHECK_EQUAL(0, dir1->getOpenDeletedFiles().size()); checkIndex(dir1); dir1->close(); } namespace TestDuringAddDelete { class AddDeleteThread : public LuceneThread { public: AddDeleteThread(int64_t endTime, IndexWriterPtr writer) { 
this->endTime = endTime; this->writer = writer; this->random = newLucene(); } virtual ~AddDeleteThread() { } LUCENE_CLASS(AddDeleteThread); protected: int64_t endTime; IndexWriterPtr writer; RandomPtr random; public: virtual void run() { int32_t count = 0; while ((int64_t)MiscUtils::currentTimeMillis() < endTime) { try { for (int32_t docUpto = 0; docUpto < 10; ++docUpto) writer->addDocument(createDocument(10 * count + docUpto, L"test", 4)); ++count; int32_t limit = count * 10; for (int32_t delUpto = 0; delUpto < 5;++delUpto) { int32_t x = random->nextInt(limit); writer->deleteDocuments(newLucene(L"field3", L"b" + StringUtils::toString(x))); } } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } } }; } /// Stress test reopen during add/delete BOOST_AUTO_TEST_CASE(testDuringAddDelete) { DirectoryPtr dir1 = newLucene(); IndexWriterPtr writer = newLucene(dir1, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMergeFactor(2); // create the index createIndexNoClose(false, L"test", writer); writer->commit(); IndexReaderPtr r = writer->getReader(); int32_t NUM_THREAD = 5; int32_t SECONDS = 3; int64_t endTime = MiscUtils::currentTimeMillis() + 1000 * SECONDS; Collection threads = Collection::newInstance(NUM_THREAD); for (int32_t i = 0; i < NUM_THREAD; ++i) { threads[i] = newLucene(endTime, writer); threads[i]->start(); } int32_t sum = 0; while ((int64_t)MiscUtils::currentTimeMillis() < endTime) { IndexReaderPtr r2 = r->reopen(); if (r2 != r) { r->close(); r = r2; } QueryPtr q = newLucene(newLucene(L"indexname", L"test")); sum += newLucene(r)->search(q, 10)->totalHits; } for (int32_t i = 0; i < NUM_THREAD; ++i) threads[i]->join(); BOOST_CHECK(sum > 0); writer->close(); checkIndex(dir1); r->close(); dir1->close(); } BOOST_AUTO_TEST_CASE(testExpungeDeletes) { DirectoryPtr dir = newLucene(); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); 
doc->add(newLucene(L"field", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED)); FieldPtr id = newLucene(L"id", L"", Field::STORE_NO, Field::INDEX_ANALYZED); doc->add(id); id->setValue(L"0"); w->addDocument(doc); id->setValue(L"1"); w->addDocument(doc); w->deleteDocuments(newLucene(L"id", L"0")); IndexReaderPtr r = w->getReader(); w->expungeDeletes(); w->close(); r->close(); r = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(1, r->numDocs()); BOOST_CHECK(!r->hasDeletions()); r->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testDeletesNumDocs) { DirectoryPtr dir = newLucene(); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED)); FieldPtr id = newLucene(L"id", L"", Field::STORE_NO, Field::INDEX_NOT_ANALYZED); doc->add(id); id->setValue(L"0"); w->addDocument(doc); id->setValue(L"1"); w->addDocument(doc); IndexReaderPtr r = w->getReader(); BOOST_CHECK_EQUAL(2, r->numDocs()); r->close(); w->deleteDocuments(newLucene(L"id", L"0")); r = w->getReader(); BOOST_CHECK_EQUAL(1, r->numDocs()); r->close(); w->deleteDocuments(newLucene(L"id", L"1")); r = w->getReader(); BOOST_CHECK_EQUAL(0, r->numDocs()); r->close(); w->close(); dir->close(); } namespace TestSegmentWarmer { DECLARE_SHARED_PTR(SegmentWarmer) class SegmentWarmer : public IndexReaderWarmer { public: virtual ~SegmentWarmer() { } LUCENE_CLASS(SegmentWarmer); public: virtual void warm(IndexReaderPtr reader) { IndexSearcherPtr s = newLucene(reader); TopDocsPtr hits = s->search(newLucene(newLucene(L"foo", L"bar")), 10); BOOST_CHECK_EQUAL(20, hits->totalHits); } }; } BOOST_AUTO_TEST_CASE(testSegmentWarmer) { DirectoryPtr dir = newLucene(); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); w->setMaxBufferedDocs(2); w->getReader()->close(); w->setMergedSegmentWarmer(newLucene()); DocumentPtr doc = newLucene(); doc->add(newLucene(L"foo", L"bar", 
Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); for (int32_t i = 0; i < 20; ++i) w->addDocument(doc); w->waitForMerges(); w->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/IndexWriterTest.cpp000066400000000000000000005161731217574114600240150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "IndexReader.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "Term.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "ConcurrentMergeScheduler.h" #include "SerialMergeScheduler.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "IndexFileDeleter.h" #include "StandardAnalyzer.h" #include "DocumentsWriter.h" #include "TermPositions.h" #include "LogDocMergePolicy.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "FSDirectory.h" #include "IndexInput.h" #include "IndexOutput.h" #include "IndexFileNames.h" #include "SingleInstanceLockFactory.h" #include "TokenFilter.h" #include "StandardTokenizer.h" #include "TestPoint.h" #include "WhitespaceTokenizer.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "PhraseQuery.h" #include "SpanTermQuery.h" #include "TermPositionVector.h" #include "TermVectorOffsetInfo.h" #include "SimpleAnalyzer.h" #include "CachingTokenFilter.h" #include "StringReader.h" #include "TeeSinkTokenFilter.h" #include "StopAnalyzer.h" #include "Random.h" #include "UTF8Stream.h" #include "InfoStream.h" #include "MiscUtils.h" #include 
"FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexWriterTest, LuceneTestFixture) static void addDoc(IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } static void addDocWithIndex(IndexWriterPtr writer, int32_t index) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa " + StringUtils::toString(index), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"id", StringUtils::toString(index), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } static void checkNoUnreferencedFiles(DirectoryPtr dir) { HashSet _startFiles = dir->listAll(); SegmentInfosPtr infos = newLucene(); infos->read(dir); IndexFileDeleterPtr deleter = newLucene(dir, newLucene(), infos, InfoStreamPtr(), DocumentsWriterPtr(), HashSet()); HashSet _endFiles = dir->listAll(); Collection startFiles = Collection::newInstance(_startFiles.begin(), _startFiles.end()); Collection endFiles = Collection::newInstance(_endFiles.begin(), _endFiles.end()); std::sort(startFiles.begin(), startFiles.end()); std::sort(endFiles.begin(), endFiles.end()); BOOST_CHECK(startFiles.equals(endFiles)); } DECLARE_SHARED_PTR(FailOnlyOnFlush) DECLARE_SHARED_PTR(FailOnlyOnAbortOrFlush) DECLARE_SHARED_PTR(FailOnlyInCloseDocStore) DECLARE_SHARED_PTR(FailOnlyInWriteSegment) DECLARE_SHARED_PTR(FailOnlyInSync) DECLARE_SHARED_PTR(FailOnlyInCommit) class FailOnlyOnFlush : public MockDirectoryFailure { public: FailOnlyOnFlush() { count = 0; TestPoint::clear(); } virtual ~FailOnlyOnFlush() { } public: int32_t count; public: virtual void eval(MockRAMDirectoryPtr dir) { if (this->doFail) { if (TestPoint::getTestPoint(L"FreqProxTermsWriter", L"appendPostings") && TestPoint::getTestPoint(L"doFlush") && count++ >= 30) { doFail = false; boost::throw_exception(IOException(L"now failing during flush")); } } } }; /// Throws IOException during FieldsWriter.flushDocument and 
during DocumentsWriter.abort class FailOnlyOnAbortOrFlush : public MockDirectoryFailure { public: FailOnlyOnAbortOrFlush(bool onlyOnce) { onlyOnce = false; } virtual ~FailOnlyOnAbortOrFlush() { } protected: bool onlyOnce; public: virtual void eval(MockRAMDirectoryPtr dir) { if (doFail) { if (TestPoint::getTestPoint(L"abort") || TestPoint::getTestPoint(L"flushDocument")) { if (onlyOnce) doFail = false; boost::throw_exception(IOException(L"now failing on purpose")); } } } }; /// Throws IOException during DocumentsWriter.closeDocStore class FailOnlyInCloseDocStore : public MockDirectoryFailure { public: FailOnlyInCloseDocStore(bool onlyOnce) { onlyOnce = false; } virtual ~FailOnlyInCloseDocStore() { } protected: bool onlyOnce; public: virtual void eval(MockRAMDirectoryPtr dir) { if (doFail) { if (TestPoint::getTestPoint(L"closeDocStore")) { if (onlyOnce) doFail = false; boost::throw_exception(IOException(L"now failing on purpose")); } } } }; /// Throws IOException during DocumentsWriter.writeSegment class FailOnlyInWriteSegment : public MockDirectoryFailure { public: FailOnlyInWriteSegment(bool onlyOnce) { onlyOnce = false; } virtual ~FailOnlyInWriteSegment() { } protected: bool onlyOnce; public: virtual void eval(MockRAMDirectoryPtr dir) { if (doFail) { if (TestPoint::getTestPoint(L"DocFieldProcessor", L"flush")) { if (onlyOnce) doFail = false; boost::throw_exception(IOException(L"now failing on purpose")); } } } }; /// Throws IOException during MockRAMDirectory.sync class FailOnlyInSync : public MockDirectoryFailure { public: FailOnlyInSync() { didFail = false; } virtual ~FailOnlyInSync() { } public: bool didFail; public: virtual void eval(MockRAMDirectoryPtr dir) { if (doFail) { if (TestPoint::getTestPoint(L"MockRAMDirectory", L"sync")) { didFail = true; boost::throw_exception(IOException(L"now failing on purpose during sync")); } } } }; class FailOnlyInCommit : public MockDirectoryFailure { public: FailOnlyInCommit() { fail1 = false; fail2 = false; } virtual 
~FailOnlyInCommit() { } public: bool fail1; bool fail2; public: virtual void eval(MockRAMDirectoryPtr dir) { bool isCommit = TestPoint::getTestPoint(L"SegmentInfos", L"prepareCommit"); bool isDelete = TestPoint::getTestPoint(L"MockRAMDirectory", L"deleteFile"); if (isCommit) { if (!isDelete) { fail1 = true; boost::throw_exception(RuntimeException(L"now fail first")); } else { fail2 = true; boost::throw_exception(IOException(L"now fail during delete")); } } } }; class CrashingFilter : public TokenFilter { public: CrashingFilter(const String& fieldName, TokenStreamPtr input) : TokenFilter(input) { this->count = 0; this->fieldName = fieldName; } virtual ~CrashingFilter() { } LUCENE_CLASS(CrashingFilter); public: String fieldName; int32_t count; public: virtual bool incrementToken() { if (fieldName == L"crash" && count++ >= 4) boost::throw_exception(IOException(L"now failing on purpose")); return input->incrementToken(); } virtual void reset() { TokenFilter::reset(); count = 0; } }; DECLARE_SHARED_PTR(IndexerThread) class IndexerThread : public LuceneThread { public: IndexerThread(IndexWriterPtr writer, bool noErrors) { this->writer = writer; this->noErrors = noErrors; this->addCount = 0; } virtual ~IndexerThread() { } LUCENE_CLASS(IndexerThread); public: IndexWriterPtr writer; bool noErrors; int32_t addCount; public: virtual void run() { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); int32_t idUpto = 0; int32_t fullCount = 0; int64_t stopTime = MiscUtils::currentTimeMillis() + 500; while ((int64_t)MiscUtils::currentTimeMillis() < stopTime) { try { writer->updateDocument(newLucene(L"id", StringUtils::toString(idUpto++)), doc); ++addCount; } catch (IOException& e) { if (boost::starts_with(e.getError(), L"fake disk full at") || e.getError() == L"now failing on purpose") { LuceneThread::threadSleep(1); if (fullCount++ >= 5) break; 
} else { if (noErrors) BOOST_FAIL("Unexpected exception"); break; } } catch (...) { if (noErrors) BOOST_FAIL("Unexpected exception"); break; } } } }; DECLARE_SHARED_PTR(RunAddIndexesThreads) DECLARE_SHARED_PTR(RunAddThread) class RunAddIndexesThreads : public LuceneObject { public: RunAddIndexesThreads(int32_t numCopy); virtual ~RunAddIndexesThreads(); LUCENE_CLASS(RunAddIndexesThreads); public: DirectoryPtr dir; DirectoryPtr dir2; static const int32_t NUM_INIT_DOCS; static const int32_t NUM_THREADS; IndexWriterPtr writer2; bool didClose; Collection readers; int32_t NUM_COPY; Collection threads; ConcurrentMergeSchedulerPtr cms; public: void launchThreads(int32_t numIter); void joinThreads(); void close(bool doWait); void closeDir(); virtual void doBody(int32_t j, Collection dirs) = 0; virtual void handle(LuceneException& e) = 0; }; const int32_t RunAddIndexesThreads::NUM_INIT_DOCS = 17; const int32_t RunAddIndexesThreads::NUM_THREADS = 5; class RunAddThread : public LuceneThread { public: RunAddThread(RunAddIndexesThreadsPtr runAdd, int32_t numIter, int32_t numCopy, DirectoryPtr dir) { this->_runAdd = runAdd; this->numIter = numIter; this->numCopy = numCopy; this->dir = dir; } virtual ~RunAddThread() { } protected: RunAddIndexesThreadsWeakPtr _runAdd; int32_t numIter; int32_t numCopy; DirectoryPtr dir; public: virtual void run() { try { Collection dirs = Collection::newInstance(numCopy); for (int32_t k = 0; k < numCopy; ++k) dirs[k] = newLucene(dir); int32_t j = 0; while (true) { if (numIter > 0 && j == numIter) break; RunAddIndexesThreadsPtr(_runAdd)->doBody(j++, dirs); } } catch (LuceneException& e) { RunAddIndexesThreadsPtr(_runAdd)->handle(e); } } }; RunAddIndexesThreads::RunAddIndexesThreads(int32_t numCopy) { threads = Collection::newInstance(NUM_THREADS); didClose = false; NUM_COPY = numCopy; dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); for (int32_t i = 0; i < 
NUM_INIT_DOCS; ++i) addDoc(writer); writer->close(); dir2 = newLucene(); writer2 = newLucene(dir2, newLucene(), IndexWriter::MaxFieldLengthLIMITED); cms = boost::dynamic_pointer_cast(writer2->getMergeScheduler()); readers = Collection::newInstance(NUM_COPY); for (int32_t i = 0; i < NUM_COPY; ++i) readers[i] = IndexReader::open(dir, true); } RunAddIndexesThreads::~RunAddIndexesThreads() { } void RunAddIndexesThreads::launchThreads(int32_t numIter) { for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i] = newLucene(shared_from_this(), numIter, NUM_COPY, dir); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->start(); } void RunAddIndexesThreads::joinThreads() { for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->join(); } void RunAddIndexesThreads::close(bool doWait) { didClose = true; writer2->close(doWait); } void RunAddIndexesThreads::closeDir() { for (int32_t i = 0; i < NUM_COPY; ++i) readers[i]->close(); dir2->close(); } BOOST_AUTO_TEST_CASE(testDocCount) { DirectoryPtr dir = newLucene(); IndexWriter::setDefaultWriteLockTimeout(2000); BOOST_CHECK_EQUAL(2000, IndexWriter::getDefaultWriteLockTimeout()); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); IndexWriter::setDefaultWriteLockTimeout(1000); // add 100 documents for (int32_t i = 0; i < 100; ++i) addDoc(writer); BOOST_CHECK_EQUAL(100, writer->maxDoc()); writer->close(); // delete 40 documents IndexReaderPtr reader = IndexReader::open(dir, false); for (int32_t i = 0; i < 40; ++i) reader->deleteDocument(i); reader->close(); // test doc count before segments are merged/index is optimized writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); BOOST_CHECK_EQUAL(100, writer->maxDoc()); writer->close(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(100, reader->maxDoc()); BOOST_CHECK_EQUAL(60, reader->numDocs()); reader->close(); // optimize the index and check that the new doc count is correct writer = newLucene(dir, newLucene(), 
IndexWriter::MaxFieldLengthUNLIMITED); BOOST_CHECK_EQUAL(100, writer->maxDoc()); BOOST_CHECK_EQUAL(60, writer->numDocs()); writer->optimize(); BOOST_CHECK_EQUAL(60, writer->maxDoc()); BOOST_CHECK_EQUAL(60, writer->numDocs()); writer->close(); // check that the index reader gives the same numbers reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(60, reader->maxDoc()); BOOST_CHECK_EQUAL(60, reader->numDocs()); reader->close(); // make sure opening a new index for create over this existing one works correctly writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); BOOST_CHECK_EQUAL(0, writer->maxDoc()); BOOST_CHECK_EQUAL(0, writer->numDocs()); writer->close(); } /// Test: make sure when we run out of disk space or hit random IOExceptions in any of the addIndexesNoOptimize(*) calls /// that 1) index is not corrupt (searcher can open/search it) and 2) transactional semantics are followed: /// either all or none of the incoming documents were in fact added. BOOST_AUTO_TEST_CASE(testAddIndexOnDiskFull) { int32_t START_COUNT = 57; int32_t NUM_DIR = 50; int32_t END_COUNT = START_COUNT + NUM_DIR * 25; // Build up a bunch of dirs that have indexes which we will then merge together by calling addIndexesNoOptimize(*) Collection dirs = Collection::newInstance(NUM_DIR); int64_t inputDiskUsage = 0; for (int32_t i = 0; i < NUM_DIR; ++i) { dirs[i] = newLucene(); IndexWriterPtr writer = newLucene(dirs[i], newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t j = 0; j < 25; ++j) addDocWithIndex(writer, 25 * i + j); writer->close(); HashSet files = dirs[i]->listAll(); for (HashSet::iterator file = files.begin(); file != files.end(); ++file) inputDiskUsage += dirs[i]->fileLength(*file); } // Now, build a starting index that has START_COUNT docs. 
We will then try to addIndexesNoOptimize into a copy of this RAMDirectoryPtr startDir = newLucene(); IndexWriterPtr writer = newLucene(startDir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t j = 0; j < START_COUNT; ++j) addDocWithIndex(writer, j); writer->close(); // Make sure starting index seems to be working properly TermPtr searchTerm = newLucene(L"content", L"aaa"); IndexReaderPtr reader = IndexReader::open(startDir, true); BOOST_CHECK_EQUAL(57, reader->docFreq(searchTerm)); IndexSearcherPtr searcher = newLucene(reader); Collection hits = searcher->search(newLucene(searchTerm), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(57, hits.size()); searcher->close(); reader->close(); // Iterate with larger and larger amounts of free disk space. With little free disk space, // addIndexesNoOptimize will certainly run out of space and fail. Verify that when this // happens, index is not corrupt and index in fact has added no documents. Then, we increase // disk space by 2000 bytes each iteration. At some point there is enough free disk space // and addIndexesNoOptimize should succeed and index should show all documents were added. 
int64_t diskUsage = startDir->sizeInBytes(); int64_t startDiskUsage = 0; HashSet files = startDir->listAll(); for (HashSet::iterator file = files.begin(); file != files.end(); ++file) startDiskUsage += startDir->fileLength(*file); for (int32_t iter = 0; iter < 3; ++iter) { BOOST_TEST_MESSAGE("TEST: iter=" << iter); // Start with 100 bytes more than we are currently using: int64_t diskFree = diskUsage + 100; int32_t method = iter; bool success = false; bool done = false; String methodName; if (method == 0) methodName = L"addIndexes(Directory[]) + optimize()"; else if (method == 1) methodName = L"addIndexes(IndexReader[])"; else methodName = L"addIndexesNoOptimize(Directory[])"; while (!done) { // Make a new dir that will enforce disk usage MockRAMDirectoryPtr dir = newLucene(startDir); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); MergeSchedulerPtr ms = writer->getMergeScheduler(); for (int32_t x = 0; x < 2; ++x) { if (MiscUtils::typeOf(ms)) { // This test intentionally produces exceptions in the threads that CMS launches; we don't // want to pollute test output with these. 
if (x == 0) boost::dynamic_pointer_cast(ms)->setSuppressExceptions(); else boost::dynamic_pointer_cast(ms)->clearSuppressExceptions(); } // Two loops: first time, limit disk space and throw random IOExceptions; second time, no disk space limit double rate = 0.05; double diskRatio = (double)diskFree / (double)diskUsage; int64_t thisDiskFree = 0; String testName; if (x == 0) { thisDiskFree = diskFree; if (diskRatio >= 2.0) rate /= 2; if (diskRatio >= 4.0) rate /= 2; if (diskRatio >= 6.0) rate = 0.0; testName = L"disk full test " + methodName + L" with disk full at " + StringUtils::toString(diskFree) + L" bytes"; } else { thisDiskFree = 0; rate = 0.0; testName = L"disk full test " + methodName + L" with unlimited disk space"; } BOOST_TEST_MESSAGE("\ncycle: " << testName); dir->setMaxSizeInBytes(thisDiskFree); dir->setRandomIOExceptionRate(rate, diskFree); try { if (method == 0) { writer->addIndexesNoOptimize(dirs); writer->optimize(); } else if (method == 1) { Collection readers = Collection::newInstance(dirs.size()); for (int32_t i = 0; i < dirs.size(); ++i) readers[i] = IndexReader::open(dirs[i], true); LuceneException finally; try { writer->addIndexes(readers); } catch (LuceneException& e) { finally = e; } for (int32_t i = 0; i < dirs.size(); ++i) readers[i]->close(); finally.throwException(); } else writer->addIndexesNoOptimize(dirs); success = true; BOOST_TEST_MESSAGE(" success!"); if (x == 0) done = true; } catch (IOException& e) { success = false; BOOST_TEST_MESSAGE(" hit IOException: " << e.getError()); if (x == 1) BOOST_FAIL(methodName << " hit IOException after disk space was freed up"); } // Make sure all threads from ConcurrentMergeScheduler are done syncConcurrentMerges(writer); BOOST_TEST_MESSAGE(" now test readers"); // Finally, verify index is not corrupt, and, if we succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added (transactional semantics) try { reader = IndexReader::open(dir, true); } catch (IOException& 
e) { BOOST_FAIL(testName << ": exception when creating IndexReader: " << e.getError()); } int32_t result = reader->docFreq(searchTerm); if (success) { if (result != START_COUNT) BOOST_FAIL(testName << ": method did not throw exception but docFreq('aaa') is " << result << " instead of expected " << START_COUNT); } else { // On hitting exception we still may have added all docs if (result != START_COUNT && result != END_COUNT) BOOST_FAIL(testName << ": method did throw exception but docFreq('aaa') is " << result << " instead of expected " << START_COUNT << " or " << END_COUNT); } searcher = newLucene(reader); try { hits = searcher->search(newLucene(searchTerm), FilterPtr(), END_COUNT)->scoreDocs; } catch (IOException& e) { BOOST_FAIL(testName << ": exception when searching: " << e.getError()); } int32_t result2 = hits.size(); if (success) { if (result2 != result) BOOST_FAIL(testName << ": method did not throw exception but hits.length for search on term 'aaa' is " << result2 << " instead of expected " << result); } else { // On hitting exception we still may have added all docs if (result2 != result) BOOST_FAIL(testName << ": method did throw exception but hits.length for search on term 'aaa' is " << result2 << " instead of expected " << result); } searcher->close(); reader->close(); BOOST_TEST_MESSAGE(" count is " << result); if (done || result == END_COUNT) break; } BOOST_TEST_MESSAGE(" start disk = " << startDiskUsage << "; input disk = " << inputDiskUsage << "; max used = " << dir->getMaxUsedSizeInBytes()); if (done) { // Make sure that temp free Directory space required is at most 3X total input size of indices BOOST_CHECK((dir->getMaxUsedSizeInBytes() - startDiskUsage) < 3 * (startDiskUsage + inputDiskUsage)); } // Make sure we don't hit disk full during close below dir->setMaxSizeInBytes(0); dir->setRandomIOExceptionRate(0.0, 0); writer->close(); // Wait for all BG threads to finish else dir->close() will throw IOException because there are still open files 
syncConcurrentMerges(ms); dir->close(); // Try again with 2000 more bytes of free space diskFree += 2000; } } startDir->close(); } /// Make sure IndexWriter cleans up on hitting a disk full exception in addDocument. BOOST_AUTO_TEST_CASE(testAddDocumentOnDiskFull) { for (int32_t pass = 0; pass < 2; ++pass) { BOOST_TEST_MESSAGE("TEST: pass=" << pass); bool doAbort = (pass == 1); int64_t diskFree = 200; while (true) { BOOST_TEST_MESSAGE("TEST: cycle: diskFree=" << diskFree); MockRAMDirectoryPtr dir = newLucene(); dir->setMaxSizeInBytes(diskFree); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); MergeSchedulerPtr ms = writer->getMergeScheduler(); if (MiscUtils::typeOf(ms)) boost::dynamic_pointer_cast(ms)->setSuppressExceptions(); bool hitError = false; try { for (int32_t i = 0; i < 200; ++i) addDoc(writer); } catch (IOException&) { BOOST_TEST_MESSAGE("TEST: exception on addDoc"); hitError = true; } if (hitError) { if (doAbort) writer->rollback(); else { try { writer->close(); } catch (IOException&) { BOOST_TEST_MESSAGE("TEST: exception on close"); dir->setMaxSizeInBytes(0); writer->close(); } } syncConcurrentMerges(ms); checkNoUnreferencedFiles(dir); // Make sure reader can open the index IndexReader::open(dir, true)->close(); dir->close(); // Now try again with more space diskFree += 500; } else { syncConcurrentMerges(writer); dir->close(); break; } } } } /// Make sure we skip wicked long terms. 
BOOST_AUTO_TEST_CASE(testWickedLongTerm)
{
    RAMDirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);

    String bigTerm(DocumentsWriter::CHAR_BLOCK_SIZE - 1, L'x');

    DocumentPtr doc = newLucene<Document>();

    // Max length term is 16383, so this contents produces a too-long term
    String contents = L"abc xyz x" + bigTerm + L" another term";
    doc->add(newLucene<Field>(L"content", contents, Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);

    // Make sure we can add another normal document
    doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"content", L"abc bbb ccc", Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
    writer->close();

    IndexReaderPtr reader = IndexReader::open(dir, true);

    // Make sure all terms < max size were indexed
    BOOST_CHECK_EQUAL(2, reader->docFreq(newLucene<Term>(L"content", L"abc")));
    BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene<Term>(L"content", L"bbb")));
    BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene<Term>(L"content", L"term")));
    BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene<Term>(L"content", L"another")));

    // Make sure position is still incremented when massive term is skipped
    TermPositionsPtr tps = reader->termPositions(newLucene<Term>(L"content", L"another"));
    BOOST_CHECK(tps->next());
    BOOST_CHECK_EQUAL(1, tps->freq());
    BOOST_CHECK_EQUAL(3, tps->nextPosition());

    // Make sure the doc that has the massive term is in the index
    BOOST_CHECK_EQUAL(2, reader->numDocs());

    reader->close();

    // Make sure we can add a document with exactly the maximum length term, and search on that term
    doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"content", bigTerm, Field::STORE_NO, Field::INDEX_ANALYZED));
    StandardAnalyzerPtr sa = newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    sa->setMaxTokenLength(100000);
    writer = newLucene<IndexWriter>(dir, sa, IndexWriter::MaxFieldLengthLIMITED);
    writer->addDocument(doc);
    writer->close();
    reader = IndexReader::open(dir, true);
    BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene<Term>(L"content", bigTerm)));
    reader->close();

    dir->close();
}

/// optimize(maxNumSegments) must leave at most that many segments behind.
BOOST_AUTO_TEST_CASE(testOptimizeMaxNumSegments)
{
    MockRAMDirectoryPtr dir = newLucene<MockRAMDirectory>();

    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"content", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED));

    for (int32_t numDocs = 38; numDocs < 500; numDocs += 38)
    {
        IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        LogDocMergePolicyPtr ldmp = newLucene<LogDocMergePolicy>(writer);
        ldmp->setMinMergeDocs(1);
        writer->setMergePolicy(ldmp);
        writer->setMergeFactor(5);
        writer->setMaxBufferedDocs(2);
        for (int32_t j = 0; j < numDocs; ++j)
            writer->addDocument(doc);
        writer->close();

        SegmentInfosPtr sis = newLucene<SegmentInfos>();
        sis->read(dir);
        int32_t segCount = sis->size();

        writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
        writer->setMergePolicy(ldmp);
        writer->setMergeFactor(5);
        writer->optimize((int32_t)3);
        writer->close();

        sis = newLucene<SegmentInfos>();
        sis->read(dir);
        int32_t optSegCount = sis->size();

        if (segCount < 3)
            BOOST_CHECK_EQUAL(segCount, optSegCount);
        else
            BOOST_CHECK_EQUAL(3, optSegCount);
    }
}

/// As above but re-using one writer across iterations, with concurrent merges in flight.
BOOST_AUTO_TEST_CASE(testOptimizeMaxNumSegments2)
{
    MockRAMDirectoryPtr dir = newLucene<MockRAMDirectory>();

    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"content", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED));

    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    LogDocMergePolicyPtr ldmp = newLucene<LogDocMergePolicy>(writer);
    ldmp->setMinMergeDocs(1);
    writer->setMergePolicy(ldmp);
    writer->setMergeFactor(4);
    writer->setMaxBufferedDocs(2);

    for (int32_t iter = 0; iter < 10; ++iter)
    {
        for (int32_t i = 0; i < 19; ++i)
            writer->addDocument(doc);
        writer->commit();
        writer->waitForMerges();
        writer->commit();

        SegmentInfosPtr sis = newLucene<SegmentInfos>();
        sis->read(dir);
        int32_t segCount = sis->size();

        writer->optimize((int32_t)7);
        writer->commit();

        sis = newLucene<SegmentInfos>();
        boost::dynamic_pointer_cast<ConcurrentMergeScheduler>(writer->getMergeScheduler())->sync();
        sis->read(dir);
        int32_t optSegCount = sis->size();

        if (segCount < 7)
            BOOST_CHECK_EQUAL(segCount, optSegCount);
        else
            BOOST_CHECK_EQUAL(7, optSegCount);
    }
}

/// Make sure optimize doesn't use any more than 1X starting index size as its temporary free space required.
BOOST_AUTO_TEST_CASE(testOptimizeTempSpaceUsage)
{
    MockRAMDirectoryPtr dir = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t j = 0; j < 500; ++j)
        addDocWithIndex(writer, j);

    // force one extra segment w/ different doc store so we see the doc stores get merged
    writer->commit();
    addDocWithIndex(writer, 500);

    writer->close();

    int64_t startDiskUsage = 0;
    HashSet<String> files = dir->listAll();
    for (HashSet<String>::iterator file = files.begin(); file != files.end(); ++file)
        startDiskUsage += dir->fileLength(*file);

    dir->resetMaxUsedSizeInBytes();
    writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), false, IndexWriter::MaxFieldLengthLIMITED);
    writer->optimize();
    writer->close();
    int64_t maxDiskUsage = dir->getMaxUsedSizeInBytes();

    BOOST_CHECK(maxDiskUsage <= 4 * startDiskUsage);

    dir->close();
}

/// Make sure we can open an index for create even when a reader holds it open (this fails pre lock-less commits on windows)
BOOST_AUTO_TEST_CASE(testCreateWithReader)
{
    String indexDir(FileUtils::joinPath(getTempDir(), L"lucenetestindexwriter"));
    LuceneException finally;
    try
    {
        DirectoryPtr dir = FSDirectory::open(indexDir);

        // add one document and close writer
        IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        addDoc(writer);
        writer->close();

        // now open reader
        IndexReaderPtr reader = IndexReader::open(dir, true);
        BOOST_CHECK_EQUAL(reader->numDocs(), 1);

        // now open index for create
        writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        BOOST_CHECK_EQUAL(writer->maxDoc(), 0);
        addDoc(writer);
        writer->close();

        BOOST_CHECK_EQUAL(reader->numDocs(), 1);

        IndexReaderPtr reader2 = IndexReader::open(dir, true);
        BOOST_CHECK_EQUAL(reader2->numDocs(), 1);
        reader->close();
        reader2->close();
    }
    catch (LuceneException& e)
    {
        finally = e;
    }
    // Always clean up the on-disk index, then re-raise any failure
    FileUtils::removeDirectory(indexDir);
    if (!finally.isNull())
        BOOST_FAIL(finally.getError());
}

/// Simulate a writer that crashed while writing segments file: make sure we can still open the index (ie,
/// gracefully fallback to the previous segments file), and that we can add to the index
BOOST_AUTO_TEST_CASE(testSimulatedCrashedWriter)
{
    DirectoryPtr dir = newLucene<RAMDirectory>();

    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    // add 100 documents
    for (int32_t i = 0; i < 100; ++i)
        addDoc(writer);

    writer->close();

    int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir);
    BOOST_CHECK(gen > 1);

    // Make the next segments file, with last byte missing, to simulate a writer that crashed while
    // writing segments file
    String fileNameIn = SegmentInfos::getCurrentSegmentFileName(dir);
    String fileNameOut = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", 1 + gen);
    IndexInputPtr in = dir->openInput(fileNameIn);
    IndexOutputPtr out = dir->createOutput(fileNameOut);
    int64_t length = in->length();
    for (int32_t i = 0; i < length - 1; ++i)
        out->writeByte(in->readByte());
    in->close();
    out->close();

    IndexReaderPtr reader;
    BOOST_CHECK_NO_THROW(reader = IndexReader::open(dir, true));
    reader->close();

    BOOST_CHECK_NO_THROW(writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED));

    // add 100 documents
    for (int32_t i = 0; i < 100; ++i)
        addDoc(writer);

    writer->close();
}

/// Simulate a corrupt index by removing last byte of latest segments file and make sure we get an
/// IOException trying to open the index
BOOST_AUTO_TEST_CASE(testSimulatedCorruptIndex1)
{
    DirectoryPtr dir = newLucene<RAMDirectory>();

    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    // add 100 documents
    for (int32_t i = 0; i < 100; ++i)
        addDoc(writer);

    writer->close();

    int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir);
    BOOST_CHECK(gen > 1);

    String fileNameIn = SegmentInfos::getCurrentSegmentFileName(dir);
    String fileNameOut = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", 1 + gen);
    IndexInputPtr in = dir->openInput(fileNameIn);
    IndexOutputPtr out = dir->createOutput(fileNameOut);
    int64_t length = in->length();
    for (int32_t i = 0; i < length - 1; ++i)
        out->writeByte(in->readByte());
    in->close();
    out->close();
    dir->deleteFile(fileNameIn);

    IndexReaderPtr reader;
    BOOST_CHECK_EXCEPTION(reader = IndexReader::open(dir, true), IOException, check_exception(LuceneException::IO));
    if (reader)
        reader->close();
}

/// Using a writer after close() must raise AlreadyClosedException.
BOOST_AUTO_TEST_CASE(testChangesAfterClose)
{
    DirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDoc(writer);
    writer->close();
    BOOST_CHECK_EXCEPTION(addDoc(writer), AlreadyClosedException, check_exception(LuceneException::AlreadyClosed));
}

/// Simulate a corrupt index by removing one of the cfs files and make sure we get an IOException trying to open the index
BOOST_AUTO_TEST_CASE(testSimulatedCorruptIndex2)
{
    DirectoryPtr dir = newLucene<RAMDirectory>();

    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    // add 100 documents
    for (int32_t i = 0; i < 100; ++i)
        addDoc(writer);

    writer->close();

    int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir);
    BOOST_CHECK(gen > 1);

    HashSet<String> files = dir->listAll();
    for (HashSet<String>::iterator file = files.begin(); file != files.end(); ++file)
    {
        if (boost::ends_with(*file, L".cfs"))
        {
            dir->deleteFile(*file);
            break;
        }
    }

    IndexReaderPtr reader;
    BOOST_CHECK_EXCEPTION(reader = IndexReader::open(dir, true), FileNotFoundException, check_exception(LuceneException::FileNotFound));
    if (reader)
        reader->close();
}

/// Simple test for "commit on close": open writer then add a bunch of docs, making sure reader does
/// not see these docs until writer is closed.
BOOST_AUTO_TEST_CASE(testCommitOnClose)
{
    DirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 14; ++i)
        addDoc(writer);
    writer->close();

    TermPtr searchTerm = newLucene<Term>(L"content", L"aaa");
    IndexSearcherPtr searcher = newLucene<IndexSearcher>(dir, false);
    Collection<ScoreDocPtr> hits = searcher->search(newLucene<TermQuery>(searchTerm), FilterPtr(), 1000)->scoreDocs;
    BOOST_CHECK_EQUAL(14, hits.size());
    searcher->close();

    IndexReaderPtr reader = IndexReader::open(dir, true);

    writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 3; ++i)
    {
        for (int32_t j = 0; j < 11; ++j)
            addDoc(writer);
        // Uncommitted adds must stay invisible to searches and readers
        searcher = newLucene<IndexSearcher>(dir, false);
        hits = searcher->search(newLucene<TermQuery>(searchTerm), FilterPtr(), 1000)->scoreDocs;
        BOOST_CHECK_EQUAL(14, hits.size());
        searcher->close();
        BOOST_CHECK(reader->isCurrent());
    }

    // Now, close the writer
    writer->close();
    BOOST_CHECK(!reader->isCurrent());

    searcher = newLucene<IndexSearcher>(dir, false);
    hits = searcher->search(newLucene<TermQuery>(searchTerm), FilterPtr(), 1000)->scoreDocs;
    BOOST_CHECK_EQUAL(47, hits.size());
    searcher->close();
}

/// "Commit on close" with rollback(): pending adds and deletes are discarded.
BOOST_AUTO_TEST_CASE(testCommitOnCloseAbort)
{
    MockRAMDirectoryPtr dir = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMaxBufferedDocs(10);
    for (int32_t i = 0; i < 14; ++i)
        addDoc(writer);
    writer->close();

    TermPtr searchTerm = newLucene<Term>(L"content", L"aaa");
    IndexSearcherPtr searcher = newLucene<IndexSearcher>(dir, false);
    Collection<ScoreDocPtr> hits = searcher->search(newLucene<TermQuery>(searchTerm), FilterPtr(), 1000)->scoreDocs;
    BOOST_CHECK_EQUAL(14, hits.size());
    searcher->close();

    writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), false, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMaxBufferedDocs(10);
    for (int32_t i = 0; i < 17; ++i)
        addDoc(writer);

    // Delete all docs
    writer->deleteDocuments(searchTerm);

    searcher = newLucene<IndexSearcher>(dir, false);
    hits = searcher->search(newLucene<TermQuery>(searchTerm), FilterPtr(), 1000)->scoreDocs;
    BOOST_CHECK_EQUAL(14, hits.size());
    searcher->close();

    // Now, close the writer
    writer->rollback();

    checkNoUnreferencedFiles(dir);

    searcher = newLucene<IndexSearcher>(dir, false);
    hits = searcher->search(newLucene<TermQuery>(searchTerm), FilterPtr(), 1000)->scoreDocs;
    BOOST_CHECK_EQUAL(14, hits.size());
    searcher->close();

    // Now make sure we can re-open the index, add docs, and all is good
    writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), false, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMaxBufferedDocs(10);

    // On abort, writer in fact may write to the same segments_N file
    dir->setPreventDoubleWrite(false);

    for (int32_t i = 0; i < 12; ++i)
    {
        for (int32_t j = 0; j < 17; ++j)
            addDoc(writer);
        searcher = newLucene<IndexSearcher>(dir, false);
        hits = searcher->search(newLucene<TermQuery>(searchTerm), FilterPtr(), 1000)->scoreDocs;
        BOOST_CHECK_EQUAL(14, hits.size());
        searcher->close();
    }

    writer->close();
    searcher = newLucene<IndexSearcher>(dir, false);
    hits = searcher->search(newLucene<TermQuery>(searchTerm), FilterPtr(), 1000)->scoreDocs;
    BOOST_CHECK_EQUAL(218, hits.size());
    searcher->close();

    dir->close();
}

/// Verify that a writer with "commit on close" indeed cleans up the temp segments created after opening
/// that are not referenced by the starting segments file. We check this by using MockRAMDirectory to
/// measure max temp disk space used.
BOOST_AUTO_TEST_CASE(testCommitOnCloseDiskUsage) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); for (int32_t j = 0; j < 30; ++j) addDocWithIndex(writer, j); writer->close(); dir->resetMaxUsedSizeInBytes(); int64_t startDiskUsage = dir->getMaxUsedSizeInBytes(); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setMergeScheduler(newLucene()); for (int32_t j = 0; j < 1470; ++j) addDocWithIndex(writer, j); int64_t midDiskUsage = dir->getMaxUsedSizeInBytes(); dir->resetMaxUsedSizeInBytes(); writer->optimize(); writer->close(); IndexReader::open(dir, true)->close(); int64_t endDiskUsage = dir->getMaxUsedSizeInBytes(); // Ending index is 50X as large as starting index; due to 3X disk usage normally we allow 150X max // transient usage. If something is wrong with deleter and it doesn't delete intermediate segments // then it will exceed this 150X BOOST_CHECK(midDiskUsage < 150 * startDiskUsage); BOOST_CHECK(endDiskUsage < 150 * startDiskUsage); } /// Verify that calling optimize when writer is open for "commit on close" works correctly both for /// rollback() and close(). 
BOOST_AUTO_TEST_CASE(testCommitOnCloseOptimize) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); for (int32_t j = 0; j < 17; ++j) addDocWithIndex(writer, j); writer->close(); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); writer->optimize(); // Open a reader before closing (commiting) the writer IndexReaderPtr reader = IndexReader::open(dir, true); // Reader should see index as unoptimized at this point BOOST_CHECK(!reader->isOptimized()); reader->close(); // Abort the writer writer->rollback(); checkNoUnreferencedFiles(dir); // Open a reader after aborting writer reader = IndexReader::open(dir, true); // Reader should still see index as unoptimized BOOST_CHECK(!reader->isOptimized()); reader->close(); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); writer->optimize(); writer->close(); checkNoUnreferencedFiles(dir); // Open a reader after aborting writer reader = IndexReader::open(dir, true); // Reader should still see index as unoptimized: BOOST_CHECK(reader->isOptimized()); reader->close(); } BOOST_AUTO_TEST_CASE(testIndexNoDocuments) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->commit(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->maxDoc()); BOOST_CHECK_EQUAL(0, reader->numDocs()); reader->close(); writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); writer->commit(); writer->close(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->maxDoc()); BOOST_CHECK_EQUAL(0, reader->numDocs()); reader->close(); } BOOST_AUTO_TEST_CASE(testManyFields) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); 
writer->setMaxBufferedDocs(10); for (int32_t j = 0; j < 100; ++j) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"a" + StringUtils::toString(j), L"aaa" + StringUtils::toString(j), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"b" + StringUtils::toString(j), L"aaa" + StringUtils::toString(j), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"c" + StringUtils::toString(j), L"aaa" + StringUtils::toString(j), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"d" + StringUtils::toString(j), L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"e" + StringUtils::toString(j), L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"f" + StringUtils::toString(j), L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(100, reader->maxDoc()); BOOST_CHECK_EQUAL(100, reader->numDocs()); for (int32_t j = 0; j < 100; ++j) { BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene(L"a" + StringUtils::toString(j), L"aaa" + StringUtils::toString(j)))); BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene(L"b" + StringUtils::toString(j), L"aaa" + StringUtils::toString(j)))); BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene(L"c" + StringUtils::toString(j), L"aaa" + StringUtils::toString(j)))); BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene(L"d" + StringUtils::toString(j), L"aaa"))); BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene(L"e" + StringUtils::toString(j), L"aaa"))); BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene(L"f" + StringUtils::toString(j), L"aaa"))); } reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testSmallRAMBuffer) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setRAMBufferSizeMB(0.000001); int32_t lastNumFile = dir->listAll().size(); for (int32_t j = 0; j < 9; ++j) { DocumentPtr doc = 
newLucene(); doc->add(newLucene(L"field", L"aaa" + StringUtils::toString(j), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); int32_t numFile = dir->listAll().size(); // Verify that with a tiny RAM buffer we see new segment after every doc BOOST_CHECK(numFile > lastNumFile); lastNumFile = numFile; } writer->close(); dir->close(); } /// Make sure it's OK to change RAM buffer size and maxBufferedDocs in a write session BOOST_AUTO_TEST_CASE(testChangingRAMBuffer) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); int32_t lastFlushCount = -1; for (int32_t j = 1; j < 52; ++j) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"aaa" + StringUtils::toString(j), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); syncConcurrentMerges(writer); int32_t flushCount = writer->getFlushCount(); if (j == 1) lastFlushCount = flushCount; else if (j < 10) { // No new files should be created BOOST_CHECK_EQUAL(flushCount, lastFlushCount); } else if (j == 10) { BOOST_CHECK(flushCount > lastFlushCount); lastFlushCount = flushCount; writer->setRAMBufferSizeMB(0.000001); writer->setMaxBufferedDocs(IndexWriter::DISABLE_AUTO_FLUSH); } else if (j < 20) { BOOST_CHECK(flushCount > lastFlushCount); lastFlushCount = flushCount; } else if (j == 20) { writer->setRAMBufferSizeMB(16); writer->setMaxBufferedDocs(IndexWriter::DISABLE_AUTO_FLUSH); lastFlushCount = flushCount; } else if (j < 30) BOOST_CHECK_EQUAL(flushCount, lastFlushCount); else if (j == 30) { writer->setRAMBufferSizeMB(0.000001); writer->setMaxBufferedDocs(IndexWriter::DISABLE_AUTO_FLUSH); } else if (j < 40) { BOOST_CHECK(flushCount> lastFlushCount); lastFlushCount = flushCount; } else if (j == 40) { writer->setMaxBufferedDocs(10); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); lastFlushCount = 
flushCount; } else if (j < 50) { BOOST_CHECK_EQUAL(flushCount, lastFlushCount); writer->setMaxBufferedDocs(10); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); } else if (j == 50) BOOST_CHECK(flushCount > lastFlushCount); } writer->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testChangingRAMBuffer2) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setMaxBufferedDeleteTerms(10); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); for (int32_t j = 1; j < 52; ++j) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"aaa" + StringUtils::toString(j), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } int32_t lastFlushCount = -1; for (int32_t j = 1; j < 52; ++j) { writer->deleteDocuments(newLucene(L"field", L"aaa" + StringUtils::toString(j))); syncConcurrentMerges(writer); int32_t flushCount = writer->getFlushCount(); if (j == 1) lastFlushCount = flushCount; else if (j < 10) { // No new files should be created BOOST_CHECK_EQUAL(flushCount, lastFlushCount); } else if (j == 10) { BOOST_CHECK(flushCount > lastFlushCount); lastFlushCount = flushCount; writer->setRAMBufferSizeMB(0.000001); writer->setMaxBufferedDeleteTerms(1); } else if (j < 20) { BOOST_CHECK(flushCount > lastFlushCount); lastFlushCount = flushCount; } else if (j == 20) { writer->setRAMBufferSizeMB(16); writer->setMaxBufferedDeleteTerms(IndexWriter::DISABLE_AUTO_FLUSH); lastFlushCount = flushCount; } else if (j < 30) BOOST_CHECK_EQUAL(flushCount, lastFlushCount); else if (j == 30) { writer->setRAMBufferSizeMB(0.000001); writer->setMaxBufferedDeleteTerms(IndexWriter::DISABLE_AUTO_FLUSH); writer->setMaxBufferedDeleteTerms(1); } else if (j < 40) { BOOST_CHECK(flushCount> lastFlushCount); lastFlushCount = flushCount; } else if (j == 40) { writer->setMaxBufferedDeleteTerms(10); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); 
lastFlushCount = flushCount; } else if (j < 50) { BOOST_CHECK_EQUAL(flushCount, lastFlushCount); writer->setMaxBufferedDeleteTerms(10); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); } else if (j == 50) BOOST_CHECK(flushCount > lastFlushCount); } writer->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testDiverseDocs) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setRAMBufferSizeMB(0.5); RandomPtr rand = newLucene(); for (int32_t i = 0; i < 3; ++i) { // First, docs where every term is unique (heavy on Posting instances) for (int32_t j = 0; j < 100; ++j) { DocumentPtr doc = newLucene(); for (int32_t k = 0; k < 100; ++k) doc->add(newLucene(L"field", StringUtils::toString(rand->nextInt()), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } // Next, many single term docs where only one term occurs (heavy on byte blocks) for (int32_t j = 0; j < 100; ++j) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } // Next, many single term docs where only one term occurs but the terms are very long (heavy on char[] arrays) for (int32_t j = 0; j < 100; ++j) { StringStream buffer; for (int32_t k = 0; k < 1000; ++k) buffer << j << L"."; String longTerm = buffer.str(); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", longTerm, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } } writer->close(); IndexSearcherPtr searcher = newLucene(dir, false); Collection hits = searcher->search(newLucene(newLucene(L"field", L"aaa")), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(300, hits.size()); searcher->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEnablingNorms) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); 
writer->setMaxBufferedDocs(10); // Enable norms for only 1 doc, pre flush for (int32_t j = 0; j < 10; ++j) { DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED); if (j != 8) f->setOmitNorms(true); doc->add(f); writer->addDocument(doc); } writer->close(); TermPtr searchTerm = newLucene(L"field", L"aaa"); IndexSearcherPtr searcher = newLucene(dir, false); Collection hits = searcher->search(newLucene(searchTerm), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(10, hits.size()); searcher->close(); writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); // Enable norms for only 1 doc, post flush for (int32_t j = 0; j < 27; ++j) { DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED); if (j != 26) f->setOmitNorms(true); doc->add(f); writer->addDocument(doc); } writer->close(); searcher = newLucene(dir, false); hits = searcher->search(newLucene(searchTerm), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(27, hits.size()); searcher->close(); IndexReaderPtr reader = IndexReader::open(dir, true); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testHighFreqTerm) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, 100000000); writer->setRAMBufferSizeMB(0.01); // Massive doc that has 128 K a's StringStream buffer; for (int32_t i = 0; i < 4096; ++i) { buffer << L" a a a a a a a a"; buffer << L" a a a a a a a a"; buffer << L" a a a a a a a a"; buffer << L" a a a a a a a a"; } DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", buffer.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(1, reader->maxDoc()); BOOST_CHECK_EQUAL(1, reader->numDocs()); TermPtr t = newLucene(L"field", L"a"); 
BOOST_CHECK_EQUAL(1, reader->docFreq(t)); TermDocsPtr td = reader->termDocs(t); td->next(); BOOST_CHECK_EQUAL(128 * 1024, td->freq()); reader->close(); dir->close(); } namespace TestNullLockFactory { class MyRAMDirectory : public RAMDirectory { public: MyRAMDirectory() { lockFactory.reset(); myLockFactory = newLucene(); } virtual ~MyRAMDirectory() { } LUCENE_CLASS(MyRAMDirectory); protected: LockFactoryPtr myLockFactory; public: virtual LockPtr makeLock(const String& name) { return myLockFactory->makeLock(name); } }; } /// Make sure that a Directory implementation that does not use LockFactory at all (ie overrides makeLock and /// implements its own private locking) works OK. This was raised on java-dev as loss of backwards compatibility. BOOST_AUTO_TEST_CASE(testNullLockFactory) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 100; ++i) addDoc(writer); writer->close(); TermPtr searchTerm = newLucene(L"content", L"aaa"); IndexSearcherPtr searcher = newLucene(dir, false); Collection hits = searcher->search(newLucene(searchTerm), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(100, hits.size()); writer->close(); writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->close(); dir->close(); } namespace TestFlushWithNoMerging { DECLARE_SHARED_PTR(TestableIndexWriter) class TestableIndexWriter : public IndexWriter { public: TestableIndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(d, a, create, mfl) { } virtual ~TestableIndexWriter() { } LUCENE_CLASS(TestableIndexWriter); public: using IndexWriter::flush; }; } BOOST_AUTO_TEST_CASE(testFlushWithNoMerging) { DirectoryPtr dir = newLucene(); TestFlushWithNoMerging::TestableIndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", 
L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); for (int32_t i = 0; i < 19; ++i) writer->addDocument(doc); writer->flush(false, true, true); writer->close(); SegmentInfosPtr sis = newLucene(); sis->read(dir); // Since we flushed without allowing merging we should now have 10 segments BOOST_CHECK_EQUAL(sis->size(), 10); } /// Make sure we can flush segment with norms, then add empty doc (no norms) and flush BOOST_AUTO_TEST_CASE(testEmptyDocAfterFlushingRealDoc) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->commit(); writer->addDocument(newLucene()); writer->close(); checkIndex(dir); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(2, reader->numDocs()); } /// Test calling optimize(false) whereby optimize is kicked off but we don't wait for it to finish (but /// writer.close()) does wait BOOST_AUTO_TEST_CASE(testBackgroundOptimize) { DirectoryPtr dir = newLucene(); for (int32_t pass = 0; pass < 2; ++pass) { IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMergeScheduler(newLucene()); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->setMaxBufferedDocs(2); writer->setMergeFactor(101); for (int32_t i = 0; i < 200; ++i) writer->addDocument(doc); writer->optimize(false); if (pass == 0) { writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK(reader->isOptimized()); reader->close(); } else { // Get another segment to flush so we can verify it is NOT included in the optimization writer->addDocument(doc); writer->addDocument(doc); 
writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK(!reader->isOptimized()); reader->close(); SegmentInfosPtr infos = newLucene(); infos->read(dir); BOOST_CHECK_EQUAL(2, infos->size()); } } dir->close(); // allow time for merge threads to finish LuceneThread::threadSleep(1000); } /// Test that no NullPointerException will be raised, when adding one document with a single, empty /// field and term vectors enabled. BOOST_AUTO_TEST_CASE(testBadSegment) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); BOOST_CHECK_NO_THROW(doc->add(newLucene(L"tvtest", L"", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES))); writer->addDocument(doc); writer->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testNoTermVectorAfterTermVector) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"tvtest", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"tvtest", L"x y z", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO)); writer->addDocument(doc); // Make first segment writer->commit(); doc->add(newLucene(L"tvtest", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); writer->addDocument(doc); // Make 2nd segment writer->commit(); writer->optimize(); writer->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testNoTermVectorAfterTermVectorMerge) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"tvtest", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED, 
Field::TERM_VECTOR_YES)); writer->addDocument(doc); writer->commit(); doc = newLucene(); doc->add(newLucene(L"tvtest", L"x y z", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO)); writer->addDocument(doc); // Make first segment writer->commit(); writer->optimize(); doc->add(newLucene(L"tvtest", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); writer->addDocument(doc); // Make 2nd segment writer->commit(); writer->optimize(); writer->close(); dir->close(); } namespace TestMaxThreadPriority { // Just intercepts all merges & verifies that we are never merging a segment with >= 20 (maxMergeDocs) docs class MyMergeScheduler : public MergeScheduler { public: virtual ~MyMergeScheduler() { } LUCENE_CLASS(MyMergeScheduler); public: virtual void merge(IndexWriterPtr writer) { while (true) { OneMergePtr merge = writer->getNextMerge(); if (!merge) break; for (int32_t i = 0; i < merge->segments->size(); ++i) BOOST_CHECK(merge->segments->info(i)->docCount < 20); writer->merge(merge); } } virtual void close() { } }; } BOOST_AUTO_TEST_CASE(testMaxThreadPriority) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMergeScheduler(newLucene()); writer->setMaxMergeDocs(20); writer->setMaxBufferedDocs(2); writer->setMergeFactor(2); DocumentPtr doc = newLucene(); doc->add(newLucene(L"tvtest", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); for (int32_t i = 0; i < 177; ++i) writer->addDocument(doc); writer->close(); dir->close(); } namespace TestExceptionFromTokenStream { class ExceptionTokenFilter : public TokenFilter { public: ExceptionTokenFilter(TokenStreamPtr input) : TokenFilter(input) { count = 0; } virtual ~ExceptionTokenFilter() { } LUCENE_CLASS(ExceptionTokenFilter); protected: int32_t count; public: virtual bool incrementToken() { if (count++ == 5) 
boost::throw_exception(IOException(L"now failing on purpose")); return input->incrementToken(); } }; class ExceptionAnalyzer : public Analyzer { public: virtual ~ExceptionAnalyzer() { } LUCENE_CLASS(ExceptionAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, reader)); } }; } BOOST_AUTO_TEST_CASE(testExceptionFromTokenStream) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); String contents = L"aa bb cc dd ee ff gg hh ii jj kk"; doc->add(newLucene(L"content", contents, Field::STORE_NO, Field::INDEX_ANALYZED)); BOOST_CHECK_EXCEPTION(writer->addDocument(doc), IOException, check_exception(LuceneException::IO)); // Make sure we can add another normal document doc = newLucene(); doc->add(newLucene(L"content", L"aa bb cc dd", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); // Make sure we can add another normal document doc = newLucene(); doc->add(newLucene(L"content", L"aa bb cc dd", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPtr t = newLucene(L"content", L"aa"); BOOST_CHECK_EQUAL(reader->docFreq(t), 3); // Make sure the doc that hit the exception was marked as deleted TermDocsPtr tdocs = reader->termDocs(t); int32_t count = 0; while (tdocs->next()) ++count; BOOST_CHECK_EQUAL(2, count); BOOST_CHECK_EQUAL(reader->docFreq(newLucene(L"content", L"gg")), 0); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testDocumentsWriterAbort) { MockRAMDirectoryPtr dir = newLucene(); FailOnlyOnFlushPtr failure = newLucene(); failure->setDoFail(); dir->failOn(failure); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); DocumentPtr doc = newLucene(); String contents = L"aa bb cc 
dd ee ff gg hh ii jj kk"; doc->add(newLucene(L"content", contents, Field::STORE_NO, Field::INDEX_ANALYZED)); bool hitError = false; for (int32_t i = 0; i < 200; ++i) { try { writer->addDocument(doc); } catch (IOException&) { // only one flush should fail BOOST_CHECK(!hitError); hitError = true; } } BOOST_CHECK(hitError); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(198, reader->docFreq(newLucene(L"content", L"aa"))); reader->close(); } namespace TestDocumentsWriterExceptions { class CrashAnalyzer : public Analyzer { public: virtual ~CrashAnalyzer() { } LUCENE_CLASS(CrashAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(fieldName, newLucene(reader)); } }; } BOOST_AUTO_TEST_CASE(testDocumentsWriterExceptions) { AnalyzerPtr analyzer = newLucene(); for (int32_t i = 0; i < 2; ++i) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, analyzer, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"contents", L"here are some contents", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->addDocument(doc); doc->add(newLucene(L"crash", L"this should crash after 4 terms", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"other", L"this will not get indexed", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); BOOST_CHECK_EXCEPTION(writer->addDocument(doc), IOException, check_exception(LuceneException::IO)); if (i == 0) { doc = newLucene(); doc->add(newLucene(L"contents", L"here are some contents", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->addDocument(doc); } writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); int32_t expected = 3 + (1 - i) * 2; 
BOOST_CHECK_EQUAL(expected, reader->docFreq(newLucene(L"contents", L"here"))); BOOST_CHECK_EQUAL(expected, reader->maxDoc()); int32_t numDel = 0; for (int32_t j = 0; j < reader->maxDoc(); ++j) { if (reader->isDeleted(j)) ++numDel; else { reader->document(j); reader->getTermFreqVectors(j); } } reader->close(); BOOST_CHECK_EQUAL(1, numDel); writer = newLucene(dir, analyzer, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); doc = newLucene(); doc->add(newLucene(L"contents", L"here are some contents", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); for (int32_t j = 0; j < 17; ++j) writer->addDocument(doc); writer->optimize(); writer->close(); reader = IndexReader::open(dir, true); expected = 19 + (1 - i) * 2; BOOST_CHECK_EQUAL(expected, reader->docFreq(newLucene(L"contents", L"here"))); BOOST_CHECK_EQUAL(expected, reader->maxDoc()); numDel = 0; for (int32_t j = 0; j < reader->maxDoc(); ++j) { if (reader->isDeleted(j)) ++numDel; else { reader->document(j); reader->getTermFreqVectors(j); } } reader->close(); BOOST_CHECK_EQUAL(0, numDel); dir->close(); } } namespace TestDocumentsWriterExceptionThreads { class CrashAnalyzer : public Analyzer { public: virtual ~CrashAnalyzer() { } LUCENE_CLASS(CrashAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(fieldName, newLucene(reader)); } }; class ExceptionThread : public LuceneThread { public: ExceptionThread(IndexWriterPtr writer, int32_t numIter, int32_t finalI) { this->writer = writer; this->numIter = numIter; this->finalI = finalI; } virtual ~ExceptionThread() { } LUCENE_CLASS(ExceptionThread); protected: IndexWriterPtr writer; int32_t numIter; int32_t finalI; public: virtual void run() { try { for (int32_t iter = 0; iter < numIter; ++iter) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"contents", L"here are some contents", Field::STORE_YES, Field::INDEX_ANALYZED, 
Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->addDocument(doc); doc->add(newLucene(L"crash", L"this should crash after 4 terms", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"other", L"this will not get indexed", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); try { writer->addDocument(doc); BOOST_FAIL("did not hit expected exception"); } catch (IOException&) { } if (finalI == 0) { doc = newLucene(); doc->add(newLucene(L"contents", L"here are some contents", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->addDocument(doc); } } } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; } BOOST_AUTO_TEST_CASE(testDocumentsWriterExceptionThreads) { AnalyzerPtr analyzer = newLucene(); int32_t NUM_THREAD = 3; int32_t NUM_ITER = 100; for (int32_t i = 0; i < 2; ++i) { MockRAMDirectoryPtr dir = newLucene(); { IndexWriterPtr writer = newLucene(dir, analyzer, IndexWriter::MaxFieldLengthLIMITED); int32_t finalI = i; Collection threads = Collection::newInstance(NUM_THREAD); for (int32_t t = 0; t < NUM_THREAD; ++t) { threads[t] = newLucene(writer, NUM_ITER, finalI); threads[t]->start(); } for (int32_t t = 0; t < NUM_THREAD; ++t) threads[t]->join(); writer->close(); } IndexReaderPtr reader = IndexReader::open(dir, true); int32_t expected = (3 + (1 - i) * 2) * NUM_THREAD * NUM_ITER; BOOST_CHECK_EQUAL(expected, reader->docFreq(newLucene(L"contents", L"here"))); BOOST_CHECK_EQUAL(expected, reader->maxDoc()); int32_t numDel = 0; for (int32_t j = 0; j < reader->maxDoc(); ++j) { if (reader->isDeleted(j)) ++numDel; else { reader->document(j); reader->getTermFreqVectors(j); } } reader->close(); BOOST_CHECK_EQUAL(NUM_THREAD * NUM_ITER, numDel); IndexWriterPtr writer = newLucene(dir, analyzer, IndexWriter::MaxFieldLengthLIMITED); 
writer->setMaxBufferedDocs(10); DocumentPtr doc = newLucene(); doc->add(newLucene(L"contents", L"here are some contents", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); for (int32_t j = 0; j < 17; ++j) writer->addDocument(doc); writer->optimize(); writer->close(); reader = IndexReader::open(dir, true); expected += 17 - NUM_THREAD * NUM_ITER; BOOST_CHECK_EQUAL(expected, reader->docFreq(newLucene(L"contents", L"here"))); BOOST_CHECK_EQUAL(expected, reader->maxDoc()); numDel = 0; for (int32_t j = 0; j < reader->maxDoc(); ++j) { if (reader->isDeleted(j)) ++numDel; else { reader->document(j); reader->getTermFreqVectors(j); } } reader->close(); BOOST_CHECK_EQUAL(0, numDel); dir->close(); } } BOOST_AUTO_TEST_CASE(testVariableSchema) { MockRAMDirectoryPtr dir = newLucene(); int32_t delID = 0; for (int32_t i = 0; i < 20; ++i) { IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(2); writer->setUseCompoundFile(false); DocumentPtr doc = newLucene(); String contents = L"aa bb cc dd ee ff gg hh ii jj kk"; if (i == 7) { // Add empty docs here doc->add(newLucene(L"content3", L"", Field::STORE_NO, Field::INDEX_ANALYZED)); } else { Field::Store storeVal = Field::STORE_NO; if (i % 2 == 0) { doc->add(newLucene(L"content4", contents, Field::STORE_YES, Field::INDEX_ANALYZED)); storeVal = Field::STORE_YES; } doc->add(newLucene(L"content1", contents, storeVal, Field::INDEX_ANALYZED)); doc->add(newLucene(L"content3", L"", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"content5", L"", storeVal, Field::INDEX_ANALYZED)); } for (int32_t j = 0; j < 4; ++j) writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, false); reader->deleteDocument(delID++); reader->close(); if (i % 4 == 0) { writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(false); writer->optimize(); 
writer->close(); } } } namespace TestNoWaitClose { class NoWaitThread : public LuceneThread { public: NoWaitThread(IndexWriterPtr finalWriter, DocumentPtr doc) { this->finalWriter = finalWriter; this->doc = doc; } virtual ~NoWaitThread() { } LUCENE_CLASS(NoWaitThread); protected: IndexWriterPtr finalWriter; DocumentPtr doc; public: virtual void run() { bool done = false; while (!done) { for (int32_t i = 0; i < 100; ++i) { try { finalWriter->addDocument(doc); } catch (AlreadyClosedException&) { done = true; break; } catch (NullPointerException&) { done = true; break; } catch (...) { BOOST_FAIL("Unexpected exception"); done = true; break; } } LuceneThread::threadYield(); } } }; } BOOST_AUTO_TEST_CASE(testNoWaitClose) { MockRAMDirectoryPtr dir = newLucene(); DocumentPtr doc = newLucene(); FieldPtr idField = newLucene(L"id", L"", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); doc->add(idField); for (int32_t pass = 0; pass < 2; ++pass) { IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t iter = 0; iter < 10; ++iter) { MergeSchedulerPtr ms; if (pass == 1) ms = newLucene(); else ms = newLucene(); writer->setMergeScheduler(ms); writer->setMaxBufferedDocs(2); writer->setMergeFactor(100); for (int32_t j = 0; j < 199; ++j) { idField->setValue(StringUtils::toString(iter * 201 + j)); writer->addDocument(doc); } int32_t delID = iter * 199; for (int32_t j = 0; j < 20; ++j) { writer->deleteDocuments(newLucene(L"id", StringUtils::toString(delID))); delID += 5; } // Force a bunch of merge threads to kick off so we stress out aborting them on close writer->setMergeFactor(2); IndexWriterPtr finalWriter = writer; LuceneThreadPtr t1 = newLucene(finalWriter, doc); t1->start(); writer->close(false); t1->join(); // Make sure reader can read IndexReaderPtr reader = IndexReader::open(dir, true); reader->close(); // Reopen writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); } writer->close(); } 
dir->close();
}

/// Make sure we can close() even while threads are trying to add documents.
BOOST_AUTO_TEST_CASE(testCloseWithThreads)
{
    int32_t NUM_THREADS = 3;
    for (int32_t iter = 0; iter < 20; ++iter)
    {
        MockRAMDirectoryPtr dir = newLucene();
        IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED);
        ConcurrentMergeSchedulerPtr cms = newLucene();

        // We expect AlreadyClosedException
        cms->setSuppressExceptions();

        writer->setMergeScheduler(cms);
        writer->setMaxBufferedDocs(10);
        writer->setMergeFactor(4);

        // Launch adder threads that hammer the writer with addDocument calls
        Collection threads = Collection::newInstance(NUM_THREADS);
        for (int32_t i = 0; i < NUM_THREADS; ++i)
            threads[i] = newLucene(writer, false);
        for (int32_t i = 0; i < NUM_THREADS; ++i)
            threads[i]->start();

        bool done = false;
        while (!done)
        {
            LuceneThread::threadSleep(100);
            for (int32_t i = 0; i < NUM_THREADS; ++i)
            {
                // only stop when at least one thread has added a doc
                if (threads[i]->addCount > 0)
                {
                    done = true;
                    break;
                }
            }
        }

        // Close while the adder threads are still running (do not wait for merges)
        writer->close(false);

        // Make sure threads that are adding docs are not hung
        for (int32_t i = 0; i < NUM_THREADS; ++i)
        {
            threads[i]->join();
            if (threads[i]->isAlive())
                BOOST_FAIL("thread seems to be hung");
        }

        // Quick test to make sure index is not corrupt
        IndexReaderPtr reader = IndexReader::open(dir, true);
        TermDocsPtr tdocs = reader->termDocs(newLucene(L"field", L"aaa"));
        int32_t count = 0;
        while (tdocs->next())
            ++count;
        BOOST_CHECK(count > 0);
        reader->close();

        dir->close();
    }
}

/// Make sure immediate disk full on creating an IndexWriter (hit during DW.ThreadState.init()) is OK
BOOST_AUTO_TEST_CASE(testImmediateDiskFull)
{
    MockRAMDirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED);
    // Cap the mock directory at its current size so the very next write fails with "disk full"
    dir->setMaxSizeInBytes(dir->getRecomputedActualSizeInBytes());
    writer->setMaxBufferedDocs(2);
    DocumentPtr doc = newLucene();
    doc->add(newLucene(L"field", L"aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field::STORE_YES, Field::INDEX_ANALYZED,
Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); BOOST_CHECK_EXCEPTION(writer->addDocument(doc), IOException, check_exception(LuceneException::IO)); BOOST_CHECK_EXCEPTION(writer->addDocument(doc), IOException, check_exception(LuceneException::IO)); BOOST_CHECK_EXCEPTION(writer->close(false), IOException, check_exception(LuceneException::IO)); } /// Make sure immediate disk full on creating an IndexWriter (hit during DW.ThreadState.init()), /// with multiple threads, is OK BOOST_AUTO_TEST_CASE(testImmediateDiskFullWithThreads) { int32_t NUM_THREADS = 3; for (int32_t iter = 0; iter < 10; ++iter) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); ConcurrentMergeSchedulerPtr cms = newLucene(); // We expect AlreadyClosedException cms->setSuppressExceptions(); writer->setMergeScheduler(cms); writer->setMaxBufferedDocs(2); writer->setMergeFactor(4); dir->setMaxSizeInBytes(4 * 1024 + 20 * iter); Collection threads = Collection::newInstance(NUM_THREADS); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i] = newLucene(writer, true); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->start(); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->join(); try { writer->close(false); } catch (IOException&) { } // allow time for merge threads to finish LuceneThread::threadSleep(1000); dir->close(); } } static void _testSingleThreadFailure(MockDirectoryFailurePtr failure) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); for (int32_t i = 0; i < 6; ++i) writer->addDocument(doc); dir->failOn(failure); failure->setDoFail(); try { writer->addDocument(doc); writer->addDocument(doc); writer->commit(); 
BOOST_FAIL("did not hit exception"); } catch (IOException&) { } failure->clearDoFail(); writer->addDocument(doc); writer->close(false); } static void _testMultipleThreadsFailure(MockDirectoryFailurePtr failure) { int32_t NUM_THREADS = 3; for (int32_t iter = 0; iter < 5; ++iter) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); ConcurrentMergeSchedulerPtr cms = newLucene(); // We expect disk full exceptions in the merge threads cms->setSuppressExceptions(); writer->setMergeScheduler(cms); writer->setMaxBufferedDocs(2); writer->setMergeFactor(4); Collection threads = Collection::newInstance(NUM_THREADS); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i] = newLucene(writer, true); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->start(); LuceneThread::threadSleep(10); dir->failOn(failure); failure->setDoFail(); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->join(); bool success = false; try { writer->close(false); success = true; } catch (IOException&) { failure->clearDoFail(); writer->close(false); } if (success) { IndexReaderPtr reader = IndexReader::open(dir, true); for (int32_t j = 0; j < reader->maxDoc(); ++j) { if (!reader->isDeleted(j)) { reader->document(j); reader->getTermFreqVectors(j); } } reader->close(); } // allow time for merge threads to finish LuceneThread::threadSleep(1000); dir->close(); } } /// Make sure initial IOException, and then 2nd IOException during rollback(), is OK BOOST_AUTO_TEST_CASE(testIOExceptionDuringAbort) { _testSingleThreadFailure(newLucene(false)); } /// Make sure initial IOException, and then 2nd IOException during rollback(), is OK BOOST_AUTO_TEST_CASE(testIOExceptionDuringAbortOnlyOnce) { _testSingleThreadFailure(newLucene(true)); } /// Make sure initial IOException, and then 2nd IOException during rollback(), with /// multiple threads, is OK BOOST_AUTO_TEST_CASE(testIOExceptionDuringAbortWithThreads) { 
_testMultipleThreadsFailure(newLucene(false));
}

/// Make sure initial IOException, and then 2nd IOException during rollback(), with
/// multiple threads, is OK
BOOST_AUTO_TEST_CASE(testIOExceptionDuringAbortWithThreadsOnlyOnce)
{
    _testMultipleThreadsFailure(newLucene(true));
}

/// Test IOException in closeDocStore
BOOST_AUTO_TEST_CASE(testIOExceptionDuringCloseDocStore)
{
    _testSingleThreadFailure(newLucene(false));
}

/// Test IOException in closeDocStore
BOOST_AUTO_TEST_CASE(testIOExceptionDuringCloseDocStoreOnlyOnce)
{
    _testSingleThreadFailure(newLucene(true));
}

/// Test IOException in closeDocStore, with threads
BOOST_AUTO_TEST_CASE(testIOExceptionDuringCloseDocStoreWithThreads)
{
    _testMultipleThreadsFailure(newLucene(false));
}

/// Test IOException in closeDocStore, with threads
BOOST_AUTO_TEST_CASE(testIOExceptionDuringCloseDocStoreWithThreadsOnlyOnce)
{
    _testMultipleThreadsFailure(newLucene(true));
}

/// Test IOException in writeSegment
BOOST_AUTO_TEST_CASE(testIOExceptionDuringWriteSegment)
{
    _testSingleThreadFailure(newLucene(false));
}

/// Test IOException in writeSegment
BOOST_AUTO_TEST_CASE(testIOExceptionDuringWriteSegmentOnlyOnce)
{
    _testSingleThreadFailure(newLucene(true));
}

/// Test IOException in writeSegment, with threads
BOOST_AUTO_TEST_CASE(testIOExceptionDuringWriteSegmentWithThreads)
{
    _testMultipleThreadsFailure(newLucene(false));
}

/// Test IOException in writeSegment, with threads
BOOST_AUTO_TEST_CASE(testIOExceptionDuringWriteSegmentWithThreadsOnlyOnce)
{
    _testMultipleThreadsFailure(newLucene(true));
}

/// Test unlimited field length
BOOST_AUTO_TEST_CASE(testUnlimitedMaxFieldLength)
{
    DirectoryPtr dir = newLucene();
    IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED);
    DocumentPtr doc = newLucene();
    // Build a field of 10000 "a" tokens followed by one "x"; with MaxFieldLengthUNLIMITED
    // the trailing "x" must still be indexed (checked below via docFreq)
    StringStream buffer;
    for (int32_t i = 0; i < 10000; ++i)
        buffer << L" a";
    buffer << L" x";
    doc->add(newLucene(L"field", buffer.str(), Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPtr t = newLucene(L"field", L"x"); BOOST_CHECK_EQUAL(1, reader->docFreq(t)); reader->close(); dir->close(); } /// Simulate checksum error in segments_N BOOST_AUTO_TEST_CASE(testSegmentsChecksumError) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); // add 100 documents for (int32_t i = 0; i < 100; ++i) addDoc(writer); writer->close(); int64_t gen = SegmentInfos::getCurrentSegmentGeneration(dir); BOOST_CHECK(gen > 1); String segmentsFileName = SegmentInfos::getCurrentSegmentFileName(dir); IndexInputPtr in = dir->openInput(segmentsFileName); IndexOutputPtr out = dir->createOutput(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", 1 + gen)); out->copyBytes(in, in->length() - 1); uint8_t b = in->readByte(); out->writeByte((uint8_t)(1 + b)); out->close(); in->close(); IndexReaderPtr reader; BOOST_CHECK_NO_THROW(reader = IndexReader::open(dir, true)); reader->close(); } /// Test writer.commit() when ac=false BOOST_AUTO_TEST_CASE(testForceCommit) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(5); for (int32_t i = 0; i < 23; ++i) addDoc(writer); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->numDocs()); writer->commit(); IndexReaderPtr reader2 = reader->reopen(); BOOST_CHECK_EQUAL(0, reader->numDocs()); BOOST_CHECK_EQUAL(23, reader2->numDocs()); reader->close(); for (int32_t i = 0; i < 17; ++i) addDoc(writer); BOOST_CHECK_EQUAL(23, reader2->numDocs()); reader2->close(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(23, reader->numDocs()); reader->close(); writer->commit(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(40, reader->numDocs()); reader->close(); writer->close(); dir->close(); } /// Test exception during 
sync BOOST_AUTO_TEST_CASE(testExceptionDuringSync) { MockRAMDirectoryPtr dir = newLucene(); FailOnlyInSyncPtr failure = newLucene(); dir->failOn(failure); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); failure->setDoFail(); ConcurrentMergeSchedulerPtr cms = newLucene(); cms->setSuppressExceptions(); writer->setMergeScheduler(cms); writer->setMaxBufferedDocs(2); writer->setMergeFactor(5); for (int32_t i = 0; i < 23; ++i) { addDoc(writer); if ((i - 1) % 2 == 0) BOOST_CHECK_EXCEPTION(writer->commit(), IOException, check_exception(LuceneException::IO)); } cms->sync(); BOOST_CHECK(failure->didFail); failure->clearDoFail(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(23, reader->numDocs()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testTermVectorCorruption) { DirectoryPtr dir = newLucene(); for (int32_t iter = 0; iter < 2; ++iter) { IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); writer->setMergeScheduler(newLucene()); writer->setMergePolicy(newLucene(writer)); DocumentPtr document = newLucene(); FieldPtr storedField = newLucene(L"stored", L"stored", Field::STORE_YES, Field::INDEX_NO); document->add(storedField); writer->addDocument(document); writer->addDocument(document); document = newLucene(); document->add(storedField); FieldPtr termVectorField = newLucene(L"termVector", L"termVector", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); document->add(termVectorField); writer->addDocument(document); writer->optimize(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); for (int32_t i = 0; i < reader->numDocs(); ++i) { reader->document(i); reader->getTermFreqVectors(i); } reader->close(); writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), 
IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); writer->setMergeScheduler(newLucene()); writer->setMergePolicy(newLucene(writer)); writer->addIndexesNoOptimize(newCollection(newLucene(dir))); writer->optimize(); writer->close(); } dir->close(); } BOOST_AUTO_TEST_CASE(testTermVectorCorruption2) { DirectoryPtr dir = newLucene(); for (int32_t iter = 0; iter < 2; ++iter) { IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); writer->setMergeScheduler(newLucene()); writer->setMergePolicy(newLucene(writer)); DocumentPtr document = newLucene(); FieldPtr storedField = newLucene(L"stored", L"stored", Field::STORE_YES, Field::INDEX_NO); document->add(storedField); writer->addDocument(document); writer->addDocument(document); document = newLucene(); document->add(storedField); FieldPtr termVectorField = newLucene(L"termVector", L"termVector", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); document->add(termVectorField); writer->addDocument(document); writer->optimize(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK(!reader->getTermFreqVectors(0)); BOOST_CHECK(!reader->getTermFreqVectors(1)); BOOST_CHECK(reader->getTermFreqVectors(2)); reader->close(); } dir->close(); } BOOST_AUTO_TEST_CASE(testTermVectorCorruption3) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); writer->setMergeScheduler(newLucene()); writer->setMergePolicy(newLucene(writer)); DocumentPtr document = newLucene(); FieldPtr storedField = newLucene(L"stored", L"stored", Field::STORE_YES, Field::INDEX_NO); 
document->add(storedField); FieldPtr termVectorField = newLucene(L"termVector", L"termVector", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); document->add(termVectorField); for (int32_t i = 0; i < 10; ++i) writer->addDocument(document); writer->close(); writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); writer->setMergeScheduler(newLucene()); writer->setMergePolicy(newLucene(writer)); for (int32_t i = 0; i < 6; ++i) writer->addDocument(document); writer->optimize(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); for (int32_t i = 0; i < 10; ++i) { reader->getTermFreqVectors(i); reader->document(i); } reader->close(); dir->close(); } /// Test user-specified field length BOOST_AUTO_TEST_CASE(testUserSpecifiedMaxFieldLength) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), 100000); DocumentPtr doc = newLucene(); StringStream buffer; for (int32_t i = 0; i < 10000; ++i) buffer << L" a"; buffer << L" x"; doc->add(newLucene(L"field", buffer.str(), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPtr t = newLucene(L"field", L"x"); BOOST_CHECK_EQUAL(1, reader->docFreq(t)); reader->close(); dir->close(); } /// Test expungeDeletes, when 2 singular merges are required BOOST_AUTO_TEST_CASE(testExpungeDeletes) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); DocumentPtr document = newLucene(); FieldPtr storedField = newLucene(L"stored", L"stored", Field::STORE_YES, Field::INDEX_NO); document->add(storedField); FieldPtr termVectorField = 
newLucene(L"termVector", L"termVector", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); document->add(termVectorField); for (int32_t i = 0; i < 10; ++i) writer->addDocument(document); writer->close(); IndexReaderPtr ir = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(10, ir->maxDoc()); BOOST_CHECK_EQUAL(10, ir->numDocs()); ir->deleteDocument(0); ir->deleteDocument(7); BOOST_CHECK_EQUAL(8, ir->numDocs()); ir->close(); writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); BOOST_CHECK_EQUAL(8, writer->numDocs()); BOOST_CHECK_EQUAL(10, writer->maxDoc()); writer->expungeDeletes(); BOOST_CHECK_EQUAL(8, writer->numDocs()); writer->close(); ir = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(8, ir->maxDoc()); BOOST_CHECK_EQUAL(8, ir->numDocs()); ir->close(); dir->close(); } /// Test expungeDeletes, when many adjacent merges are required BOOST_AUTO_TEST_CASE(testExpungeDeletes2) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(50); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); DocumentPtr document = newLucene(); FieldPtr storedField = newLucene(L"stored", L"stored", Field::STORE_YES, Field::INDEX_NO); document->add(storedField); FieldPtr termVectorField = newLucene(L"termVector", L"termVector", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); document->add(termVectorField); for (int32_t i = 0; i < 98; ++i) writer->addDocument(document); writer->close(); IndexReaderPtr ir = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(98, ir->maxDoc()); BOOST_CHECK_EQUAL(98, ir->numDocs()); for (int32_t i = 0; i < 98; i += 2) ir->deleteDocument(i); BOOST_CHECK_EQUAL(49, ir->numDocs()); ir->close(); writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), 
IndexWriter::MaxFieldLengthLIMITED); writer->setMergeFactor(3); BOOST_CHECK_EQUAL(49, writer->numDocs()); writer->expungeDeletes(); writer->close(); ir = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(49, ir->maxDoc()); BOOST_CHECK_EQUAL(49, ir->numDocs()); ir->close(); dir->close(); } /// Test expungeDeletes without waiting, when many adjacent merges are required BOOST_AUTO_TEST_CASE(testExpungeDeletes3) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(50); writer->setRAMBufferSizeMB(IndexWriter::DISABLE_AUTO_FLUSH); DocumentPtr document = newLucene(); FieldPtr storedField = newLucene(L"stored", L"stored", Field::STORE_YES, Field::INDEX_NO); document->add(storedField); FieldPtr termVectorField = newLucene(L"termVector", L"termVector", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); document->add(termVectorField); for (int32_t i = 0; i < 98; ++i) writer->addDocument(document); writer->close(); IndexReaderPtr ir = IndexReader::open(dir, false); BOOST_CHECK_EQUAL(98, ir->maxDoc()); BOOST_CHECK_EQUAL(98, ir->numDocs()); for (int32_t i = 0; i < 98; i += 2) ir->deleteDocument(i); BOOST_CHECK_EQUAL(49, ir->numDocs()); ir->close(); writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); writer->setMergeFactor(3); writer->expungeDeletes(false); writer->close(); ir = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(49, ir->maxDoc()); BOOST_CHECK_EQUAL(49, ir->numDocs()); ir->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEmptyFieldName) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); } namespace 
TestExceptionDocumentsWriterInit { DECLARE_SHARED_PTR(MockIndexWriter) class MockIndexWriter : public IndexWriter { public: MockIndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(d, a, create, mfl) { doFail = false; } virtual ~MockIndexWriter() { } LUCENE_CLASS(MockIndexWriter); public: bool doFail; public: virtual bool testPoint(const String& name) { if (doFail && name == L"DocumentsWriter.ThreadState.init start") boost::throw_exception(RuntimeException(L"intentionally failing")); return true; } }; } BOOST_AUTO_TEST_CASE(testExceptionDocumentsWriterInit) { MockRAMDirectoryPtr dir = newLucene(); TestExceptionDocumentsWriterInit::MockIndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"a field", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->doFail = true; BOOST_CHECK_EXCEPTION(writer->addDocument(doc), RuntimeException, check_exception(LuceneException::Runtime)); writer->close(); checkIndex(dir); dir->close(); } namespace TestExceptionJustBeforeFlush { DECLARE_SHARED_PTR(MockIndexWriter) class MockIndexWriter : public IndexWriter { public: MockIndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(d, a, create, mfl) { doFail = false; } virtual ~MockIndexWriter() { } LUCENE_CLASS(MockIndexWriter); public: bool doFail; public: virtual bool testPoint(const String& name) { if (doFail && name == L"DocumentsWriter.ThreadState.init start") boost::throw_exception(RuntimeException(L"intentionally failing")); return true; } }; class CrashAnalyzer : public Analyzer { public: virtual ~CrashAnalyzer() { } LUCENE_CLASS(CrashAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(fieldName, newLucene(reader)); } }; } BOOST_AUTO_TEST_CASE(testExceptionJustBeforeFlush) { MockRAMDirectoryPtr dir = newLucene(); 
TestExceptionJustBeforeFlush::MockIndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"a field", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); AnalyzerPtr analyzer = newLucene(); DocumentPtr crashDoc = newLucene(); crashDoc->add(newLucene(L"crash", L"do it on token 4", Field::STORE_YES, Field::INDEX_ANALYZED)); BOOST_CHECK_EXCEPTION(writer->addDocument(crashDoc, analyzer), IOException, check_exception(LuceneException::IO)); writer->addDocument(doc); writer->close(); dir->close(); } namespace TestExceptionOnMergeInit { DECLARE_SHARED_PTR(MockIndexWriter) class MockIndexWriter : public IndexWriter { public: MockIndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(d, a, create, mfl) { doFail = false; failed = false; } virtual ~MockIndexWriter() { } LUCENE_CLASS(MockIndexWriter); public: bool doFail; bool failed; public: virtual bool testPoint(const String& name) { if (doFail && name == L"startMergeInit") { failed = true; boost::throw_exception(RuntimeException(L"intentionally failing")); } return true; } }; } BOOST_AUTO_TEST_CASE(testExceptionOnMergeInit) { MockRAMDirectoryPtr dir = newLucene(); TestExceptionOnMergeInit::MockIndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(2); writer->doFail = true; writer->setMergeScheduler(newLucene()); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"a field", Field::STORE_YES, Field::INDEX_ANALYZED)); for (int32_t i = 0; i < 10; ++i) { try { writer->addDocument(doc); } catch (RuntimeException&) { break; } } boost::dynamic_pointer_cast(writer->getMergeScheduler())->sync(); BOOST_CHECK(writer->failed); writer->close(); dir->close(); } namespace TestDoBeforeAfterFlush { DECLARE_SHARED_PTR(MockIndexWriter) class MockIndexWriter : 
public IndexWriter { public: MockIndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(d, a, create, mfl) { afterWasCalled = false; beforeWasCalled = false; } virtual ~MockIndexWriter() { } LUCENE_CLASS(MockIndexWriter); public: bool afterWasCalled; bool beforeWasCalled; protected: virtual void doAfterFlush() { afterWasCalled = true; } virtual void doBeforeFlush() { beforeWasCalled = true; } }; } BOOST_AUTO_TEST_CASE(testDoBeforeAfterFlush) { MockRAMDirectoryPtr dir = newLucene(); TestDoBeforeAfterFlush::MockIndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"a field", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->commit(); BOOST_CHECK(writer->beforeWasCalled); BOOST_CHECK(writer->afterWasCalled); writer->beforeWasCalled = false; writer->afterWasCalled = false; writer->deleteDocuments(newLucene(L"field", L"field")); writer->commit(); BOOST_CHECK(writer->beforeWasCalled); BOOST_CHECK(writer->afterWasCalled); writer->close(); IndexReaderPtr ir = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(1, ir->maxDoc()); BOOST_CHECK_EQUAL(0, ir->numDocs()); ir->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testExceptionsDuringCommit) { MockRAMDirectoryPtr dir = newLucene(); FailOnlyInCommitPtr failure = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"a field", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); dir->failOn(failure); BOOST_CHECK_EXCEPTION(writer->close(), RuntimeException, check_exception(LuceneException::Runtime)); BOOST_CHECK(failure->fail1 && failure->fail2); writer->rollback(); dir->close(); } namespace TestNegativePositions { class NegativeTokenStream : public TokenStream { public: NegativeTokenStream() { termAtt = addAttribute(); posIncrAtt = 
addAttribute(); tokens = newCollection(L"a", L"b", L"c" ); tokenIter = tokens.begin(); first = true; } virtual ~NegativeTokenStream() { } public: TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; Collection tokens; Collection::iterator tokenIter; bool first; public: virtual bool incrementToken() { if (tokenIter == tokens.end()) return false; clearAttributes(); termAtt->setTermBuffer(*tokenIter++); posIncrAtt->setPositionIncrement(first ? 0 : 1); first = false; return true; } }; } BOOST_AUTO_TEST_CASE(testNegativePositions) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); TokenStreamPtr tokens = newLucene(); doc->add(newLucene(L"field", tokens)); writer->addDocument(doc); writer->commit(); IndexSearcherPtr s = newLucene(dir, false); PhraseQueryPtr pq = newLucene(); pq->add(newLucene(L"field", L"a")); pq->add(newLucene(L"field", L"b")); pq->add(newLucene(L"field", L"c")); Collection hits = s->search(pq, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryPtr q = newLucene(newLucene(L"field", L"a")); hits = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); TermPositionsPtr tps = s->getIndexReader()->termPositions(newLucene(L"field", L"a")); BOOST_CHECK(tps->next()); BOOST_CHECK_EQUAL(1, tps->freq()); BOOST_CHECK_EQUAL(0, tps->nextPosition()); writer->close(); BOOST_CHECK(checkIndex(dir)); s->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testPrepareCommit) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(5); for (int32_t i = 0; i < 23; ++i) addDoc(writer); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->numDocs()); writer->prepareCommit(); IndexReaderPtr reader2 = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, 
reader2->numDocs()); writer->commit(); IndexReaderPtr reader3 = reader->reopen(); BOOST_CHECK_EQUAL(0, reader->numDocs()); BOOST_CHECK_EQUAL(0, reader2->numDocs()); BOOST_CHECK_EQUAL(23, reader3->numDocs()); reader->close(); reader2->close(); for (int32_t i = 0; i < 17; ++i) addDoc(writer); BOOST_CHECK_EQUAL(23, reader3->numDocs()); reader3->close(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(23, reader->numDocs()); reader->close(); writer->prepareCommit(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(23, reader->numDocs()); reader->close(); writer->commit(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(40, reader->numDocs()); reader->close(); writer->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testPrepareCommitRollback) { MockRAMDirectoryPtr dir = newLucene(); dir->setPreventDoubleWrite(false); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(5); for (int32_t i = 0; i < 23; ++i) addDoc(writer); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->numDocs()); writer->prepareCommit(); IndexReaderPtr reader2 = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader2->numDocs()); writer->rollback(); IndexReaderPtr reader3 = reader->reopen(); BOOST_CHECK_EQUAL(0, reader->numDocs()); BOOST_CHECK_EQUAL(0, reader2->numDocs()); BOOST_CHECK_EQUAL(0, reader3->numDocs()); reader->close(); reader2->close(); writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 17; ++i) addDoc(writer); BOOST_CHECK_EQUAL(0, reader3->numDocs()); reader3->close(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->numDocs()); reader->close(); writer->prepareCommit(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->numDocs()); reader->close(); writer->commit(); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(17, reader->numDocs()); 
reader->close(); writer->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testPrepareCommitNoChanges) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->prepareCommit(); writer->commit(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(0, reader->numDocs()); reader->close(); dir->close(); } namespace TestAddIndexesWithThreads { DECLARE_SHARED_PTR(CommitAndAddIndexes) class CommitAndAddIndexes : public RunAddIndexesThreads { public: CommitAndAddIndexes(int32_t numCopy) : RunAddIndexesThreads(numCopy) { } virtual ~CommitAndAddIndexes() { } public: virtual void handle(LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } virtual void doBody(int32_t j, Collection dirs) { switch (j % 4) { case 0: writer2->addIndexesNoOptimize(dirs); writer2->optimize(); break; case 1: writer2->addIndexesNoOptimize(dirs); break; case 2: writer2->addIndexes(readers); break; case 3: writer2->commit(); break; } } }; } /// Test simultaneous addIndexes & commits from multiple threads BOOST_AUTO_TEST_CASE(testAddIndexesWithThreads) { static const int32_t NUM_ITER = 12; static const int32_t NUM_COPY = 3; TestAddIndexesWithThreads::CommitAndAddIndexesPtr c = newLucene(NUM_COPY); c->launchThreads(NUM_ITER); for (int32_t i = 0; i < 100; ++i) addDoc(c->writer2); c->joinThreads(); BOOST_CHECK_EQUAL(100 + NUM_COPY * (3 * NUM_ITER / 4) * c->NUM_THREADS * c->NUM_INIT_DOCS, c->writer2->numDocs()); c->close(true); checkIndex(c->dir2); IndexReaderPtr reader = IndexReader::open(c->dir2, true); BOOST_CHECK_EQUAL(100 + NUM_COPY * (3 * NUM_ITER / 4) * c->NUM_THREADS * c->NUM_INIT_DOCS, reader->numDocs()); reader->close(); c->closeDir(); } namespace TestAddIndexesWithClose { DECLARE_SHARED_PTR(CommitAndAddIndexes) class CommitAndAddIndexes : public RunAddIndexesThreads { public: CommitAndAddIndexes(int32_t numCopy) : RunAddIndexesThreads(numCopy) { } virtual 
~CommitAndAddIndexes()
    {
    }

public:
    virtual void handle(LuceneException& e)
    {
        // Once the writer is being closed concurrently, only AlreadyClosed /
        // NullPointer are tolerated; anything else is a real failure
        if (e.getType() != LuceneException::AlreadyClosed && e.getType() != LuceneException::NullPointer)
            BOOST_FAIL("Unexpected exception: " << e.getError());
    }

    virtual void doBody(int32_t j, Collection dirs)
    {
        // Rotate between the addIndexes variants and commit
        switch (j % 4)
        {
            case 0:
                writer2->addIndexesNoOptimize(dirs);
                writer2->optimize();
                break;
            case 1:
                writer2->addIndexesNoOptimize(dirs);
                break;
            case 2:
                writer2->addIndexes(readers);
                break;
            case 3:
                writer2->commit();
                break;
        }
    }
};
}

/// Test simultaneous addIndexes & close
BOOST_AUTO_TEST_CASE(testAddIndexesWithClose)
{
    static const int32_t NUM_COPY = 3;
    TestAddIndexesWithClose::CommitAndAddIndexesPtr c = newLucene(NUM_COPY);
    c->launchThreads(-1);
    // Close without first stopping/joining the threads
    c->close(true);
    c->joinThreads();
    checkIndex(c->dir2);
    c->closeDir();
}

namespace TestAddIndexesWithCloseNoWait
{
    DECLARE_SHARED_PTR(CommitAndAddIndexes)

    class CommitAndAddIndexes : public RunAddIndexesThreads
    {
    public:
        CommitAndAddIndexes(int32_t numCopy) : RunAddIndexesThreads(numCopy)
        {
        }

        virtual ~CommitAndAddIndexes()
        {
        }

    public:
        virtual void handle(LuceneException& e)
        {
            // Close-induced exceptions are only acceptable after didClose has been set
            bool report = true;
            if (e.getType() == LuceneException::AlreadyClosed || e.getType() == LuceneException::MergeAborted || e.getType() == LuceneException::NullPointer)
                report = !didClose;
            else if (e.getType() == LuceneException::IO)
                report = !didClose;
            if (report)
                BOOST_FAIL("Unexpected exception: " << e.getError());
        }

        virtual void doBody(int32_t j, Collection dirs)
        {
            switch (j % 5)
            {
                case 0:
                    writer2->addIndexesNoOptimize(dirs);
                    writer2->optimize();
                    break;
                case 1:
                    writer2->addIndexesNoOptimize(dirs);
                    break;
                case 2:
                    writer2->addIndexes(readers);
                    break;
                case 3:
                    writer2->optimize();
                    // NOTE(review): no break here — case 3 falls through into case 4
                    // (optimize then commit); confirm this fall-through is intentional
                case 4:
                    writer2->commit();
                    break;
            }
        }
    };
}

/// Test simultaneous addIndexes and close
BOOST_AUTO_TEST_CASE(testAddIndexesWithCloseNoWait)
{
    static const int32_t NUM_COPY = 50;
    TestAddIndexesWithCloseNoWait::CommitAndAddIndexesPtr c = newLucene(NUM_COPY);
c->launchThreads(-1); LuceneThread::threadSleep(500); // Close without first stopping/joining the threads c->close(false); c->joinThreads(); checkIndex(c->dir2); c->closeDir(); } namespace TestAddIndexesWithRollback { DECLARE_SHARED_PTR(CommitAndAddIndexes) class CommitAndAddIndexes : public RunAddIndexesThreads { public: CommitAndAddIndexes(int32_t numCopy) : RunAddIndexesThreads(numCopy) { } virtual ~CommitAndAddIndexes() { } public: virtual void handle(LuceneException& e) { bool report = true; if (e.getType() == LuceneException::AlreadyClosed || e.getType() == LuceneException::MergeAborted || e.getType() == LuceneException::NullPointer) report = !didClose; else if (e.getType() == LuceneException::IO) report = !didClose; if (report) BOOST_FAIL("Unexpected exception: " << e.getError()); } virtual void doBody(int32_t j, Collection dirs) { switch (j % 5) { case 0: writer2->addIndexesNoOptimize(dirs); writer2->optimize(); break; case 1: writer2->addIndexesNoOptimize(dirs); break; case 2: writer2->addIndexes(readers); break; case 3: writer2->optimize(); case 4: writer2->commit(); break; } } }; } /// Test simultaneous addIndexes and close BOOST_AUTO_TEST_CASE(testAddIndexesWithRollback) { static const int32_t NUM_COPY = 50; TestAddIndexesWithRollback::CommitAndAddIndexesPtr c = newLucene(NUM_COPY); c->launchThreads(-1); LuceneThread::threadSleep(500); // Close without first stopping/joining the threads c->didClose = true; c->writer2->rollback(); c->joinThreads(); checkIndex(c->dir2); c->closeDir(); } namespace TestRollbackExceptionHang { DECLARE_SHARED_PTR(MockIndexWriter) class MockIndexWriter : public IndexWriter { public: MockIndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(d, a, create, mfl) { doFail = false; } virtual ~MockIndexWriter() { } LUCENE_CLASS(MockIndexWriter); public: bool doFail; public: virtual bool testPoint(const String& name) { if (doFail && name == L"rollback before checkpoint") 
boost::throw_exception(RuntimeException(L"intentionally failing")); return true; } }; } BOOST_AUTO_TEST_CASE(testRollbackExceptionHang) { MockRAMDirectoryPtr dir = newLucene(); TestRollbackExceptionHang::MockIndexWriterPtr w = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); addDoc(w); w->doFail = true; BOOST_CHECK_EXCEPTION(w->rollback(), RuntimeException, check_exception(LuceneException::Runtime)); w->doFail = false; w->rollback(); } BOOST_AUTO_TEST_CASE(testBinaryFieldOffsetLength) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); ByteArray b = ByteArray::newInstance(50); for (int32_t i = 0; i < 50; ++i) b[i] = (uint8_t)(i + 77); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"binary", b, 10, 17, Field::STORE_YES); ByteArray bx = f->getBinaryValue(); BOOST_CHECK(bx); BOOST_CHECK_EQUAL(50, bx.size()); BOOST_CHECK_EQUAL(10, f->getBinaryOffset()); BOOST_CHECK_EQUAL(17, f->getBinaryLength()); doc->add(f); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); doc = reader->document(0); f = doc->getField(L"binary"); b = f->getBinaryValue(); BOOST_CHECK(b); BOOST_CHECK_EQUAL(17, b.size()); BOOST_CHECK_EQUAL(87, b[0]); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testCommitUserData) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); for (int32_t j = 0; j < 17; ++j) addDoc(writer); writer->close(); BOOST_CHECK_EQUAL(0, IndexReader::getCommitUserData(dir).size()); IndexReaderPtr reader = IndexReader::open(dir, true); // commit(Map) never called for this index BOOST_CHECK_EQUAL(0, reader->getCommitUserData().size()); reader->close(); writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); for (int32_t j = 0; j < 17; ++j) addDoc(writer); 
MapStringString data = MapStringString::newInstance(); data.put(L"label", L"test1"); writer->commit(data); writer->close(); BOOST_CHECK_EQUAL(L"test1", IndexReader::getCommitUserData(dir).get(L"label")); reader = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(L"test1", reader->getCommitUserData().get(L"label")); reader->close(); writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->optimize(); writer->close(); BOOST_CHECK_EQUAL(L"test1", IndexReader::getCommitUserData(dir).get(L"label")); dir->close(); } BOOST_AUTO_TEST_CASE(testOptimizeExceptions) { MockRAMDirectoryPtr startDir = newLucene(); IndexWriterPtr writer = newLucene(startDir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(100); for (int32_t i = 0; i < 27; ++i) addDoc(writer); writer->close(); for (int32_t i = 0; i < 200; ++i) { MockRAMDirectoryPtr dir = newLucene(startDir); writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); boost::dynamic_pointer_cast(writer->getMergeScheduler())->setSuppressExceptions(); dir->setRandomIOExceptionRate(0.5, 100); BOOST_CHECK_EXCEPTION(writer->optimize(), IOException, check_exception(LuceneException::IO)); // Make sure we don't hit random exception during close below dir->setRandomIOExceptionRate(0.0, 0); writer->close(); dir->close(); } } namespace TestOutOfMemoryErrorCausesCloseToFail { class MemoryIndexWriter : public IndexWriter { public: MemoryIndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(d, a, create, mfl) { thrown = false; } virtual ~MemoryIndexWriter() { } LUCENE_CLASS(MemoryIndexWriter); protected: bool thrown; public: virtual void message(const String& message) { if (boost::starts_with(message, L"now flush at close") && !thrown) { thrown = true; boost::throw_exception(std::bad_alloc()); } } }; } BOOST_AUTO_TEST_CASE(testOutOfMemoryErrorCausesCloseToFail) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer 
= newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setInfoStream(newLucene()); BOOST_CHECK_EXCEPTION(writer->close(), OutOfMemoryError, check_exception(LuceneException::OutOfMemory)); writer->close(); } BOOST_AUTO_TEST_CASE(testDoubleOffsetCounting) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"abcd", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f); doc->add(f); FieldPtr f2 = newLucene(L"field", L"", Field::STORE_NO, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f2); doc->add(f); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); Collection termOffsets = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field"))->getOffsets(0); // Token "" occurred once BOOST_CHECK_EQUAL(1, termOffsets.size()); BOOST_CHECK_EQUAL(8, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(8, termOffsets[0]->getEndOffset()); // Token "abcd" occurred three times termOffsets = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field"))->getOffsets(1); BOOST_CHECK_EQUAL(3, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(4, termOffsets[0]->getEndOffset()); BOOST_CHECK_EQUAL(4, termOffsets[1]->getStartOffset()); BOOST_CHECK_EQUAL(8, termOffsets[1]->getEndOffset()); BOOST_CHECK_EQUAL(8, termOffsets[2]->getStartOffset()); BOOST_CHECK_EQUAL(12, termOffsets[2]->getEndOffset()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testDoubleOffsetCounting2) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"abcd", Field::STORE_NO, Field::INDEX_ANALYZED, 
Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f); doc->add(f); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); Collection termOffsets = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field"))->getOffsets(0); BOOST_CHECK_EQUAL(2, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(4, termOffsets[0]->getEndOffset()); BOOST_CHECK_EQUAL(5, termOffsets[1]->getStartOffset()); BOOST_CHECK_EQUAL(9, termOffsets[1]->getEndOffset()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEndOffsetPositionCharAnalyzer) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"abcd ", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f); doc->add(f); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); Collection termOffsets = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field"))->getOffsets(0); BOOST_CHECK_EQUAL(2, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(4, termOffsets[0]->getEndOffset()); BOOST_CHECK_EQUAL(8, termOffsets[1]->getStartOffset()); BOOST_CHECK_EQUAL(12, termOffsets[1]->getEndOffset()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEndOffsetPositionWithCachingTokenFilter) { MockRAMDirectoryPtr dir = newLucene(); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(dir, analyzer, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); TokenStreamPtr stream = newLucene(analyzer->tokenStream(L"field", newLucene(L"abcd "))); FieldPtr f = newLucene(L"field", stream, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f); doc->add(f); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); 
Collection termOffsets = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field"))->getOffsets(0); BOOST_CHECK_EQUAL(2, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(4, termOffsets[0]->getEndOffset()); BOOST_CHECK_EQUAL(8, termOffsets[1]->getStartOffset()); BOOST_CHECK_EQUAL(12, termOffsets[1]->getEndOffset()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEndOffsetPositionWithTeeSinkTokenFilter) { MockRAMDirectoryPtr dir = newLucene(); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(dir, analyzer, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); TeeSinkTokenFilterPtr tee = newLucene(analyzer->tokenStream(L"field", newLucene(L"abcd "))); TokenStreamPtr sink = tee->newSinkTokenStream(); FieldPtr f1 = newLucene(L"field", tee, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); FieldPtr f2 = newLucene(L"field", sink, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f1); doc->add(f2); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); Collection termOffsets = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field"))->getOffsets(0); BOOST_CHECK_EQUAL(2, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(4, termOffsets[0]->getEndOffset()); BOOST_CHECK_EQUAL(8, termOffsets[1]->getStartOffset()); BOOST_CHECK_EQUAL(12, termOffsets[1]->getEndOffset()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEndOffsetPositionStopFilter) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"abcd the", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f); doc->add(f); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, 
true); Collection termOffsets = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field"))->getOffsets(0); BOOST_CHECK_EQUAL(2, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(4, termOffsets[0]->getEndOffset()); BOOST_CHECK_EQUAL(9, termOffsets[1]->getStartOffset()); BOOST_CHECK_EQUAL(13, termOffsets[1]->getEndOffset()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEndOffsetPositionStandard) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"abcd the ", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); FieldPtr f2 = newLucene(L"field", L"crunch man", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f); doc->add(f2); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPositionVectorPtr tpv = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field")); Collection termOffsets = tpv->getOffsets(0); BOOST_CHECK_EQUAL(1, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(4, termOffsets[0]->getEndOffset()); termOffsets = tpv->getOffsets(1); BOOST_CHECK_EQUAL(11, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(17, termOffsets[0]->getEndOffset()); termOffsets = tpv->getOffsets(2); BOOST_CHECK_EQUAL(18, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(21, termOffsets[0]->getEndOffset()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEndOffsetPositionStandardEmptyField) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"", Field::STORE_NO, Field::INDEX_ANALYZED, 
Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); FieldPtr f2 = newLucene(L"field", L"crunch man", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f); doc->add(f2); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPositionVectorPtr tpv = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field")); Collection termOffsets = tpv->getOffsets(0); BOOST_CHECK_EQUAL(1, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(6, termOffsets[0]->getEndOffset()); termOffsets = tpv->getOffsets(1); BOOST_CHECK_EQUAL(7, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(10, termOffsets[0]->getEndOffset()); reader->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEndOffsetPositionStandardEmptyField2) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"field", L"abcd", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f); doc->add(newLucene(L"field", L"", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); FieldPtr f2 = newLucene(L"field", L"crunch", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS); doc->add(f2); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPositionVectorPtr tpv = boost::dynamic_pointer_cast(reader->getTermFreqVector(0, L"field")); Collection termOffsets = tpv->getOffsets(0); BOOST_CHECK_EQUAL(1, termOffsets.size()); BOOST_CHECK_EQUAL(0, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(4, termOffsets[0]->getEndOffset()); termOffsets = tpv->getOffsets(1); BOOST_CHECK_EQUAL(5, termOffsets[0]->getStartOffset()); BOOST_CHECK_EQUAL(11, termOffsets[0]->getEndOffset()); reader->close(); dir->close(); } /// 
Make sure opening an IndexWriter with create=true does not remove non-index files BOOST_AUTO_TEST_CASE(testOtherFiles) { String indexDir(FileUtils::joinPath(getTempDir(), L"otherfiles")); DirectoryPtr dir = FSDirectory::open(indexDir); LuceneException finally; try { // Create my own random file IndexOutputPtr out = dir->createOutput(L"myrandomfile"); out->writeByte((uint8_t)42); out->close(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->close(); BOOST_CHECK(dir->fileExists(L"myrandomfile")); // Make sure this does not copy myrandomfile DirectoryPtr dir2 = newLucene(dir); BOOST_CHECK(!dir2->fileExists(L"myrandomfile")); } catch (LuceneException& e) { finally = e; } dir->close(); FileUtils::removeDirectory(indexDir); finally.throwException(); } BOOST_AUTO_TEST_CASE(testDeadlock) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa bbb ccc ddd eee fff ggg hhh iii", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->addDocument(doc); writer->addDocument(doc); writer->commit(); // index has 2 segments MockRAMDirectoryPtr dir2 = newLucene(); IndexWriterPtr writer2 = newLucene(dir2, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer2->addDocument(doc); writer2->close(); IndexReaderPtr r1 = IndexReader::open(dir2, true); IndexReaderPtr r2 = boost::dynamic_pointer_cast(r1->clone()); writer->addIndexes(newCollection(r1, r2)); writer->close(); IndexReaderPtr r3 = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(5, r3->numDocs()); r3->close(); r1->close(); r2->close(); dir2->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testIndexStoreCombos) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, 
IndexWriter::MaxFieldLengthUNLIMITED); ByteArray b = ByteArray::newInstance(50); for (int32_t i = 0; i < 50; ++i) b[i] = (uint8_t)(i + 77); DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"binary", b, 10, 17, Field::STORE_YES); f->setTokenStream(newLucene(newLucene(L"doc1field1"))); FieldPtr f2 = newLucene(L"string", L"value", Field::STORE_YES, Field::INDEX_ANALYZED); f2->setTokenStream(newLucene(newLucene(L"doc1field2"))); doc->add(f); doc->add(f2); writer->addDocument(doc); // add 2 docs to test in-memory merging f->setTokenStream(newLucene(newLucene(L"doc2field1"))); f2->setTokenStream(newLucene(newLucene(L"doc2field2"))); writer->addDocument(doc); // force segment flush so we can force a segment merge with doc3 later. writer->commit(); f->setTokenStream(newLucene(newLucene(L"doc3field1"))); f2->setTokenStream(newLucene(newLucene(L"doc3field2"))); writer->addDocument(doc); writer->commit(); writer->optimize(); // force segment merge. IndexReaderPtr reader = IndexReader::open(dir, true); doc = reader->document(0); f = doc->getField(L"binary"); b = f->getBinaryValue(); BOOST_CHECK(b); BOOST_CHECK_EQUAL(17, b.size()); BOOST_CHECK_EQUAL(87, b[0]); BOOST_CHECK(reader->document(0)->getFieldable(L"binary")->isBinary()); BOOST_CHECK(reader->document(1)->getFieldable(L"binary")->isBinary()); BOOST_CHECK(reader->document(2)->getFieldable(L"binary")->isBinary()); BOOST_CHECK_EQUAL(L"value", reader->document(0)->get(L"string")); BOOST_CHECK_EQUAL(L"value", reader->document(1)->get(L"string")); BOOST_CHECK_EQUAL(L"value", reader->document(2)->get(L"string")); // test that the terms were indexed. 
BOOST_CHECK(reader->termDocs(newLucene(L"binary", L"doc1field1"))->next()); BOOST_CHECK(reader->termDocs(newLucene(L"binary", L"doc2field1"))->next()); BOOST_CHECK(reader->termDocs(newLucene(L"binary", L"doc3field1"))->next()); BOOST_CHECK(reader->termDocs(newLucene(L"string", L"doc1field2"))->next()); BOOST_CHECK(reader->termDocs(newLucene(L"string", L"doc2field2"))->next()); BOOST_CHECK(reader->termDocs(newLucene(L"string", L"doc3field2"))->next()); reader->close(); dir->close(); } /// Make sure doc fields are stored in order BOOST_AUTO_TEST_CASE(testStoredFieldsOrder) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"zzz", L"a b c", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"aaa", L"a b c", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"zzz", L"1 2 3", Field::STORE_YES, Field::INDEX_NO)); writer->addDocument(doc); IndexReaderPtr reader = writer->getReader(); doc = reader->document(0); Collection fields = doc->getFields(); BOOST_CHECK_EQUAL(3, fields.size()); BOOST_CHECK_EQUAL(fields[0]->name(), L"zzz"); BOOST_CHECK_EQUAL(fields[0]->stringValue(), L"a b c"); BOOST_CHECK_EQUAL(fields[1]->name(), L"aaa"); BOOST_CHECK_EQUAL(fields[1]->stringValue(), L"a b c"); BOOST_CHECK_EQUAL(fields[2]->name(), L"zzz"); BOOST_CHECK_EQUAL(fields[2]->stringValue(), L"1 2 3"); reader->close(); writer->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEmbeddedFFFF) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); const wchar_t _field[] = {L'a', L' ', L'a', UTF8Base::UNICODE_TERMINATOR, L'b'}; String field(_field, SIZEOF_ARRAY(_field)); doc->add(newLucene(L"field", field, Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"field", L"a", Field::STORE_NO, 
Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); checkIndex(dir); dir->close(); } BOOST_AUTO_TEST_CASE(testNoDocsIndex) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); writer->setUseCompoundFile(false); writer->addDocument(newLucene()); writer->close(); checkIndex(dir); dir->close(); } namespace TestCommitThreadSafety { class CommitThread : public LuceneThread { public: CommitThread(int32_t finalI, IndexWriterPtr writer, DirectoryPtr dir, int64_t endTime) { this->finalI = finalI; this->writer = writer; this->dir = dir; this->endTime = endTime; } virtual ~CommitThread() { } LUCENE_CLASS(CommitThread); protected: int32_t finalI; IndexWriterPtr writer; DirectoryPtr dir; int64_t endTime; public: virtual void run() { try { DocumentPtr doc = newLucene(); IndexReaderPtr reader = IndexReader::open(dir); FieldPtr field = newLucene(L"f", L"", Field::STORE_NO, Field::INDEX_NOT_ANALYZED); doc->add(field); int32_t count = 0; while ((int64_t)MiscUtils::currentTimeMillis() < endTime) { for (int32_t j = 0; j < 10; ++j) { String s = StringUtils::toString(finalI) + L"_" + StringUtils::toString(count++); field->setValue(s); writer->addDocument(doc); writer->commit(); IndexReaderPtr reader2 = reader->reopen(); BOOST_CHECK_NE(reader2, reader); reader->close(); reader = reader2; BOOST_CHECK_EQUAL(1, reader->docFreq(newLucene(L"f", s))); } } reader->close(); } catch (...) 
{ BOOST_FAIL("Unexpected exception"); } } }; } /// make sure with multiple threads commit doesn't return until all changes are in fact in the index BOOST_AUTO_TEST_CASE(testCommitThreadSafety) { static const int32_t NUM_THREADS = 5; double RUN_SEC = 0.5; DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); writer->commit(); Collection threads = Collection::newInstance(NUM_THREADS); int64_t endTime = (int64_t)MiscUtils::currentTimeMillis() + (int64_t)(RUN_SEC * 1000.0); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i] = newLucene(i, writer, dir, endTime); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->join(); writer->close(); dir->close(); } namespace TestCorruptionAfterDiskFullDuringMerge { DECLARE_SHARED_PTR(FailTwiceDuringMerge) class FailTwiceDuringMerge : public MockDirectoryFailure { public: FailTwiceDuringMerge() { didFail1 = false; didFail2 = false; } virtual ~FailTwiceDuringMerge() { } public: bool didFail1; bool didFail2; public: virtual void eval(MockRAMDirectoryPtr dir) { if (!doFail) return; if (TestPoint::getTestPoint(L"SegmentMerger", L"mergeTerms") && !didFail1) { didFail1 = true; boost::throw_exception(IOException(L"fake disk full during mergeTerms")); } if (TestPoint::getTestPoint(L"BitVector", L"write") && !didFail2) { didFail2 = true; boost::throw_exception(IOException(L"fake disk full while writing BitVector")); } } }; } BOOST_AUTO_TEST_CASE(testCorruptionAfterDiskFullDuringMerge) { MockRAMDirectoryPtr dir = newLucene(); dir->setPreventDoubleWrite(false); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); w->setMergeScheduler(newLucene()); boost::dynamic_pointer_cast(w->getMergePolicy())->setMergeFactor(2); DocumentPtr doc = newLucene(); doc->add(newLucene(L"f", L"doctor who", Field::STORE_YES, Field::INDEX_ANALYZED)); w->addDocument(doc); w->commit(); w->deleteDocuments(newLucene(L"f", L"who")); w->addDocument(doc); // disk fills 
up! TestCorruptionAfterDiskFullDuringMerge::FailTwiceDuringMergePtr ftdm = newLucene(); ftdm->setDoFail(); dir->failOn(ftdm); try { w->commit(); BOOST_FAIL("fake disk full IOExceptions not hit"); } catch (IOException&) { } BOOST_CHECK(ftdm->didFail1 || ftdm->didFail2); checkIndex(dir); ftdm->clearDoFail(); w->addDocument(doc); w->close(); checkIndex(dir); dir->close(); } namespace TestFutureCommit { class NoDeletionPolicy : public IndexDeletionPolicy { public: virtual ~NoDeletionPolicy() { } LUCENE_CLASS(NoDeletionPolicy); public: virtual void onInit(Collection commits) { } virtual void onCommit(Collection commits) { } }; } BOOST_AUTO_TEST_CASE(testFutureCommit) { MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr w = newLucene(dir, newLucene(), (IndexDeletionPolicyPtr)newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); w->addDocument(doc); // commit to "first" MapStringString commitData = MapStringString::newInstance(); commitData.put(L"tag", L"first"); w->commit(commitData); // commit to "second" w->addDocument(doc); commitData.put(L"tag", L"second"); w->commit(commitData); w->close(); // open "first" with IndexWriter IndexCommitPtr commit; Collection commits = IndexReader::listCommits(dir); for (Collection::iterator c = commits.begin(); c != commits.end(); ++c) { String tag = (*c)->getUserData().get(L"tag"); if (tag == L"first") { commit = *c; break; } } BOOST_CHECK(commit); w = newLucene(dir, newLucene(), newLucene(), IndexWriter::MaxFieldLengthUNLIMITED, commit); BOOST_CHECK_EQUAL(1, w->numDocs()); // commit IndexWriter to "third" w->addDocument(doc); commitData.put(L"tag", L"third"); w->commit(commitData); w->close(); // make sure "second" commit is still there commit.reset(); commits = IndexReader::listCommits(dir); for (Collection::iterator c = commits.begin(); c != commits.end(); ++c) { String tag = (*c)->getUserData().get(L"tag"); if (tag == L"second") { commit = *c; break; } } BOOST_CHECK(commit); IndexReaderPtr r = 
IndexReader::open(commit, true); BOOST_CHECK_EQUAL(2, r->numDocs()); r->close(); // open "second", with writeable IndexReader & commit r = IndexReader::open(commit, newLucene(), false); BOOST_CHECK_EQUAL(2, r->numDocs()); r->deleteDocument(0); r->deleteDocument(1); commitData.put(L"tag", L"fourth"); r->commit(commitData); r->close(); // make sure "third" commit is still there commit.reset(); commits = IndexReader::listCommits(dir); for (Collection::iterator c = commits.begin(); c != commits.end(); ++c) { String tag = (*c)->getUserData().get(L"tag"); if (tag == L"third") { commit = *c; break; } } BOOST_CHECK(commit); dir->close(); } BOOST_AUTO_TEST_CASE(testNoUnwantedTVFiles) { DirectoryPtr dir = newLucene(); IndexWriterPtr indexWriter = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); indexWriter->setRAMBufferSizeMB(0.01); indexWriter->setUseCompoundFile(false); String BIG = L"alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg"; BIG += BIG + BIG + BIG; for (int32_t i = 0; i < 2; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i) + BIG, Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); doc->add(newLucene(L"str", StringUtils::toString(i) + BIG, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"str2", StringUtils::toString(i) + BIG, Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"str3", StringUtils::toString(i) + BIG, Field::STORE_YES, Field::INDEX_ANALYZED_NO_NORMS)); indexWriter->addDocument(doc); } indexWriter->close(); checkIndex(dir); checkNoUnreferencedFiles(dir); HashSet files = dir->listAll(); for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { BOOST_CHECK(!boost::ends_with(*file, IndexFileNames::VECTORS_FIELDS_EXTENSION())); BOOST_CHECK(!boost::ends_with(*file, IndexFileNames::VECTORS_INDEX_EXTENSION())); BOOST_CHECK(!boost::ends_with(*file, IndexFileNames::VECTORS_DOCUMENTS_EXTENSION())); } 
dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/LazyBugTest.cpp000066400000000000000000000127071217574114600231200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FieldSelector.h" #include "RAMDirectory.h" #include "IndexReader.h" #include "SimpleAnalyzer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "Random.h" using namespace Lucene; class LazyBugTestFixture : public LuceneTestFixture { public: virtual ~LazyBugTestFixture() { } public: static const int32_t NUM_DOCS; static const int32_t NUM_FIELDS; public: Collection data() { static Collection _data; if (!_data) { _data = Collection::newInstance(); _data.add(L"now"); _data.add(L"is the time"); _data.add(L"for all good men"); _data.add(L"to come to the aid"); _data.add(L"of their country!"); _data.add(L"this string contains big chars:{\u0111 \u0222 \u0333 \u1111 \u2222 \u3333}"); _data.add(L"this string is a bigger string, mary had a little lamb, little lamb, little lamb!"); } return _data; } HashSet dataset() { static HashSet _dataset; if (!_dataset) { Collection _data = data(); _dataset = HashSet::newInstance(_data.begin(), _data.end()); } return _dataset; } String MAGIC_FIELD() { return L"f" + StringUtils::toString((double)NUM_FIELDS / 3); } DECLARE_SHARED_PTR(LazyBugSelector) class LazyBugSelector : public FieldSelector { public: LazyBugSelector(const String& magicField) { this->magicField = magicField; } virtual ~LazyBugSelector() { } LUCENE_CLASS(LazyBugSelector); protected: String magicField; public: virtual FieldSelectorResult accept(const 
String& fieldName) { if (fieldName == magicField) return FieldSelector::SELECTOR_LOAD; else return FieldSelector::SELECTOR_LAZY_LOAD; } }; FieldSelectorPtr SELECTOR() { return newLucene(MAGIC_FIELD()); } DirectoryPtr makeIndex() { DirectoryPtr dir = newLucene(); RandomPtr rand = newLucene(); try { AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(false); Collection _data = data(); for (int32_t d = 1; d <= NUM_DOCS; ++d) { DocumentPtr doc = newLucene(); for (int32_t f = 1; f <= NUM_FIELDS; ++f) doc->add(newLucene(L"f" + StringUtils::toString(f), _data[f % _data.size()] + L"#" + _data[rand->nextInt(_data.size())], Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } return dir; } void doTest(Collection docs) { DirectoryPtr dir = makeIndex(); IndexReaderPtr reader = IndexReader::open(dir, true); HashSet _dataset = dataset(); for (int32_t i = 0; i < docs.size(); ++i) { DocumentPtr d = reader->document(docs[i], SELECTOR()); d->get(MAGIC_FIELD()); Collection fields = d->getFields(); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { try { String fname = (*field)->name(); String fval = (*field)->stringValue(); BOOST_CHECK(!fval.empty()); Collection vals = StringUtils::split(fval, L"#"); BOOST_CHECK_EQUAL(vals.size(), 2); if (!_dataset.contains(vals[0]) || !_dataset.contains(vals[1])) BOOST_FAIL("FIELD:" << fname << ",VAL:" << fval); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } } reader->close(); } }; const int32_t LazyBugTestFixture::NUM_DOCS = 500; const int32_t LazyBugTestFixture::NUM_FIELDS = 100; /// Test demonstrating EOF bug on the last field of the last doc if other docs have already been accessed. 
BOOST_FIXTURE_TEST_SUITE(LazyBugTest, LazyBugTestFixture) BOOST_AUTO_TEST_CASE(testLazyWorks) { doTest(newCollection(399)); } BOOST_AUTO_TEST_CASE(testLazyAlsoWorks) { doTest(newCollection(399, 150)); } BOOST_AUTO_TEST_CASE(testLazyBroken) { doTest(newCollection(150, 399)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/LazyProxSkippingTest.cpp000066400000000000000000000147131217574114600250370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "ScoreDoc.h" #include "PhraseQuery.h" #include "Term.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "SegmentReader.h" #include "IndexSearcher.h" #include "IndexInput.h" #include "TopDocs.h" #include "TermPositions.h" #include "IndexReader.h" using namespace Lucene; DECLARE_SHARED_PTR(SeeksCountingStream) /// Simply extends IndexInput in a way that we are able to count the number of invocations of seek() class SeeksCountingStream : public IndexInput { public: SeeksCountingStream(IndexInputPtr input) { this->input = input; } virtual ~SeeksCountingStream() { } LUCENE_CLASS(SeeksCountingStream); protected: IndexInputPtr input; public: virtual uint8_t readByte() { return input->readByte(); } virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) { input->readBytes(b, offset, length); } virtual void close() { input->close(); } virtual int64_t getFilePointer() { return input->getFilePointer(); } virtual void seek(int64_t pos); // implemented below virtual int64_t length() { return input->length(); } 
LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()) { return newLucene(boost::dynamic_pointer_cast(input->clone())); } }; class SeekCountingDirectory : public RAMDirectory { public: virtual ~SeekCountingDirectory() { } public: virtual IndexInputPtr openInput(const String& name) { IndexInputPtr ii = RAMDirectory::openInput(name); if (boost::ends_with(name, L".prx")) { // we decorate the proxStream with a wrapper class that allows to count the number of calls of seek() ii = newLucene(ii); } return ii; } }; class LazyProxSkippingFixture : public LuceneTestFixture { public: LazyProxSkippingFixture() { seeksCounter = 0; field = L"tokens"; term1 = L"xx"; term2 = L"yy"; term3 = L"zz"; } virtual ~LazyProxSkippingFixture() { } protected: SearcherPtr searcher; String field; String term1; String term2; String term3; public: static int32_t seeksCounter; public: void createIndex(int32_t numHits) { int32_t numDocs = 500; DirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(false); writer->setMaxBufferedDocs(10); for (int32_t i = 0; i < numDocs; ++i) { DocumentPtr doc = newLucene(); String content; if (i % (numDocs / numHits) == 0) { // add a document that matches the query "term1 term2" content = term1 + L" " + term2; } else if (i % 15 == 0) { // add a document that only contains term1 content = term1 + L" " + term1; } else { // add a document that contains term2 but not term 1 content = term3 + L" " + term2; } doc->add(newLucene(field, content, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } // make sure the index has only a single segment writer->optimize(); writer->close(); SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory); searcher = newLucene(reader); } Collection search() { // create PhraseQuery "term1 term2" and search PhraseQueryPtr pq = newLucene(); pq->add(newLucene(field, term1)); 
pq->add(newLucene(field, term2)); return searcher->search(pq, FilterPtr(), 1000)->scoreDocs; } void performTest(int32_t numHits) { createIndex(numHits); seeksCounter = 0; Collection hits = search(); // verify that the right number of docs was found BOOST_CHECK_EQUAL(numHits, hits.size()); // check if the number of calls of seek() does not exceed the number of hits BOOST_CHECK(seeksCounter > 0); BOOST_CHECK(seeksCounter <= numHits + 1); } }; int32_t LazyProxSkippingFixture::seeksCounter = 0; void SeeksCountingStream::seek(int64_t pos) { ++LazyProxSkippingFixture::seeksCounter; input->seek(pos); } /// Tests lazy skipping on the proximity file. BOOST_FIXTURE_TEST_SUITE(LazyProxSkippingTest, LazyProxSkippingFixture) BOOST_AUTO_TEST_CASE(testLazySkipping) { // test whether only the minimum amount of seeks() are performed performTest(5); performTest(10); } BOOST_AUTO_TEST_CASE(testSeek) { DirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 10; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(field, L"a b", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); IndexReaderPtr reader = IndexReader::open(directory, true); TermPositionsPtr tp = reader->termPositions(); tp->seek(newLucene(field, L"b")); for (int32_t i = 0; i < 10; ++i) { tp->next(); BOOST_CHECK_EQUAL(tp->doc(), i); BOOST_CHECK_EQUAL(tp->nextPosition(), 1); } tp->seek(newLucene(field, L"a")); for (int32_t i = 0; i < 10; ++i) { tp->next(); BOOST_CHECK_EQUAL(tp->doc(), i); BOOST_CHECK_EQUAL(tp->nextPosition(), 0); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/MockIndexInput.cpp000066400000000000000000000027701217574114600236030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "MockIndexInput.h" #include "MiscUtils.h" namespace Lucene { MockIndexInput::MockIndexInput(ByteArray bytes) { buffer = bytes; _length = bytes.size(); pointer = 0; } MockIndexInput::~MockIndexInput() { } void MockIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) { int32_t remainder = length; int32_t start = pointer; while (remainder != 0) { int32_t bufferOffset = start % buffer.size(); int32_t bytesInBuffer = buffer.size() - bufferOffset; int32_t bytesToCopy = bytesInBuffer >= remainder ? remainder : bytesInBuffer; MiscUtils::arrayCopy(buffer.get(), bufferOffset, b, offset, bytesToCopy); offset += bytesToCopy; start += bytesToCopy; remainder -= bytesToCopy; } pointer += length; } void MockIndexInput::close() { // ignore } void MockIndexInput::seekInternal(int64_t pos) { pointer = (int32_t)pos; } int64_t MockIndexInput::length() { return _length; } } LucenePlusPlus-rel_3.0.4/src/test/index/MultiLevelSkipListTest.cpp000066400000000000000000000117061217574114600253060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "Analyzer.h" #include "LowerCaseTokenizer.h" #include "TokenFilter.h" #include "PayloadAttribute.h" #include "TokenStream.h" #include "Payload.h" #include "Term.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "SegmentReader.h" #include "SegmentTermPositions.h" #include "IndexInput.h" using namespace Lucene; /// This testcase tests whether multi-level skipping is being used to reduce I/O while /// skipping through posting lists. Skipping in general is already covered by /// several other testcases. BOOST_FIXTURE_TEST_SUITE(MultiLevelSkipListTest, LuceneTestFixture) class PayloadFilter : public TokenFilter { public: PayloadFilter(TokenStreamPtr input) : TokenFilter(input) { payloadAtt = addAttribute(); } virtual ~PayloadFilter() { } LUCENE_CLASS(PayloadFilter); public: static int32_t count; PayloadAttributePtr payloadAtt; public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(newLucene(reader)); } virtual bool incrementToken() { bool hasNext = input->incrementToken(); if (hasNext) { ByteArray data = ByteArray::newInstance(1); data[0] = (uint8_t)(count++); payloadAtt->setPayload(newLucene(data)); } return hasNext; } }; int32_t PayloadFilter::count = 0; class PayloadAnalyzer : public Analyzer { public: virtual ~PayloadAnalyzer() { } LUCENE_CLASS(PayloadAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(newLucene(reader)); } }; static int32_t counter = 0; class CountingStream : public IndexInput { public: CountingStream(IndexInputPtr input) { this->input = input; } virtual ~CountingStream() { } LUCENE_CLASS(CountingStream); protected: IndexInputPtr input; public: virtual uint8_t readByte() { ++counter; return 
input->readByte(); } virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) { counter += length; input->readBytes(b, offset, length); } virtual void close() { input->close(); } virtual int64_t getFilePointer() { return input->getFilePointer(); } virtual void seek(int64_t pos) { input->seek(pos); } virtual int64_t length() { return input->length(); } LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()) { return newLucene(boost::dynamic_pointer_cast(input->clone())); } }; static void checkSkipTo(TermPositionsPtr tp, int32_t target, int32_t maxCounter) { tp->skipTo(target); if (maxCounter < counter) BOOST_FAIL("Too many bytes read: " << counter); BOOST_CHECK_EQUAL(target, tp->doc()); BOOST_CHECK_EQUAL(1, tp->freq()); tp->nextPosition(); ByteArray b = ByteArray::newInstance(1); tp->getPayload(b, 0); BOOST_CHECK_EQUAL((uint8_t)target, b[0]); } BOOST_AUTO_TEST_CASE(testSimpleSkip) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); TermPtr term = newLucene(L"test", L"a"); for (int32_t i = 0; i < 5000; ++i) { DocumentPtr d1 = newLucene(); d1->add(newLucene(term->field(), term->text(), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(d1); } writer->commit(); writer->optimize(); writer->close(); IndexReaderPtr reader = SegmentReader::getOnlySegmentReader(dir); SegmentTermPositionsPtr tp = boost::dynamic_pointer_cast(reader->termPositions()); tp->freqStream(newLucene(tp->freqStream())); for (int32_t i = 0; i < 2; ++i) { counter = 0; tp->seek(term); checkSkipTo(tp, 14, 185); // no skips checkSkipTo(tp, 17, 190); // one skip on level 0 checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0 // this test would fail if we had only one skip level, because than more bytes would be read from the freqStream checkSkipTo(tp, 4800, 250);// one skip on level 2 } } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/index/MultiReaderTest.cpp000066400000000000000000000203741217574114600237570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "DirectoryReader.h" #include "RAMDirectory.h" #include "Document.h" #include "DocHelper.h" #include "SegmentInfos.h" #include "IndexReader.h" #include "TermFreqVector.h" #include "Field.h" #include "DefaultSimilarity.h" #include "MultiReader.h" #include "StandardAnalyzer.h" #include "IndexWriter.h" #include "TermDocs.h" #include "TermEnum.h" #include "Term.h" #include "SegmentReader.h" using namespace Lucene; class MultiReaderTestFixture : public LuceneTestFixture, public DocHelper { public: MultiReaderTestFixture() { readers = Collection::newInstance(2); dir = newLucene(); doc1 = newLucene(); doc2 = newLucene(); DocHelper::setupDoc(doc1); DocHelper::setupDoc(doc2); DocHelper::writeDoc(dir, doc1); DocHelper::writeDoc(dir, doc2); sis = newLucene(); sis->read(dir); } virtual ~MultiReaderTestFixture() { } protected: DirectoryPtr dir; DocumentPtr doc1; DocumentPtr doc2; Collection readers; SegmentInfosPtr sis; public: void doTestDocument() { sis->read(dir); IndexReaderPtr reader = openReader(); BOOST_CHECK(reader); DocumentPtr newDoc1 = reader->document(0); BOOST_CHECK(newDoc1); BOOST_CHECK(DocHelper::numFields(newDoc1) == DocHelper::numFields(doc1) - DocHelper::unstored.size()); DocumentPtr newDoc2 = reader->document(1); BOOST_CHECK(newDoc2); BOOST_CHECK(DocHelper::numFields(newDoc2) == DocHelper::numFields(doc2) - DocHelper::unstored.size()); TermFreqVectorPtr vector = reader->getTermFreqVector(0, 
DocHelper::TEXT_FIELD_2_KEY); BOOST_CHECK(vector); checkNorms(reader); } void doTestUndeleteAll() { sis->read(dir); IndexReaderPtr reader = openReader(); BOOST_CHECK(reader); BOOST_CHECK_EQUAL(2, reader->numDocs()); reader->deleteDocument(0); BOOST_CHECK_EQUAL(1, reader->numDocs()); reader->undeleteAll(); BOOST_CHECK_EQUAL(2, reader->numDocs()); // Ensure undeleteAll survives commit/close/reopen reader->commit(MapStringString()); reader->close(); if (boost::dynamic_pointer_cast(reader)) { // MultiReader does not "own" the directory so it does not write the changes to sis on commit sis->commit(dir); } sis->read(dir); reader = openReader(); BOOST_CHECK_EQUAL(2, reader->numDocs()); reader->deleteDocument(0); BOOST_CHECK_EQUAL(1, reader->numDocs()); reader->commit(MapStringString()); reader->close(); if (boost::dynamic_pointer_cast(reader)) { // MultiReader does not "own" the directory so it does not write the changes to sis on commit sis->commit(dir); } sis->read(dir); reader = openReader(); BOOST_CHECK_EQUAL(1, reader->numDocs()); } protected: IndexReaderPtr openReader() { sis->read(dir); SegmentReaderPtr reader1 = SegmentReader::get(false, sis->info(0), IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); SegmentReaderPtr reader2 = SegmentReader::get(false, sis->info(1), IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); readers[0] = reader1; readers[1] = reader2; BOOST_CHECK(reader1); BOOST_CHECK(reader2); IndexReaderPtr reader = newLucene(readers); BOOST_CHECK(dir); BOOST_CHECK(sis); BOOST_CHECK(reader); return reader; } void checkNorms(IndexReaderPtr reader) { for (Collection::iterator field = DocHelper::fields.begin(); field != DocHelper::fields.end(); ++field) { if ((*field)->isIndexed()) { BOOST_CHECK_EQUAL(reader->hasNorms((*field)->name()), !(*field)->getOmitNorms()); BOOST_CHECK_EQUAL(reader->hasNorms((*field)->name()), !DocHelper::noNorms.contains((*field)->name())); if (!reader->hasNorms((*field)->name())) { // test for fake norms of 1.0 or null depending on the flag 
ByteArray norms = reader->norms((*field)->name()); uint8_t norm1 = DefaultSimilarity::encodeNorm(1.0); BOOST_CHECK(!norms); norms = ByteArray::newInstance(reader->maxDoc()); reader->norms((*field)->name(), norms, 0); for (int32_t j = 0; j < reader->maxDoc(); ++j) BOOST_CHECK_EQUAL(norms[j], norm1); } } } } void addDoc(RAMDirectoryPtr ramDir1, const String& s, bool create) { IndexWriterPtr iw = newLucene(ramDir1, newLucene(LuceneVersion::LUCENE_CURRENT), create, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED)); iw->addDocument(doc); iw->close(); } }; BOOST_FIXTURE_TEST_SUITE(MultiReaderTest, MultiReaderTestFixture) BOOST_AUTO_TEST_CASE(testTestMultiReader) { doTestDocument(); doTestUndeleteAll(); } BOOST_AUTO_TEST_CASE(testIsCurrent) { RAMDirectoryPtr ramDir1 = newLucene(); addDoc(ramDir1, L"test foo", true); RAMDirectoryPtr ramDir2 = newLucene(); addDoc(ramDir2, L"test blah", true); MultiReaderPtr mr = newLucene(newCollection(IndexReader::open(ramDir1, false), IndexReader::open(ramDir2, false))); BOOST_CHECK(mr->isCurrent()); // just opened, must be current addDoc(ramDir1, L"more text", false); BOOST_CHECK(!mr->isCurrent()); // has been modified, not current anymore addDoc(ramDir2, L"even more text", false); BOOST_CHECK(!mr->isCurrent()); // has been modified even more, not current anymore BOOST_CHECK_EXCEPTION(mr->getVersion(), LuceneException, check_exception(LuceneException::UnsupportedOperation)); mr->close(); } BOOST_AUTO_TEST_CASE(testMultiTermDocs) { RAMDirectoryPtr ramDir1 = newLucene(); addDoc(ramDir1, L"test foo", true); RAMDirectoryPtr ramDir2 = newLucene(); addDoc(ramDir2, L"test blah", true); RAMDirectoryPtr ramDir3 = newLucene(); addDoc(ramDir3, L"test wow", true); Collection readers1 = newCollection(IndexReader::open(ramDir1, false), IndexReader::open(ramDir3, false)); Collection readers2 = newCollection(IndexReader::open(ramDir1, false), 
IndexReader::open(ramDir2, false), IndexReader::open(ramDir3, false)); MultiReaderPtr mr2 = newLucene(readers1); MultiReaderPtr mr3 = newLucene(readers2); // test mixing up TermDocs and TermEnums from different readers. TermDocsPtr td2 = mr2->termDocs(); TermEnumPtr te3 = mr3->terms(newLucene(L"body", L"wow")); td2->seek(te3); int32_t ret = 0; // This should blow up if we forget to check that the TermEnum is from the same reader as the TermDocs. while (td2->next()) ret += td2->doc(); td2->close(); te3->close(); // really a dummy check to ensure that we got some docs and to ensure that nothing is optimized out. BOOST_CHECK(ret > 0); } BOOST_AUTO_TEST_CASE(testAllTermDocs) { IndexReaderPtr reader = openReader(); int32_t NUM_DOCS = 2; TermDocsPtr td = reader->termDocs(TermPtr()); for (int32_t i = 0; i < NUM_DOCS; ++i) { BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(i, td->doc()); BOOST_CHECK_EQUAL(1, td->freq()); } td->close(); reader->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/NRTReaderWithThreadsTest.cpp000066400000000000000000000126301217574114600254730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "IndexReader.h" #include "LuceneThread.h" #include "Document.h" #include "Term.h" #include "Field.h" #include "TermDocs.h" #include "Random.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(NRTReaderWithThreadsTest, LuceneTestFixture) DECLARE_SHARED_PTR(RunThread) DECLARE_SHARED_PTR(HeavyAtomicInt) static DocumentPtr createDocument(int32_t n, const String& indexName, int32_t numFields) { StringStream sb; DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"indexname", indexName, Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); sb << L"a" << n; doc->add(newLucene(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); sb << L" b" << n; for (int32_t i = 1; i < numFields; ++i) doc->add(newLucene(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); return doc; } static int32_t count(TermPtr t, IndexReaderPtr r) { int32_t count = 0; TermDocsPtr td = r->termDocs(t); while (td->next()) { td->doc(); ++count; } td->close(); return count; } class HeavyAtomicInt : public LuceneObject { public: HeavyAtomicInt(int32_t start) { value = start; } virtual ~HeavyAtomicInt() { } protected: int32_t value; public: int32_t addAndGet(int32_t inc) { SyncLock syncLock(this); value += inc; return value; } int32_t incrementAndGet() { SyncLock syncLock(this); return ++value; } int32_t intValue() { SyncLock syncLock(this); return value; } }; class RunThread : public LuceneThread { public: RunThread(int32_t type, IndexWriterPtr writer, HeavyAtomicIntPtr 
seq) { this->_run = true; this->delCount = 0; this->addCount = 0; this->type = type; this->writer = writer; this->seq = seq; this->rand = newLucene(); } virtual ~RunThread() { } LUCENE_CLASS(RunThread); public: HeavyAtomicIntPtr seq; IndexWriterPtr writer; bool _run; int32_t delCount; int32_t addCount; int32_t type; RandomPtr rand; public: virtual void run() { try { while (_run) { if (type == 0) { int32_t i = seq->addAndGet(1); DocumentPtr doc = createDocument(i, L"index1", 10); writer->addDocument(doc); ++addCount; } else { // we may or may not delete because the term may not exist, // however we're opening and closing the reader rapidly IndexReaderPtr reader = writer->getReader(); int32_t id = rand->nextInt(seq->intValue()); TermPtr term = newLucene(L"id", StringUtils::toString(id)); int32_t _count = count(term, reader); writer->deleteDocuments(term); reader->close(); delCount += _count; } } } catch (LuceneException& e) { _run = false; BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; BOOST_AUTO_TEST_CASE(testIndexing) { HeavyAtomicIntPtr seq = newLucene(1); DirectoryPtr mainDir = newLucene(); IndexWriterPtr writer = newLucene(mainDir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(false); IndexReaderPtr reader = writer->getReader(); // start pooling readers reader->close(); writer->setMergeFactor(2); writer->setMaxBufferedDocs(10); Collection indexThreads = Collection::newInstance(4); for (int32_t x = 0; x < indexThreads.size(); ++x) { indexThreads[x] = newLucene(x % 2, writer, seq); indexThreads[x]->start(); } int64_t startTime = MiscUtils::currentTimeMillis(); int64_t duration = 5 * 1000; while (((int64_t)MiscUtils::currentTimeMillis() - startTime) < duration) LuceneThread::threadSleep(100); int32_t delCount = 0; int32_t addCount = 0; for (int32_t x = 0; x < indexThreads.size(); ++x) { indexThreads[x]->_run = false; addCount += indexThreads[x]->addCount; delCount += indexThreads[x]->delCount; } for (int32_t x = 0; x < 
indexThreads.size(); ++x) indexThreads[x]->join(); writer->close(); mainDir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/NormsTest.cpp000066400000000000000000000164551217574114600226450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FSDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "DefaultSimilarity.h" #include "IndexReader.h" #include "FileUtils.h" using namespace Lucene; class SimilarityOne : public DefaultSimilarity { public: virtual ~SimilarityOne() { } LUCENE_CLASS(SimilarityOne); public: virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } }; /// Test that norms info is preserved during index life - including separate norms, addDocument, addIndexesNoOptimize, optimize. class NormTestFixture : public LuceneTestFixture { public: NormTestFixture() { similarityOne = newLucene(); lastNorm = 0.0; normDelta = 0.001; numDocNorms = 0; } virtual ~NormTestFixture() { } protected: static const int32_t NUM_FIELDS; SimilarityPtr similarityOne; int32_t numDocNorms; Collection norms; Collection modifiedNorms; double lastNorm; double normDelta; public: /// return unique norm values that are unchanged by encoding/decoding double nextNorm() { double norm = lastNorm + normDelta; do { double norm1 = Similarity::decodeNorm(Similarity::encodeNorm(norm)); if (norm1 > lastNorm) { norm = norm1; break; } norm += normDelta; } while (true); norms.add(numDocNorms, norm); modifiedNorms.add(numDocNorms, norm); ++numDocNorms; lastNorm = (norm > 10 ? 
0 : norm); // there's a limit to how many distinct values can be stored in a ingle byte return norm; } /// create the next document DocumentPtr newDoc() { DocumentPtr d = newLucene(); double boost = nextNorm(); for (int32_t i = 0; i < 10; ++i) { FieldPtr f = newLucene(L"f" + StringUtils::toString(i), L"v" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED); f->setBoost(boost); d->add(f); } return d; } void verifyIndex(DirectoryPtr dir) { IndexReaderPtr ir = IndexReader::open(dir, false); for (int32_t i = 0; i < NUM_FIELDS; ++i) { String field = L"f" + StringUtils::toString(i); ByteArray b = ir->norms(field); BOOST_CHECK_EQUAL(numDocNorms, b.size()); Collection storedNorms = (i == 1 ? modifiedNorms : norms); for (int32_t j = 0; j < b.size(); ++j) { double norm = Similarity::decodeNorm(b[j]); double norm1 = storedNorms[j]; BOOST_CHECK_EQUAL(norm, norm1); // 0.000001 } } } void addDocs(DirectoryPtr dir, int32_t ndocs, bool compound) { IndexWriterPtr iw = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->setSimilarity(similarityOne); iw->setUseCompoundFile(compound); for (int32_t i = 0; i < ndocs; ++i) iw->addDocument(newDoc()); iw->close(); } void modifyNormsForF1(DirectoryPtr dir) { IndexReaderPtr ir = IndexReader::open(dir, false); int32_t n = ir->maxDoc(); for (int32_t i = 0; i < n; i += 3) // modify for every third doc { int32_t k = (i * 3) % modifiedNorms.size(); double origNorm = modifiedNorms[i]; double newNorm = modifiedNorms[k]; modifiedNorms[i] = newNorm; modifiedNorms[k] = origNorm; ir->setNorm(i, L"f1", newNorm); ir->setNorm(k, L"f1", origNorm); } ir->close(); } void doTestNorms(DirectoryPtr dir) { for (int32_t i = 0; i < 5; ++i) { addDocs(dir, 12, true); verifyIndex(dir); modifyNormsForF1(dir); verifyIndex(dir); addDocs(dir, 12, false); verifyIndex(dir); modifyNormsForF1(dir); verifyIndex(dir); } } void createIndex(DirectoryPtr dir) 
{ IndexWriterPtr iw = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->setSimilarity(similarityOne); iw->setUseCompoundFile(true); iw->close(); } }; const int32_t NormTestFixture::NUM_FIELDS = 10; BOOST_FIXTURE_TEST_SUITE(NormsTest, NormTestFixture) /// Test that norms values are preserved as the index is maintained. /// Including separate norms. /// Including merging indexes with separate norms. /// Including optimize. BOOST_AUTO_TEST_CASE(testNorms) { // test with a single index: index1 String indexDir1(FileUtils::joinPath(getTempDir(), L"lucenetestindex1")); DirectoryPtr dir1 = FSDirectory::open(indexDir1); norms = Collection::newInstance(); modifiedNorms = Collection::newInstance(); createIndex(dir1); doTestNorms(dir1); // test with a single index: index2 Collection norms1 = norms; Collection modifiedNorms1 = modifiedNorms; int32_t numDocNorms1 = numDocNorms; norms = Collection::newInstance(); modifiedNorms = Collection::newInstance(); numDocNorms = 0; String indexDir2(FileUtils::joinPath(getTempDir(), L"lucenetestindex2")); DirectoryPtr dir2 = FSDirectory::open(indexDir2); createIndex(dir2); doTestNorms(dir2); // add index1 and index2 to a third index: index3 String indexDir3(FileUtils::joinPath(getTempDir(), L"lucenetestindex3")); DirectoryPtr dir3 = FSDirectory::open(indexDir3); createIndex(dir3); IndexWriterPtr iw = newLucene(dir3, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->addIndexesNoOptimize(newCollection(dir1, dir2)); iw->optimize(); iw->close(); norms1.addAll(norms.begin(), norms.end()); norms = norms1; modifiedNorms1.addAll(modifiedNorms.begin(), modifiedNorms.end()); modifiedNorms = modifiedNorms1; numDocNorms += numDocNorms1; // test with index3 verifyIndex(dir3); doTestNorms(dir3); // now with optimize iw = newLucene(dir3, 
newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->optimize(); iw->close(); verifyIndex(dir3); dir1->close(); dir2->close(); dir3->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/OmitTfTest.cpp000066400000000000000000000312421217574114600227400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "StandardAnalyzer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "SegmentReader.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "Similarity.h" #include "Explanation.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" #include "Collector.h" #include "Scorer.h" #include "BooleanQuery.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(OmitTfTest, LuceneTestFixture) DECLARE_SHARED_PTR(CountingHitCollector) class SimpleIDFExplanation : public IDFExplanation { public: virtual ~SimpleIDFExplanation() { } LUCENE_CLASS(SimpleIDFExplanation); public: virtual double getIdf() { return 1.0; } virtual String explain() { return L"Inexplicable"; } }; class SimpleSimilarity : public Similarity { public: virtual ~SimpleSimilarity() { } LUCENE_CLASS(SimpleSimilarity); public: virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } virtual double queryNorm(double sumOfSquaredWeights) { return 1.0; } virtual double tf(double freq) { return freq; } virtual double sloppyFreq(int32_t distance) { return 2.0; } virtual double idf(int32_t docFreq, int32_t numDocs) 
{ return 1.0; } virtual double coord(int32_t overlap, int32_t maxOverlap) { return 1.0; } virtual IDFExplanationPtr idfExplain(Collection terms, SearcherPtr searcher) { return newLucene(); } }; class CountingHitCollector : public Collector { public: CountingHitCollector() { count = 0; sum = 0; docBase = -1; } virtual ~CountingHitCollector() { } LUCENE_CLASS(CountingHitCollector); public: int32_t count; int32_t sum; protected: int32_t docBase; public: virtual void setScorer(ScorerPtr scorer) { } virtual void collect(int32_t doc) { ++count; sum += doc + docBase; // use it to avoid any possibility of being optimized away } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { this->docBase = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; static void checkNoPrx(DirectoryPtr dir) { HashSet files = dir->listAll(); for (HashSet::iterator file = files.begin(); file != files.end(); ++file) BOOST_CHECK(!boost::ends_with(*file, L".prx")); } /// Tests whether the DocumentWriter correctly enable the omitTermFreqAndPositions bit in the FieldInfo BOOST_AUTO_TEST_CASE(testOmitTermFreqAndPositions) { DirectoryPtr ram = newLucene(); AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); IndexWriterPtr writer = newLucene(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d = newLucene(); // this field will have Tf FieldPtr f1 = newLucene(L"f1", L"This field has term freqs", Field::STORE_NO, Field::INDEX_ANALYZED); d->add(f1); // this field will NOT have Tf FieldPtr f2 = newLucene(L"f2", L"This field has NO Tf in all docs", Field::STORE_NO, Field::INDEX_ANALYZED); f2->setOmitTermFreqAndPositions(true); d->add(f2); writer->addDocument(d); writer->optimize(); // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger keep things constant d = newLucene(); // Reverese f1->setOmitTermFreqAndPositions(true); d->add(f1); f2->setOmitTermFreqAndPositions(false); d->add(f2); 
writer->addDocument(d); // force merge writer->optimize(); // flush writer->close(); checkIndex(ram); SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(ram); FieldInfosPtr fi = reader->fieldInfos(); BOOST_CHECK(fi->fieldInfo(L"f1")->omitTermFreqAndPositions); BOOST_CHECK(fi->fieldInfo(L"f2")->omitTermFreqAndPositions); reader->close(); ram->close(); } /// Tests whether merging of docs that have different omitTermFreqAndPositions for the same field works BOOST_AUTO_TEST_CASE(testMixedMerge) { DirectoryPtr ram = newLucene(); AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); IndexWriterPtr writer = newLucene(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(3); writer->setMergeFactor(2); DocumentPtr d = newLucene(); // this field will have Tf FieldPtr f1 = newLucene(L"f1", L"This field has term freqs", Field::STORE_NO, Field::INDEX_ANALYZED); d->add(f1); // this field will NOT have Tf FieldPtr f2 = newLucene(L"f2", L"This field has NO Tf in all docs", Field::STORE_NO, Field::INDEX_ANALYZED); f2->setOmitTermFreqAndPositions(true); d->add(f2); for (int32_t i = 0; i < 30; ++i) writer->addDocument(d); // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger keep things constant d = newLucene(); // Reverese f1->setOmitTermFreqAndPositions(true); d->add(f1); f2->setOmitTermFreqAndPositions(false); d->add(f2); for (int32_t i = 0; i < 30; ++i) writer->addDocument(d); // force merge writer->optimize(); // flush writer->close(); checkIndex(ram); SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(ram); FieldInfosPtr fi = reader->fieldInfos(); BOOST_CHECK(fi->fieldInfo(L"f1")->omitTermFreqAndPositions); BOOST_CHECK(fi->fieldInfo(L"f2")->omitTermFreqAndPositions); reader->close(); ram->close(); } /// Make sure first adding docs that do not omitTermFreqAndPositions for field X, then adding docs that do /// omitTermFreqAndPositions for that same field 
BOOST_AUTO_TEST_CASE(testMixedRAM) { DirectoryPtr ram = newLucene(); AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); IndexWriterPtr writer = newLucene(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); writer->setMergeFactor(2); DocumentPtr d = newLucene(); // this field will have Tf FieldPtr f1 = newLucene(L"f1", L"This field has term freqs", Field::STORE_NO, Field::INDEX_ANALYZED); d->add(f1); // this field will NOT have Tf FieldPtr f2 = newLucene(L"f2", L"This field has NO Tf in all docs", Field::STORE_NO, Field::INDEX_ANALYZED); d->add(f2); for (int32_t i = 0; i < 5; ++i) writer->addDocument(d); f2->setOmitTermFreqAndPositions(true); for (int32_t i = 0; i < 20; ++i) writer->addDocument(d); // force merge writer->optimize(); // flush writer->close(); checkIndex(ram); SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(ram); FieldInfosPtr fi = reader->fieldInfos(); BOOST_CHECK(!fi->fieldInfo(L"f1")->omitTermFreqAndPositions); BOOST_CHECK(fi->fieldInfo(L"f2")->omitTermFreqAndPositions); reader->close(); ram->close(); } /// Verifies no *.prx exists when all fields omit term freq BOOST_AUTO_TEST_CASE(testNoPrxFile) { DirectoryPtr ram = newLucene(); AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); IndexWriterPtr writer = newLucene(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(3); writer->setMergeFactor(2); writer->setUseCompoundFile(false); DocumentPtr d = newLucene(); // this field will have Tf FieldPtr f1 = newLucene(L"f1", L"This field has term freqs", Field::STORE_NO, Field::INDEX_ANALYZED); f1->setOmitTermFreqAndPositions(true); d->add(f1); for (int32_t i = 0; i < 30; ++i) writer->addDocument(d); writer->commit(); checkNoPrx(ram); // force merge writer->optimize(); // flush writer->close(); checkNoPrx(ram); checkIndex(ram); ram->close(); } namespace TestBasic { class CountingHitCollectorQ1 : public CountingHitCollector { protected: ScorerPtr 
scorer; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { BOOST_CHECK_EQUAL(scorer->score(), 1.0); CountingHitCollector::collect(doc); } }; class CountingHitCollectorQ2 : public CountingHitCollector { protected: ScorerPtr scorer; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { BOOST_CHECK_EQUAL(scorer->score(), 1.0 + (double)doc); CountingHitCollector::collect(doc); } }; class CountingHitCollectorQ3 : public CountingHitCollector { protected: ScorerPtr scorer; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { BOOST_CHECK_EQUAL(scorer->score(), 1.0); BOOST_CHECK_NE(doc % 2, 0); CountingHitCollector::collect(doc); } }; class CountingHitCollectorQ4 : public CountingHitCollector { protected: ScorerPtr scorer; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { BOOST_CHECK_EQUAL(scorer->score(), 1.0); BOOST_CHECK_EQUAL(doc % 2, 0); CountingHitCollector::collect(doc); } }; } BOOST_AUTO_TEST_CASE(testBasic) { DirectoryPtr dir = newLucene(); AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); IndexWriterPtr writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setMergeFactor(2); writer->setMaxBufferedDocs(2); writer->setSimilarity(newLucene()); StringStream sb; for (int32_t i = 0; i < 30; ++i) { DocumentPtr d = newLucene(); sb << L"term "; String content = sb.str(); FieldPtr noTf = newLucene(L"noTf", content + (i % 2 == 0 ? L"" : L" notf"), Field::STORE_NO, Field::INDEX_ANALYZED); noTf->setOmitTermFreqAndPositions(true); d->add(noTf); FieldPtr tf = newLucene(L"tf", content + (i % 2 == 0 ? 
L" tf" : L""), Field::STORE_NO, Field::INDEX_ANALYZED); d->add(tf); writer->addDocument(d); } writer->optimize(); // flush writer->close(); checkIndex(dir); // Verify the index SearcherPtr searcher = newLucene(dir, true); searcher->setSimilarity(newLucene()); TermPtr a = newLucene(L"noTf", L"term"); TermPtr b = newLucene(L"tf", L"term"); TermPtr c = newLucene(L"noTf", L"noTf"); TermPtr d = newLucene(L"tf", L"tf"); TermQueryPtr q1 = newLucene(a); TermQueryPtr q2 = newLucene(b); TermQueryPtr q3 = newLucene(c); TermQueryPtr q4 = newLucene(d); searcher->search(q1, newLucene()); searcher->search(q2, newLucene()); searcher->search(q3, newLucene()); searcher->search(q4, newLucene()); BooleanQueryPtr bq = newLucene(); bq->add(q1, BooleanClause::MUST); bq->add(q4, BooleanClause::MUST); CountingHitCollectorPtr collector = newLucene(); searcher->search(bq, collector); BOOST_CHECK_EQUAL(15, collector->count); searcher->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/ParallelReaderEmptyIndexTest.cpp000066400000000000000000000070561217574114600264320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "ParallelReader.h" #include "IndexReader.h" #include "Document.h" #include "Field.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(ParallelReaderEmptyIndexTest, LuceneTestFixture) /// Creates two empty indexes and wraps a ParallelReader around. /// Adding this reader to a new index should not throw any exception. 
BOOST_AUTO_TEST_CASE(testEmptyIndex) { RAMDirectoryPtr rd1 = newLucene(); IndexWriterPtr iw = newLucene(rd1, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); iw->close(); RAMDirectoryPtr rd2 = newLucene(rd1); RAMDirectoryPtr rdOut = newLucene(); IndexWriterPtr iwOut = newLucene(rdOut, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(rd1,true)); pr->add(IndexReader::open(rd2,true)); iwOut->addIndexes(newCollection(pr)); iwOut->optimize(); iwOut->close(); checkIndex(rdOut); rdOut->close(); rd1->close(); rd2->close(); } /// This method creates an empty index (numFields=0, numDocs=0) but is marked to have TermVectors. /// Adding this index to another index should not throw any exception. BOOST_AUTO_TEST_CASE(testEmptyIndexWithVectors) { RAMDirectoryPtr rd1 = newLucene(); { IndexWriterPtr iw = newLucene(rd1, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"test", L"", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); iw->addDocument(doc); doc->add(newLucene(L"test", L"", Field::STORE_NO, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO)); iw->addDocument(doc); iw->close(); IndexReaderPtr ir = IndexReader::open(rd1,false); ir->deleteDocument(0); ir->close(); iw = newLucene(rd1, newLucene(), false, IndexWriter::MaxFieldLengthUNLIMITED); iw->optimize(); iw->close(); } RAMDirectoryPtr rd2 = newLucene(); { IndexWriterPtr iw = newLucene(rd2, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); iw->addDocument(doc); iw->close(); } RAMDirectoryPtr rdOut = newLucene(); IndexWriterPtr iwOut = newLucene(rdOut, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(rd1, true)); pr->add(IndexReader::open(rd2, true)); iwOut->addIndexes(newCollection(pr)); // ParallelReader closes any IndexReader you added to it pr->close(); 
rd1->close(); rd2->close(); iwOut->optimize(); iwOut->close(); checkIndex(rdOut); rdOut->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/ParallelReaderTest.cpp000066400000000000000000000270221217574114600244160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexSearcher.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "ParallelReader.h" #include "IndexReader.h" #include "TermQuery.h" #include "Term.h" #include "BooleanQuery.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "MapFieldSelector.h" #include "TermDocs.h" using namespace Lucene; class ParallelReaderTestFixture : public LuceneTestFixture { public: ParallelReaderTestFixture() { single = createSingle(); parallel = createParallel(); } virtual ~ParallelReaderTestFixture() { } public: SearcherPtr parallel; SearcherPtr single; public: /// Fields 1-4 indexed together SearcherPtr createSingle() { DirectoryPtr dir = newLucene(); IndexWriterPtr w = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d1 = newLucene(); d1->add(newLucene(L"f1", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); d1->add(newLucene(L"f2", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); d1->add(newLucene(L"f3", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); d1->add(newLucene(L"f4", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); w->addDocument(d1); DocumentPtr d2 = newLucene(); d2->add(newLucene(L"f1", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); 
d2->add(newLucene(L"f2", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); d2->add(newLucene(L"f3", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); d2->add(newLucene(L"f4", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); w->addDocument(d2); w->close(); return newLucene(dir, false); } /// Fields 1 & 2 in one index, 3 & 4 in other, with ParallelReader SearcherPtr createParallel() { DirectoryPtr dir1 = getDir1(); DirectoryPtr dir2 = getDir2(); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); return newLucene(pr); } DirectoryPtr getDir1() { DirectoryPtr dir1 = newLucene(); IndexWriterPtr w1 = newLucene(dir1, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d1 = newLucene(); d1->add(newLucene(L"f1", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); d1->add(newLucene(L"f2", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); w1->addDocument(d1); DocumentPtr d2 = newLucene(); d2->add(newLucene(L"f1", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); d2->add(newLucene(L"f2", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); w1->addDocument(d2); w1->close(); return dir1; } DirectoryPtr getDir2() { DirectoryPtr dir2 = newLucene(); IndexWriterPtr w2 = newLucene(dir2, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d3 = newLucene(); d3->add(newLucene(L"f3", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); d3->add(newLucene(L"f4", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); w2->addDocument(d3); DocumentPtr d4 = newLucene(); d4->add(newLucene(L"f3", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); d4->add(newLucene(L"f4", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); w2->addDocument(d4); w2->close(); return dir2; } void queryTest(QueryPtr query) { Collection parallelHits = parallel->search(query, FilterPtr(), 1000)->scoreDocs; Collection singleHits = single->search(query, FilterPtr(), 1000)->scoreDocs; 
BOOST_CHECK_EQUAL(parallelHits.size(), singleHits.size()); for (int32_t i = 0; i < parallelHits.size(); ++i) { BOOST_CHECK_CLOSE_FRACTION(parallelHits[i]->score, singleHits[i]->score, 0.001); DocumentPtr docParallel = parallel->doc(parallelHits[i]->doc); DocumentPtr docSingle = single->doc(singleHits[i]->doc); BOOST_CHECK_EQUAL(docParallel->get(L"f1"), docSingle->get(L"f1")); BOOST_CHECK_EQUAL(docParallel->get(L"f2"), docSingle->get(L"f2")); BOOST_CHECK_EQUAL(docParallel->get(L"f3"), docSingle->get(L"f3")); BOOST_CHECK_EQUAL(docParallel->get(L"f4"), docSingle->get(L"f4")); } } }; BOOST_FIXTURE_TEST_SUITE(ParallelReaderTest, ParallelReaderTestFixture) BOOST_AUTO_TEST_CASE(testQueries) { queryTest(newLucene(newLucene(L"f1", L"v1"))); queryTest(newLucene(newLucene(L"f2", L"v1"))); queryTest(newLucene(newLucene(L"f2", L"v2"))); queryTest(newLucene(newLucene(L"f3", L"v1"))); queryTest(newLucene(newLucene(L"f3", L"v2"))); queryTest(newLucene(newLucene(L"f4", L"v1"))); queryTest(newLucene(newLucene(L"f4", L"v2"))); BooleanQueryPtr bq1 = newLucene(); bq1->add(newLucene(newLucene(L"f1", L"v1")), BooleanClause::MUST); bq1->add(newLucene(newLucene(L"f4", L"v1")), BooleanClause::MUST); queryTest(bq1); } BOOST_AUTO_TEST_CASE(testFieldNames) { DirectoryPtr dir1 = getDir1(); DirectoryPtr dir2 = getDir2(); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); HashSet fieldNames = pr->getFieldNames(IndexReader::FIELD_OPTION_ALL); BOOST_CHECK_EQUAL(4, fieldNames.size()); BOOST_CHECK(fieldNames.contains(L"f1")); BOOST_CHECK(fieldNames.contains(L"f2")); BOOST_CHECK(fieldNames.contains(L"f3")); BOOST_CHECK(fieldNames.contains(L"f4")); } BOOST_AUTO_TEST_CASE(testDocument) { DirectoryPtr dir1 = getDir1(); DirectoryPtr dir2 = getDir2(); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); Collection fields1 = newCollection(L"f1"); Collection fields2 = 
newCollection(L"f4"); Collection fields3 = newCollection(L"f2", L"f3"); DocumentPtr doc11 = pr->document(0, newLucene(fields1)); DocumentPtr doc24 = pr->document(1, newLucene(fields2)); DocumentPtr doc223 = pr->document(1, newLucene(fields3)); BOOST_CHECK_EQUAL(1, doc11->getFields().size()); BOOST_CHECK_EQUAL(1, doc24->getFields().size()); BOOST_CHECK_EQUAL(2, doc223->getFields().size()); BOOST_CHECK_EQUAL(L"v1", doc11->get(L"f1")); BOOST_CHECK_EQUAL(L"v2", doc24->get(L"f4")); BOOST_CHECK_EQUAL(L"v2", doc223->get(L"f2")); BOOST_CHECK_EQUAL(L"v2", doc223->get(L"f3")); } BOOST_AUTO_TEST_CASE(testIncompatibleIndexes) { // two documents DirectoryPtr dir1 = getDir1(); // one document only DirectoryPtr dir2 = newLucene(); IndexWriterPtr w2 = newLucene(dir2, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d3 = newLucene(); d3->add(newLucene(L"f3", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); w2->addDocument(d3); w2->close(); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); BOOST_CHECK_EXCEPTION(pr->add(IndexReader::open(dir2, false)), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } BOOST_AUTO_TEST_CASE(testIsCurrent) { DirectoryPtr dir1 = getDir1(); DirectoryPtr dir2 = getDir2(); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); BOOST_CHECK(pr->isCurrent()); IndexReaderPtr modifier = IndexReader::open(dir1, false); modifier->setNorm(0, L"f1", (uint8_t)100); modifier->close(); // one of the two IndexReaders which ParallelReader is using is not current anymore BOOST_CHECK(!pr->isCurrent()); modifier = IndexReader::open(dir2, false); modifier->setNorm(0, L"f3", (uint8_t)100); modifier->close(); // now both are not current anymore BOOST_CHECK(!pr->isCurrent()); } BOOST_AUTO_TEST_CASE(testIsOptimized) { DirectoryPtr dir1 = getDir1(); DirectoryPtr dir2 = getDir2(); // add another document to ensure 
that the indexes are not optimized IndexWriterPtr modifier = newLucene(dir1, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d = newLucene(); d->add(newLucene(L"f1", L"v1", Field::STORE_YES, Field::INDEX_ANALYZED)); modifier->addDocument(d); modifier->close(); modifier = newLucene(dir2, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); d = newLucene(); d->add(newLucene(L"f2", L"v2", Field::STORE_YES, Field::INDEX_ANALYZED)); modifier->addDocument(d); modifier->close(); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); BOOST_CHECK(!pr->isOptimized()); pr->close(); modifier = newLucene(dir1, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); modifier->optimize(); modifier->close(); pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); // just one of the two indexes are optimized BOOST_CHECK(!pr->isOptimized()); pr->close(); modifier = newLucene(dir2, newLucene(LuceneVersion::LUCENE_CURRENT), IndexWriter::MaxFieldLengthLIMITED); modifier->optimize(); modifier->close(); pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); // now both indexes are optimized BOOST_CHECK(pr->isOptimized()); pr->close(); } BOOST_AUTO_TEST_CASE(testAllTermDocs) { DirectoryPtr dir1 = getDir1(); DirectoryPtr dir2 = getDir2(); ParallelReaderPtr pr = newLucene(); pr->add(IndexReader::open(dir1, false)); pr->add(IndexReader::open(dir2, false)); int32_t NUM_DOCS = 2; TermDocsPtr td = pr->termDocs(TermPtr()); for (int32_t i = 0; i < NUM_DOCS; ++i) { BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(i, td->doc()); BOOST_CHECK_EQUAL(1, td->freq()); } td->close(); pr->close(); dir1->close(); dir2->close(); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/index/ParallelTermEnumTest.cpp000066400000000000000000000136621217574114600247550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexReader.h" #include "Document.h" #include "Field.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "TermDocs.h" #include "TermEnum.h" #include "ParallelReader.h" #include "Term.h" using namespace Lucene; class ParallelTermEnumTestFixture : public LuceneTestFixture { public: ParallelTermEnumTestFixture() { RAMDirectoryPtr rd1 = newLucene(); IndexWriterPtr iw1 = newLucene(rd1, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field1", L"the quick brown fox jumps", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"field2", L"the quick brown fox jumps", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"field4", L"", Field::STORE_NO, Field::INDEX_ANALYZED)); iw1->addDocument(doc); iw1->close(); RAMDirectoryPtr rd2 = newLucene(); IndexWriterPtr iw2 = newLucene(rd2, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); doc = newLucene(); doc->add(newLucene(L"field0", L"", Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"field1", L"the fox jumps over the lazy dog", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"field3", L"the fox jumps over the lazy dog", Field::STORE_YES, Field::INDEX_ANALYZED)); iw2->addDocument(doc); iw2->close(); this->ir1 = IndexReader::open(rd1, true); this->ir2 = IndexReader::open(rd2, true); } virtual ~ParallelTermEnumTestFixture() 
{ ir1->close(); ir2->close(); } public: IndexReaderPtr ir1; IndexReaderPtr ir2; }; BOOST_FIXTURE_TEST_SUITE(ParallelTermEnumTest, ParallelTermEnumTestFixture) BOOST_AUTO_TEST_CASE(testParallelTermEnum) { ParallelReaderPtr pr = newLucene(); pr->add(ir1); pr->add(ir2); TermDocsPtr td = pr->termDocs(); TermEnumPtr te = pr->terms(); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field1:brown", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field1:fox", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field1:jumps", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field1:quick", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field1:the", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field2:brown", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field2:fox", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field2:jumps", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field2:quick", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); 
BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field2:the", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field3:dog", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field3:fox", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field3:jumps", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field3:lazy", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field3:over", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(te->next()); BOOST_CHECK_EQUAL(L"field3:the", te->term()->toString()); td->seek(te->term()); BOOST_CHECK(td->next()); BOOST_CHECK_EQUAL(0, td->doc()); BOOST_CHECK(!td->next()); BOOST_CHECK(!te->next()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/PayloadsTest.cpp000066400000000000000000000473061217574114600233220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "Payload.h" #include "RAMDirectory.h" #include "Analyzer.h" #include "WhitespaceTokenizer.h" #include "TokenFilter.h" #include "TokenStream.h" #include "PayloadAttribute.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "SegmentReader.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "FSDirectory.h" #include "Term.h" #include "TermPositions.h" #include "TermAttribute.h" #include "WhitespaceAnalyzer.h" #include "TermEnum.h" #include "Base64.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PayloadsTest, LuceneTestFixture) DECLARE_SHARED_PTR(PayloadData) DECLARE_SHARED_PTR(PayloadFilter) DECLARE_SHARED_PTR(PayloadAnalyzer) class PayloadData : public LuceneObject { public: PayloadData(int32_t skip, ByteArray data, int32_t offset, int32_t length) { this->numFieldInstancesToSkip = skip; this->data = data; this->offset = offset; this->length = length; } virtual ~PayloadData() { } LUCENE_CLASS(PayloadData); public: ByteArray data; int32_t offset; int32_t length; int32_t numFieldInstancesToSkip; }; /// This Filter adds payloads to the tokens. 
class PayloadFilter : public TokenFilter { public: PayloadFilter(TokenStreamPtr in, ByteArray data, int32_t offset, int32_t length) : TokenFilter(in) { this->payload = newLucene(); this->data = data; this->length = length; this->offset = offset; this->payloadAtt = addAttribute(); } virtual ~PayloadFilter() { } LUCENE_CLASS(PayloadFilter); public: ByteArray data; int32_t length; int32_t offset; PayloadPtr payload; PayloadAttributePtr payloadAtt; public: virtual bool incrementToken() { bool hasNext = input->incrementToken(); if (hasNext) { if (offset + length <= data.size()) { PayloadPtr p = newLucene(); payloadAtt->setPayload(p); p->setData(data, offset, length); offset += length; } else payloadAtt->setPayload(PayloadPtr()); } return hasNext; } }; /// This Analyzer uses an WhitespaceTokenizer and PayloadFilter. class PayloadAnalyzer : public Analyzer { public: PayloadAnalyzer() { fieldToData = HashMap::newInstance(); } virtual ~PayloadAnalyzer() { } LUCENE_CLASS(PayloadAnalyzer); public: HashMap fieldToData; public: void setPayloadData(const String& field, ByteArray data, int32_t offset, int32_t length) { fieldToData.put(field, newLucene(0, data, offset, length)); } void setPayloadData(const String& field, int32_t numFieldInstancesToSkip, ByteArray data, int32_t offset, int32_t length) { fieldToData.put(field, newLucene(numFieldInstancesToSkip, data, offset, length)); } virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { PayloadDataPtr payload = fieldToData.get(fieldName); TokenStreamPtr ts = newLucene(reader); if (payload) { if (payload->numFieldInstancesToSkip == 0) ts = newLucene(ts, payload->data, payload->offset, payload->length); else --payload->numFieldInstancesToSkip; } return ts; } }; static void generateRandomData(ByteArray data) { std::generate(data.get(), data.get() + data.size(), rand); } static ByteArray generateRandomData(int32_t n) { ByteArray data(ByteArray::newInstance(n)); generateRandomData(data); return data; } static 
Collection generateTerms(const String& fieldName, int32_t n) { int32_t maxDigits = (int32_t)(std::log((double)n) / std::log(10.0)); Collection terms = Collection::newInstance(n); for (int32_t i = 0; i < n; ++i) { StringStream sb; sb << L"t"; int32_t zeros = maxDigits - (int32_t)(std::log((double)i) / std::log(10.0)); for (int32_t j = 0; j < zeros; ++j) sb << L"0"; sb << i; terms[i] = newLucene(fieldName, sb.str()); } return terms; } /// Simple tests to test the Payload class BOOST_AUTO_TEST_CASE(testPayload) { ByteArray testData(ByteArray::newInstance(15)); uint8_t input[15] = { 'T', 'h', 'i', 's', ' ', 'i', 's', ' ', 'a', ' ', 't', 'e', 's', 't', '!' }; std::memcpy(testData.get(), input, 15); PayloadPtr payload = newLucene(testData); BOOST_CHECK_EQUAL(testData.size(), payload->length()); // test copyTo() ByteArray target(ByteArray::newInstance(testData.size() - 1)); BOOST_CHECK_EXCEPTION(payload->copyTo(target, 0), IndexOutOfBoundsException, check_exception(LuceneException::IndexOutOfBounds)); target.resize(testData.size() + 3); payload->copyTo(target, 3); for (int32_t i = 0; i < testData.size(); ++i) BOOST_CHECK_EQUAL(testData[i], target[i + 3]); // test toByteArray() target = payload->toByteArray(); BOOST_CHECK(testData.equals(target)); // test byteAt() for (int32_t i = 0; i < testData.size(); ++i) BOOST_CHECK_EQUAL(payload->byteAt(i), testData[i]); BOOST_CHECK_EXCEPTION(payload->byteAt(testData.size() + 1), IndexOutOfBoundsException, check_exception(LuceneException::IndexOutOfBounds)); PayloadPtr clone = boost::dynamic_pointer_cast(payload->clone()); BOOST_CHECK_EQUAL(payload->length(), clone->length()); for (int32_t i = 0; i < payload->length(); ++i) BOOST_CHECK_EQUAL(payload->byteAt(i), clone->byteAt(i)); } /// Tests whether the DocumentWriter and SegmentMerger correctly enable the payload bit in the FieldInfo BOOST_AUTO_TEST_CASE(testPayloadFieldBit) { DirectoryPtr ram = newLucene(); PayloadAnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = 
newLucene(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d = newLucene(); // this field won't have any payloads d->add(newLucene(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED)); // this field will have payloads in all docs, however not for all term positions, // so this field is used to check if the DocumentWriter correctly enables the payloads bit // even if only some term positions have payloads d->add(newLucene(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED)); d->add(newLucene(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED)); // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads // enabled in only some documents d->add(newLucene(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED)); // only add payload data for field f2 ByteArray someData(ByteArray::newInstance(8)); uint8_t input[8] = { 's', 'o', 'm', 'e', 'd', 'a', 't', 'a' }; std::memcpy(someData.get(), input, 8); analyzer->setPayloadData(L"f2", 1, someData, 0, 1); writer->addDocument(d); // flush writer->close(); SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(ram); FieldInfosPtr fi = reader->fieldInfos(); BOOST_CHECK(!fi->fieldInfo(L"f1")->storePayloads); BOOST_CHECK(fi->fieldInfo(L"f2")->storePayloads); BOOST_CHECK(!fi->fieldInfo(L"f3")->storePayloads); reader->close(); // now we add another document which has payloads for field f3 and verify if the SegmentMerger // enabled payloads for that field writer = newLucene(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); d = newLucene(); d->add(newLucene(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED)); d->add(newLucene(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED)); d->add(newLucene(L"f2", L"This field has payloads in all docs", Field::STORE_NO, 
Field::INDEX_ANALYZED)); d->add(newLucene(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED)); // add payload data for field f2 and f3 analyzer->setPayloadData(L"f2", someData, 0, 1); analyzer->setPayloadData(L"f3", someData, 0, 3); writer->addDocument(d); // force merge writer->optimize(); // flush writer->close(); reader = SegmentReader::getOnlySegmentReader(ram); fi = reader->fieldInfos(); BOOST_CHECK(!fi->fieldInfo(L"f1")->storePayloads); BOOST_CHECK(fi->fieldInfo(L"f2")->storePayloads); BOOST_CHECK(fi->fieldInfo(L"f3")->storePayloads); reader->close(); } /// Builds an index with payloads in the given Directory and performs different /// tests to verify the payload encoding static void encodingTest(DirectoryPtr dir) { PayloadAnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); // should be in sync with value in TermInfosWriter int32_t skipInterval = 16; int32_t numTerms = 5; String fieldName = L"f1"; int32_t numDocs = skipInterval + 1; // create content for the test documents with just a few terms Collection terms = generateTerms(fieldName, numTerms); StringStream sb; for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) sb << (*term)->text() << L" "; String content = sb.str(); int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2; ByteArray payloadData = generateRandomData(payloadDataLength); DocumentPtr d = newLucene(); d->add(newLucene(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED)); // add the same document multiple times to have the same payload lengths for all // occurrences within two consecutive skip intervals int32_t offset = 0; for (int32_t i = 0; i < 2 * numDocs; ++i) { analyzer->setPayloadData(fieldName, payloadData, offset, 1); offset += numTerms; writer->addDocument(d); } // make sure we create more than one segment to test merging writer->commit(); for (int32_t i 
= 0; i < numDocs; ++i) { analyzer->setPayloadData(fieldName, payloadData, offset, i); offset += i * numTerms; writer->addDocument(d); } writer->optimize(); // flush writer->close(); // Verify the index IndexReaderPtr reader = IndexReader::open(dir, true); ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength)); offset = 0; Collection tps = Collection::newInstance(numTerms); for (int32_t i = 0; i < numTerms; ++i) tps[i] = reader->termPositions(terms[i]); while (tps[0]->next()) { for (int32_t i = 1; i < numTerms; ++i) tps[i]->next(); int32_t freq = tps[0]->freq(); for (int32_t i = 0; i < freq; ++i) { for (int32_t j = 0; j < numTerms; ++j) { tps[j]->nextPosition(); tps[j]->getPayload(verifyPayloadData, offset); offset += tps[j]->getPayloadLength(); } } } for (int32_t i = 0; i < numTerms; ++i) tps[i]->close(); BOOST_CHECK(payloadData.equals(verifyPayloadData)); // test lazy skipping TermPositionsPtr tp = reader->termPositions(terms[0]); tp->next(); tp->nextPosition(); // now we don't read this payload tp->nextPosition(); BOOST_CHECK_EQUAL(1, tp->getPayloadLength()); ByteArray payload = tp->getPayload(ByteArray(), 0); BOOST_CHECK_EQUAL(payload[0], payloadData[numTerms]); tp->nextPosition(); // we don't read this payload and skip to a different document tp->skipTo(5); tp->nextPosition(); BOOST_CHECK_EQUAL(1, tp->getPayloadLength()); payload = tp->getPayload(ByteArray(), 0); BOOST_CHECK_EQUAL(payload[0], payloadData[5 * numTerms]); // Test different lengths at skip points tp->seek(terms[1]); tp->next(); tp->nextPosition(); BOOST_CHECK_EQUAL(1, tp->getPayloadLength()); tp->skipTo(skipInterval - 1); tp->nextPosition(); BOOST_CHECK_EQUAL(1, tp->getPayloadLength()); tp->skipTo(2 * skipInterval - 1); tp->nextPosition(); BOOST_CHECK_EQUAL(1, tp->getPayloadLength()); tp->skipTo(3 * skipInterval - 1); tp->nextPosition(); BOOST_CHECK_EQUAL(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength()); // Test multiple call of getPayload() tp->getPayload(ByteArray(), 
0); // it is forbidden to call getPayload() more than once without calling nextPosition() BOOST_CHECK_EXCEPTION(tp->getPayload(ByteArray(), 0), IOException, check_exception(LuceneException::IO)); reader->close(); // test long payload analyzer = newLucene(); writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); String singleTerm = L"lucene"; d = newLucene(); d->add(newLucene(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED)); // add a payload whose length is greater than the buffer size of BufferedIndexOutput payloadData = generateRandomData(2000); analyzer->setPayloadData(fieldName, payloadData, 100, 1500); writer->addDocument(d); writer->optimize(); // flush writer->close(); reader = IndexReader::open(dir, true); tp = reader->termPositions(newLucene(fieldName, singleTerm)); tp->next(); tp->nextPosition(); verifyPayloadData.resize(tp->getPayloadLength()); tp->getPayload(verifyPayloadData, 0); ByteArray portion(ByteArray::newInstance(1500)); MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500); BOOST_CHECK(portion.equals(verifyPayloadData)); reader->close(); } /// Tests if payloads are correctly stored and loaded using both RamDirectory and FSDirectory BOOST_AUTO_TEST_CASE(testPayloadsEncoding) { // first perform the test using a RAMDirectory DirectoryPtr dir = newLucene(); encodingTest(dir); // now use a FSDirectory and repeat same test String dirName(FileUtils::joinPath(getTempDir(), L"test_payloads")); dir = FSDirectory::open(dirName); encodingTest(dir); FileUtils::removeDirectory(dirName); } namespace TestThreadSafety { DECLARE_SHARED_PTR(ByteArrayPool) class ByteArrayPool : public LuceneObject { public: ByteArrayPool(int32_t capacity, int32_t size) { pool = Collection::newInstance(); for (int32_t i = 0; i < capacity; ++i) pool.add(ByteArray::newInstance(size)); } virtual ~ByteArrayPool() { } LUCENE_CLASS(ByteArrayPool); public: Collection pool; public: String bytesToString(ByteArray bytes) { SyncLock 
syncLock(this); return Base64::encode(bytes); } ByteArray get() { SyncLock syncLock(this); return pool.removeFirst(); } void release(ByteArray b) { SyncLock syncLock(this); pool.add(b); } int32_t size() { SyncLock syncLock(this); return pool.size(); } }; class PoolingPayloadTokenStream : public TokenStream { public: PoolingPayloadTokenStream(ByteArrayPoolPtr pool) { this->pool = pool; payload = pool->get(); generateRandomData(payload); term = pool->bytesToString(payload); first = true; payloadAtt = addAttribute(); termAtt = addAttribute(); } virtual ~PoolingPayloadTokenStream() { } LUCENE_CLASS(PoolingPayloadTokenStream); public: ByteArray payload; bool first; ByteArrayPoolPtr pool; String term; TermAttributePtr termAtt; PayloadAttributePtr payloadAtt; public: virtual bool incrementToken() { if (!first) return false; first = false; clearAttributes(); termAtt->setTermBuffer(term); payloadAtt->setPayload(newLucene(payload)); return true; } virtual void close() { pool->release(payload); } }; class IngesterThread : public LuceneThread { public: IngesterThread(int32_t numDocs, ByteArrayPoolPtr pool, IndexWriterPtr writer) { this->numDocs = numDocs; this->pool = pool; this->writer = writer; } virtual ~IngesterThread() { } LUCENE_CLASS(IngesterThread); protected: int32_t numDocs; ByteArrayPoolPtr pool; IndexWriterPtr writer; public: virtual void run() { try { for (int32_t j = 0; j < numDocs; ++j) { DocumentPtr d = newLucene(); d->add(newLucene(L"test", newLucene(pool))); writer->addDocument(d); } } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; } BOOST_AUTO_TEST_CASE(testThreadSafety) { int32_t numThreads = 5; int32_t numDocs = 50; TestThreadSafety::ByteArrayPoolPtr pool = newLucene(numThreads, 5); DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); Collection ingesters = Collection::newInstance(numThreads); for (int32_t i = 0; i < numThreads; ++i) { 
ingesters[i] = newLucene(numDocs, pool, writer); ingesters[i]->start(); } for (int32_t i = 0; i < numThreads; ++i) ingesters[i]->join(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermEnumPtr terms = reader->terms(); while (terms->next()) { TermPositionsPtr tp = reader->termPositions(terms->term()); while (tp->next()) { int32_t freq = tp->freq(); for (int32_t i = 0; i < freq; ++i) { tp->nextPosition(); BOOST_CHECK_EQUAL(pool->bytesToString(tp->getPayload(ByteArray::newInstance(5), 0)), terms->term()->text()); } } tp->close(); } terms->close(); reader->close(); BOOST_CHECK_EQUAL(pool->size(), numThreads); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/PositionBasedTermVectorMapperTest.cpp000066400000000000000000000070561217574114600274670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "TermVectorOffsetInfo.h" #include "PositionBasedTermVectorMapper.h" #include "BitSet.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PositionBasedTermVectorMapperTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testPayload) { Collection tokens = newCollection(L"here", L"is", L"some", L"text", L"to", L"test", L"extra"); Collection< Collection > thePositions = Collection< Collection >::newInstance(tokens.size()); Collection< Collection > offsets = Collection< Collection >::newInstance(tokens.size()); int32_t numPositions = 0; // save off the last one so we can add it with the same positions as some of the others, but in a predictable way for (int32_t i = 0; i < tokens.size() - 1; ++i) { thePositions[i] = Collection::newInstance(2 * i + 1); // give 'em all some positions for (int32_t j = 0; j < thePositions[i].size(); ++j) thePositions[i][j] = numPositions++; offsets[i] = Collection::newInstance(thePositions[i].size()); for (int32_t j = 0; j < offsets[i].size(); ++j) offsets[i][j] = newLucene(j, j + 1); // the actual value here doesn't much matter } thePositions[tokens.size() - 1] = Collection::newInstance(1); thePositions[tokens.size() - 1][0] = 0; // put this at the same position as "here" offsets[tokens.size() - 1] = Collection::newInstance(1); offsets[tokens.size() - 1][0] = newLucene(0, 1); PositionBasedTermVectorMapperPtr mapper = newLucene(); mapper->setExpectations(L"test", tokens.size(), true, true); // Test single position for (int32_t i = 0; i < tokens.size(); ++i) { String token = tokens[i]; mapper->map(token, 1, Collection(), thePositions[i]); } MapStringMapIntTermVectorsPositionInfo map = mapper->getFieldToTerms(); BOOST_CHECK(map); BOOST_CHECK_EQUAL(map.size(), 1); MapIntTermVectorsPositionInfo positions = map.get(L"test"); BOOST_CHECK(positions); BOOST_CHECK_EQUAL(positions.size(), 
numPositions); BitSetPtr bits = newLucene(numPositions); for (MapIntTermVectorsPositionInfo::iterator entry = positions.begin(); entry != positions.end(); ++entry) { BOOST_CHECK(entry->second); int32_t pos = entry->first; bits->set(pos); BOOST_CHECK_EQUAL(entry->second->getPosition(), pos); BOOST_CHECK(entry->second->getOffsets()); if (pos == 0) { BOOST_CHECK_EQUAL(entry->second->getTerms().size(), 2); // need a test for multiple terms at one pos BOOST_CHECK_EQUAL(entry->second->getOffsets().size(), 2); } else { BOOST_CHECK_EQUAL(entry->second->getTerms().size(), 1); // need a test for multiple terms at one pos BOOST_CHECK_EQUAL(entry->second->getOffsets().size(), 1); } } BOOST_CHECK_EQUAL(bits->cardinality(), numPositions); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/SegmentMergerTest.cpp000066400000000000000000000122101217574114600242740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "DocHelper.h" #include "RAMDirectory.h" #include "Document.h" #include "SegmentReader.h" #include "SegmentInfo.h" #include "IndexReader.h" #include "SegmentMerger.h" #include "TermDocs.h" #include "Term.h" #include "TermFreqVector.h" #include "Field.h" #include "DefaultSimilarity.h" #include "TermPositionVector.h" using namespace Lucene; class SegmentMergerTestFixture : public LuceneTestFixture, public DocHelper { public: SegmentMergerTestFixture() { mergedDir = newLucene(); mergedSegment = L"test"; merge1Dir = newLucene(); doc1 = newLucene(); merge2Dir = newLucene(); doc2 = newLucene(); DocHelper::setupDoc(doc1); SegmentInfoPtr info1 = DocHelper::writeDoc(merge1Dir, doc1); DocHelper::setupDoc(doc2); SegmentInfoPtr info2 = DocHelper::writeDoc(merge2Dir, doc2); reader1 = SegmentReader::get(true, info1, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); reader2 = SegmentReader::get(true, info2, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); } virtual ~SegmentMergerTestFixture() { } protected: // The variables for the new merged segment DirectoryPtr mergedDir; String mergedSegment; // First segment to be merged DirectoryPtr merge1Dir; DocumentPtr doc1; SegmentReaderPtr reader1; // Second Segment to be merged DirectoryPtr merge2Dir; DocumentPtr doc2; SegmentReaderPtr reader2; public: void checkNorms(IndexReaderPtr reader) { // test omit norms for (int32_t i = 0; i < DocHelper::fields.size(); ++i) { FieldPtr f = DocHelper::fields[i]; if (f->isIndexed()) { BOOST_CHECK_EQUAL(reader->hasNorms(f->name()), !f->getOmitNorms()); BOOST_CHECK_EQUAL(reader->hasNorms(f->name()), !DocHelper::noNorms.contains(f->name())); if (!reader->hasNorms(f->name())) { // test for fake norms of 1.0 or null depending on the flag ByteArray norms = reader->norms(f->name()); uint8_t norm1 = DefaultSimilarity::encodeNorm(1.0); BOOST_CHECK(!norms); 
norms.resize(reader->maxDoc()); reader->norms(f->name(), norms, 0); for (int32_t j = 0; j < reader->maxDoc(); ++j) BOOST_CHECK_EQUAL(norms[j], norm1); } } } } }; BOOST_FIXTURE_TEST_SUITE(SegmentMergerTest, SegmentMergerTestFixture) BOOST_AUTO_TEST_CASE(testMerge) { SegmentMergerPtr merger = newLucene(mergedDir, mergedSegment); merger->add(reader1); merger->add(reader2); int32_t docsMerged = merger->merge(); merger->closeReaders(); BOOST_CHECK_EQUAL(docsMerged, 2); // Should be able to open a new SegmentReader against the new directory SegmentReaderPtr mergedReader = SegmentReader::get(true, newLucene(mergedSegment, docsMerged, mergedDir, false, true), IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); BOOST_CHECK(mergedReader); BOOST_CHECK_EQUAL(mergedReader->numDocs(), 2); DocumentPtr newDoc1 = mergedReader->document(0); BOOST_CHECK(newDoc1); // There are 2 unstored fields on the document BOOST_CHECK_EQUAL(DocHelper::numFields(newDoc1), DocHelper::numFields(doc1) - DocHelper::unstored.size()); DocumentPtr newDoc2 = mergedReader->document(1); BOOST_CHECK(newDoc2); BOOST_CHECK_EQUAL(DocHelper::numFields(newDoc2), DocHelper::numFields(doc2) - DocHelper::unstored.size()); TermDocsPtr termDocs = mergedReader->termDocs(newLucene(DocHelper::TEXT_FIELD_2_KEY, L"field")); BOOST_CHECK(termDocs); BOOST_CHECK(termDocs->next()); HashSet stored = mergedReader->getFieldNames(IndexReader::FIELD_OPTION_INDEXED_WITH_TERMVECTOR); BOOST_CHECK(stored); BOOST_CHECK_EQUAL(stored.size(), 3); TermFreqVectorPtr vector = mergedReader->getTermFreqVector(0, DocHelper::TEXT_FIELD_2_KEY); BOOST_CHECK(vector); Collection terms = vector->getTerms(); BOOST_CHECK(terms); BOOST_CHECK_EQUAL(terms.size(), 3); Collection freqs = vector->getTermFrequencies(); BOOST_CHECK(freqs); BOOST_CHECK(boost::dynamic_pointer_cast(vector)); for (int32_t i = 0; i < terms.size(); ++i) { String term = terms[i]; int32_t freq = freqs[i]; BOOST_CHECK(String(DocHelper::FIELD_2_TEXT).find(term) != String::npos); 
BOOST_CHECK_EQUAL(DocHelper::FIELD_2_FREQS[i], freq); } checkNorms(mergedReader); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/SegmentReaderTest.cpp000066400000000000000000000153371217574114600242720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "DocHelper.h" #include "RAMDirectory.h" #include "Document.h" #include "SegmentReader.h" #include "SegmentInfo.h" #include "Field.h" #include "TermEnum.h" #include "Term.h" #include "TermDocs.h" #include "TermPositions.h" #include "DefaultSimilarity.h" #include "TermFreqVector.h" using namespace Lucene; class SegmentReaderTestFixture : public LuceneTestFixture, public DocHelper { public: SegmentReaderTestFixture() { dir = newLucene(); testDoc = newLucene(); DocHelper::setupDoc(testDoc); SegmentInfoPtr info = DocHelper::writeDoc(dir, testDoc); reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); } virtual ~SegmentReaderTestFixture() { } protected: RAMDirectoryPtr dir; DocumentPtr testDoc; SegmentReaderPtr reader; }; BOOST_FIXTURE_TEST_SUITE(SegmentReaderTest, SegmentReaderTestFixture) BOOST_AUTO_TEST_CASE(testSegmentReader) { BOOST_CHECK(dir); BOOST_CHECK(reader); BOOST_CHECK(DocHelper::nameValues.size() > 0); BOOST_CHECK_EQUAL(DocHelper::numFields(testDoc), DocHelper::all.size()); } BOOST_AUTO_TEST_CASE(testDocument) { BOOST_CHECK_EQUAL(reader->numDocs(), 1); BOOST_CHECK(reader->maxDoc() >= 1); DocumentPtr result = reader->document(0); BOOST_CHECK(result); // There are 2 unstored fields on the document that are not preserved across writing 
BOOST_CHECK_EQUAL(DocHelper::numFields(result), DocHelper::numFields(testDoc) - DocHelper::unstored.size()); Collection fields = result->getFields(); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { BOOST_CHECK(*field); BOOST_CHECK(DocHelper::nameValues.contains((*field)->name())); } } BOOST_AUTO_TEST_CASE(testDelete) { DocumentPtr docToDelete = newLucene(); DocHelper::setupDoc(docToDelete); SegmentInfoPtr info = DocHelper::writeDoc(dir, docToDelete); SegmentReaderPtr deleteReader = SegmentReader::get(false, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); BOOST_CHECK(deleteReader); BOOST_CHECK_EQUAL(deleteReader->numDocs(), 1); deleteReader->deleteDocument(0); BOOST_CHECK(deleteReader->isDeleted(0)); BOOST_CHECK(deleteReader->hasDeletions()); BOOST_CHECK_EQUAL(deleteReader->numDocs(), 0); } BOOST_AUTO_TEST_CASE(testGetFieldNameVariations) { HashSet result = reader->getFieldNames(IndexReader::FIELD_OPTION_ALL); BOOST_CHECK(result); BOOST_CHECK_EQUAL(result.size(), DocHelper::all.size()); for (HashSet::iterator field = result.begin(); field != result.end(); ++field) BOOST_CHECK(DocHelper::nameValues.contains(*field) || field->empty()); result = reader->getFieldNames(IndexReader::FIELD_OPTION_INDEXED); BOOST_CHECK(result); BOOST_CHECK_EQUAL(result.size(), DocHelper::indexed.size()); for (HashSet::iterator field = result.begin(); field != result.end(); ++field) BOOST_CHECK(DocHelper::indexed.contains(*field) || field->empty()); result = reader->getFieldNames(IndexReader::FIELD_OPTION_UNINDEXED); BOOST_CHECK(result); BOOST_CHECK_EQUAL(result.size(), DocHelper::unindexed.size()); // Get all indexed fields that are storing term vectors result = reader->getFieldNames(IndexReader::FIELD_OPTION_INDEXED_WITH_TERMVECTOR); BOOST_CHECK(result); BOOST_CHECK_EQUAL(result.size(), DocHelper::termvector.size()); result = reader->getFieldNames(IndexReader::FIELD_OPTION_INDEXED_NO_TERMVECTOR); BOOST_CHECK(result); BOOST_CHECK_EQUAL(result.size(), 
DocHelper::notermvector.size()); } BOOST_AUTO_TEST_CASE(testTerms) { TermEnumPtr terms = reader->terms(); BOOST_CHECK(terms); while (terms->next()) { TermPtr term = terms->term(); BOOST_CHECK(term); String fieldValue = DocHelper::nameValues.get(term->field()); BOOST_CHECK_NE(fieldValue.find(term->text()), -1); } TermDocsPtr termDocs = reader->termDocs(); BOOST_CHECK(termDocs); termDocs->seek(newLucene(DocHelper::TEXT_FIELD_1_KEY, L"field")); BOOST_CHECK(termDocs->next()); termDocs->seek(newLucene(DocHelper::NO_NORMS_KEY, DocHelper::NO_NORMS_TEXT)); BOOST_CHECK(termDocs->next()); TermPositionsPtr positions = reader->termPositions(); positions->seek(newLucene(DocHelper::TEXT_FIELD_1_KEY, L"field")); BOOST_CHECK(positions); BOOST_CHECK_EQUAL(positions->doc(), 0); BOOST_CHECK(positions->nextPosition() >= 0); } BOOST_AUTO_TEST_CASE(testNorms) { // test omit norms for (int32_t i = 0; i < DocHelper::fields.size(); ++i) { FieldPtr f = DocHelper::fields[i]; if (f->isIndexed()) { bool a = reader->hasNorms(f->name()); bool b = !f->getOmitNorms(); BOOST_CHECK_EQUAL(reader->hasNorms(f->name()), !f->getOmitNorms()); BOOST_CHECK_EQUAL(reader->hasNorms(f->name()), !DocHelper::noNorms.contains(f->name())); if (!reader->hasNorms(f->name())) { // test for fake norms of 1.0 or null depending on the flag ByteArray norms = reader->norms(f->name()); uint8_t norm1 = DefaultSimilarity::encodeNorm(1.0); BOOST_CHECK(!norms); norms.resize(reader->maxDoc()); reader->norms(f->name(), norms, 0); for (int32_t j = 0; j < reader->maxDoc(); ++j) BOOST_CHECK_EQUAL(norms[j], norm1); } } } } BOOST_AUTO_TEST_CASE(testTermVectors) { TermFreqVectorPtr result = reader->getTermFreqVector(0, DocHelper::TEXT_FIELD_2_KEY); BOOST_CHECK(result); Collection terms = result->getTerms(); Collection freqs = result->getTermFrequencies(); BOOST_CHECK(terms); BOOST_CHECK_EQUAL(terms.size(), 3); BOOST_CHECK(freqs); BOOST_CHECK_EQUAL(freqs.size(), 3); for (int32_t i = 0; i < terms.size(); ++i) { String term = terms[i]; 
int32_t freq = freqs[i]; BOOST_CHECK_NE(String(DocHelper::FIELD_2_TEXT).find(term), -1); BOOST_CHECK(freq > 0); } Collection results = reader->getTermFreqVectors(0); BOOST_CHECK(results); BOOST_CHECK_EQUAL(results.size(), 3); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/SegmentTermDocsTest.cpp000066400000000000000000000202301217574114600245740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "DocHelper.h" #include "Document.h" #include "MockRAMDirectory.h" #include "SegmentInfo.h" #include "SegmentReader.h" #include "SegmentTermDocs.h" #include "Term.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "IndexReader.h" #include "TermDocs.h" #include "Field.h" using namespace Lucene; class SegmentTermDocsTestFixture : public LuceneTestFixture, public DocHelper { public: SegmentTermDocsTestFixture() { testDoc = newLucene(); dir = newLucene(); DocHelper::setupDoc(testDoc); info = DocHelper::writeDoc(dir, testDoc); } virtual ~SegmentTermDocsTestFixture() { } protected: DocumentPtr testDoc; DirectoryPtr dir; SegmentInfoPtr info; public: void checkTermDocs(int32_t indexDivisor) { // After adding the document, we should be able to read it back in SegmentReaderPtr reader = SegmentReader::get(true, info, indexDivisor); BOOST_CHECK(reader); BOOST_CHECK_EQUAL(indexDivisor, reader->getTermInfosIndexDivisor()); SegmentTermDocsPtr segTermDocs = newLucene(reader); BOOST_CHECK(segTermDocs); segTermDocs->seek(newLucene(DocHelper::TEXT_FIELD_2_KEY, L"field")); if (segTermDocs->next()) { int32_t docId = segTermDocs->doc(); BOOST_CHECK_EQUAL(docId, 0); 
int32_t freq = segTermDocs->freq(); BOOST_CHECK_EQUAL(freq, 3); } reader->close(); } void checkBadSeek(int32_t indexDivisor) { { // After adding the document, we should be able to read it back in SegmentReaderPtr reader = SegmentReader::get(true, info, indexDivisor); BOOST_CHECK(reader); SegmentTermDocsPtr segTermDocs = newLucene(reader); BOOST_CHECK(segTermDocs); segTermDocs->seek(newLucene(L"textField2", L"bad")); BOOST_CHECK(!segTermDocs->next()); reader->close(); } { // After adding the document, we should be able to read it back in SegmentReaderPtr reader = SegmentReader::get(true, info, indexDivisor); BOOST_CHECK(reader); SegmentTermDocsPtr segTermDocs = newLucene(reader); BOOST_CHECK(segTermDocs); segTermDocs->seek(newLucene(L"junk", L"bad")); BOOST_CHECK(!segTermDocs->next()); reader->close(); } } void checkSkipTo(int32_t indexDivisor) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); TermPtr ta = newLucene(L"content", L"aaa"); for (int32_t i = 0; i < 10; ++i) addDoc(writer, L"aaa aaa aaa aaa"); TermPtr tb = newLucene(L"content", L"bbb"); for (int32_t i = 0; i < 16; ++i) addDoc(writer, L"bbb bbb bbb bbb"); TermPtr tc = newLucene(L"content", L"ccc"); for (int32_t i = 0; i < 50; ++i) addDoc(writer, L"ccc ccc ccc ccc"); // assure that we deal with a single segment writer->optimize(); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, IndexDeletionPolicyPtr(), true, indexDivisor); TermDocsPtr tdocs = reader->termDocs(); // without optimization (assumption skipInterval == 16) // with next tdocs->seek(ta); BOOST_CHECK(tdocs->next()); BOOST_CHECK_EQUAL(0, tdocs->doc()); BOOST_CHECK_EQUAL(4, tdocs->freq()); BOOST_CHECK(tdocs->next()); BOOST_CHECK_EQUAL(1, tdocs->doc()); BOOST_CHECK_EQUAL(4, tdocs->freq()); BOOST_CHECK(tdocs->skipTo(0)); BOOST_CHECK_EQUAL(2, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(4)); BOOST_CHECK_EQUAL(4, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(9)); 
BOOST_CHECK_EQUAL(9, tdocs->doc()); BOOST_CHECK(!tdocs->skipTo(10)); // without next tdocs->seek(ta); BOOST_CHECK(tdocs->skipTo(0)); BOOST_CHECK_EQUAL(0, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(4)); BOOST_CHECK_EQUAL(4, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(9)); BOOST_CHECK_EQUAL(9, tdocs->doc()); BOOST_CHECK(!tdocs->skipTo(10)); // exactly skipInterval documents and therefore with optimization // with next tdocs->seek(tb); BOOST_CHECK(tdocs->next()); BOOST_CHECK_EQUAL(10, tdocs->doc()); BOOST_CHECK_EQUAL(4, tdocs->freq()); BOOST_CHECK(tdocs->next()); BOOST_CHECK_EQUAL(11, tdocs->doc()); BOOST_CHECK_EQUAL(4, tdocs->freq()); BOOST_CHECK(tdocs->skipTo(5)); BOOST_CHECK_EQUAL(12, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(15)); BOOST_CHECK_EQUAL(15, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(24)); BOOST_CHECK_EQUAL(24, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(25)); BOOST_CHECK_EQUAL(25, tdocs->doc()); BOOST_CHECK(!tdocs->skipTo(26)); // without next tdocs->seek(tb); BOOST_CHECK(tdocs->skipTo(5)); BOOST_CHECK_EQUAL(10, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(15)); BOOST_CHECK_EQUAL(15, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(24)); BOOST_CHECK_EQUAL(24, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(25)); BOOST_CHECK_EQUAL(25, tdocs->doc()); BOOST_CHECK(!tdocs->skipTo(26)); // much more than skipInterval documents and therefore with optimization // with next tdocs->seek(tc); BOOST_CHECK(tdocs->next()); BOOST_CHECK_EQUAL(26, tdocs->doc()); BOOST_CHECK_EQUAL(4, tdocs->freq()); BOOST_CHECK(tdocs->next()); BOOST_CHECK_EQUAL(27, tdocs->doc()); BOOST_CHECK_EQUAL(4, tdocs->freq()); BOOST_CHECK(tdocs->skipTo(5)); BOOST_CHECK_EQUAL(28, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(40)); BOOST_CHECK_EQUAL(40, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(57)); BOOST_CHECK_EQUAL(57, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(74)); BOOST_CHECK_EQUAL(74, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(75)); BOOST_CHECK_EQUAL(75, tdocs->doc()); BOOST_CHECK(!tdocs->skipTo(76)); // without next 
tdocs->seek(tc); BOOST_CHECK(tdocs->skipTo(5)); BOOST_CHECK_EQUAL(26, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(40)); BOOST_CHECK_EQUAL(40, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(57)); BOOST_CHECK_EQUAL(57, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(74)); BOOST_CHECK_EQUAL(74, tdocs->doc()); BOOST_CHECK(tdocs->skipTo(75)); BOOST_CHECK_EQUAL(75, tdocs->doc()); BOOST_CHECK(!tdocs->skipTo(76)); tdocs->close(); reader->close(); dir->close(); } void addDoc(IndexWriterPtr writer, const String& value) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", value, Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } }; BOOST_FIXTURE_TEST_SUITE(SegmentTermDocsTest, SegmentTermDocsTestFixture) BOOST_AUTO_TEST_CASE(testTermDocs) { checkTermDocs(1); } BOOST_AUTO_TEST_CASE(testBadSeek) { checkBadSeek(1); } BOOST_AUTO_TEST_CASE(testSkipTo) { checkSkipTo(1); } BOOST_AUTO_TEST_CASE(testIndexDivisor) { dir = newLucene(); testDoc = newLucene(); DocHelper::setupDoc(testDoc); DocHelper::writeDoc(dir, testDoc); checkTermDocs(2); checkBadSeek(2); checkSkipTo(2); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/SegmentTermEnumTest.cpp000066400000000000000000000073511217574114600246210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexWriter.h" #include "MockRAMDirectory.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "TermEnum.h" #include "Term.h" #include "SegmentReader.h" #include "SegmentTermEnum.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SegmentTermEnumTest, LuceneTestFixture) static void addDoc(IndexWriterPtr writer, const String& value) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", value, Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } static void verifyDocFreq(DirectoryPtr dir) { IndexReaderPtr reader = IndexReader::open(dir, true); // create enumeration of all terms TermEnumPtr termEnum = reader->terms(); // go to the first term (aaa) termEnum->next(); // assert that term is 'aaa' BOOST_CHECK_EQUAL(L"aaa", termEnum->term()->text()); BOOST_CHECK_EQUAL(200, termEnum->docFreq()); // go to the second term (bbb) termEnum->next(); // assert that term is 'bbb' BOOST_CHECK_EQUAL(L"bbb", termEnum->term()->text()); BOOST_CHECK_EQUAL(100, termEnum->docFreq()); termEnum->close(); // create enumeration of terms after term 'aaa', including 'aaa' termEnum = reader->terms(newLucene(L"content", L"aaa")); // assert that term is 'aaa' BOOST_CHECK_EQUAL(L"aaa", termEnum->term()->text()); BOOST_CHECK_EQUAL(200, termEnum->docFreq()); // go to term 'bbb' termEnum->next(); // assert that term is 'bbb' BOOST_CHECK_EQUAL(L"bbb", termEnum->term()->text()); BOOST_CHECK_EQUAL(100, termEnum->docFreq()); termEnum->close(); } BOOST_AUTO_TEST_CASE(testTermEnum) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); // ADD 100 documents with term : aaa // add 100 documents with terms: aaa bbb // Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100 for 
(int32_t i = 0; i < 100; ++i) { addDoc(writer, L"aaa"); addDoc(writer, L"aaa bbb"); } writer->close(); // verify document frequency of terms in an unoptimized index verifyDocFreq(dir); // merge segments by optimizing the index writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); writer->optimize(); writer->close(); // verify document frequency of terms in an optimized index verifyDocFreq(dir); } BOOST_AUTO_TEST_CASE(testPrevTermAtEnd) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); addDoc(writer, L"aaa bbb"); writer->close(); SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(dir); SegmentTermEnumPtr termEnum = boost::dynamic_pointer_cast(reader->terms()); BOOST_CHECK(termEnum->next()); BOOST_CHECK_EQUAL(L"aaa", termEnum->term()->text()); BOOST_CHECK(termEnum->next()); BOOST_CHECK_EQUAL(L"aaa", termEnum->prev()->text()); BOOST_CHECK_EQUAL(L"bbb", termEnum->term()->text()); BOOST_CHECK(!termEnum->next()); BOOST_CHECK_EQUAL(L"bbb", termEnum->prev()->text()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/SnapshotDeletionPolicyTest.cpp000066400000000000000000000200141217574114600261740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FSDirectory.h" #include "MockRAMDirectory.h" #include "SnapshotDeletionPolicy.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "LuceneThread.h" #include "IndexFileDeleter.h" #include "IndexInput.h" #include "MiscUtils.h" #include "FileUtils.h" using namespace Lucene; class SnapshotThread : public LuceneThread { public: SnapshotThread(int64_t stopTime, IndexWriterPtr writer) { this->stopTime = stopTime; this->writer = writer; } virtual ~SnapshotThread() { } LUCENE_CLASS(SnapshotThread); protected: int64_t stopTime; IndexWriterPtr writer; public: virtual void run() { try { DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); do { for (int32_t i = 0; i < 27; ++i) { writer->addDocument(doc); if (i % 2 == 0) writer->commit(); } LuceneThread::threadSleep(1); } while ((int64_t)MiscUtils::currentTimeMillis() < stopTime); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; class SnapshotDeletionPolicyFixture : public LuceneTestFixture { public: SnapshotDeletionPolicyFixture() { buffer = ByteArray::newInstance(4096); } virtual ~SnapshotDeletionPolicyFixture() { } public: static const String INDEX_PATH; ByteArray buffer; public: void runTest(DirectoryPtr dir) { // Run for ~1 seconds int64_t stopTime = MiscUtils::currentTimeMillis() + 1000; SnapshotDeletionPolicyPtr dp = newLucene(newLucene()); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), (IndexDeletionPolicyPtr)dp, IndexWriter::MaxFieldLengthUNLIMITED); // Force frequent flushes writer->setMaxBufferedDocs(2); LuceneThreadPtr thread = newLucene(stopTime, writer); thread->start(); // While 
the above indexing thread is running, take many backups do { backupIndex(dir, dp); LuceneThread::threadSleep(20); if (!thread->isAlive()) break; } while ((int64_t)MiscUtils::currentTimeMillis() < stopTime); thread->join(); // Add one more document to force writer to commit a final segment, so deletion policy has a chance to delete again DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", L"aaa", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); // Make sure we don't have any leftover files in the directory writer->close(); checkNoUnreferencedFiles(dir); } /// Example showing how to use the SnapshotDeletionPolicy to take a backup. This method does not /// really do a backup; instead, it reads every byte of every file just to test that the files /// indeed exist and are readable even while the index is changing. void backupIndex(DirectoryPtr dir, SnapshotDeletionPolicyPtr dp) { // To backup an index we first take a snapshot LuceneException finally; try { copyFiles(dir, boost::dynamic_pointer_cast(dp->snapshot())); } catch (LuceneException& e) { finally = e; } // Make sure to release the snapshot, otherwise these files will never be deleted during this // IndexWriter session dp->release(); finally.throwException(); } void copyFiles(DirectoryPtr dir, IndexCommitPtr cp) { // While we hold the snapshot, and nomatter how long we take to do the backup, the IndexWriter will // never delete the files in the snapshot HashSet files = cp->getFileNames(); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { // NOTE: in a real backup you would not use readFile; you would need to use something else // that copies the file to a backup location. This could even be a spawned shell process // (eg "tar", "zip") that takes the list of files and builds a backup. 
readFile(dir, *fileName); } } void readFile(DirectoryPtr dir, const String& name) { IndexInputPtr input = dir->openInput(name); LuceneException finally; try { int64_t size = dir->fileLength(name); int64_t bytesLeft = size; while (bytesLeft > 0) { int32_t numToRead = bytesLeft < buffer.size() ? (int32_t)bytesLeft : buffer.size(); input->readBytes(buffer.get(), 0, numToRead, false); bytesLeft -= numToRead; } // Don't do this in your real backups! This is just to force a backup to take a somewhat // long time, to make sure we are exercising the fact that the IndexWriter should not delete // this file even when I take my time reading it. LuceneThread::threadSleep(1); } catch (LuceneException& e) { finally = e; } input->close(); finally.throwException(); } void checkNoUnreferencedFiles(DirectoryPtr dir) { HashSet _startFiles = dir->listAll(); SegmentInfosPtr infos = newLucene(); infos->read(dir); IndexFileDeleterPtr deleter = newLucene(dir, newLucene(), infos, InfoStreamPtr(), DocumentsWriterPtr(), HashSet()); HashSet _endFiles = dir->listAll(); Collection startFiles = Collection::newInstance(_startFiles.begin(), _startFiles.end()); Collection endFiles = Collection::newInstance(_endFiles.begin(), _endFiles.end()); std::sort(startFiles.begin(), startFiles.end()); std::sort(endFiles.begin(), endFiles.end()); BOOST_CHECK(startFiles.equals(endFiles)); } }; const String SnapshotDeletionPolicyFixture::INDEX_PATH = L"test.snapshots"; BOOST_FIXTURE_TEST_SUITE(SnapshotDeletionPolicyTest, SnapshotDeletionPolicyFixture) BOOST_AUTO_TEST_CASE(testSnapshotDeletionPolicy) { String dir = getTempDir(INDEX_PATH); LuceneException finally; try { DirectoryPtr fsDir = FSDirectory::open(dir); runTest(fsDir); fsDir->close(); } catch (LuceneException& e) { finally = e; } FileUtils::removeDirectory(dir); finally.throwException(); MockRAMDirectoryPtr dir2 = newLucene(); runTest(dir2); dir2->close(); } BOOST_AUTO_TEST_CASE(testNoCommits) { // Tests that if there were no commits when snapshot() is 
called, then // IllegalStateException is thrown rather than NPE. SnapshotDeletionPolicyPtr sdp = newLucene(newLucene()); BOOST_CHECK_EXCEPTION(sdp->snapshot(), IllegalStateException, check_exception(LuceneException::IllegalState)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/StressIndexingTest.cpp000066400000000000000000000637261217574114600245230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "ConcurrentMergeScheduler.h" #include "FSDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "LuceneThread.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "Term.h" #include "TermQuery.h" #include "IndexReader.h" #include "WhitespaceAnalyzer.h" #include "TermDocs.h" #include "TermEnum.h" #include "TermFreqVector.h" #include "TermPositionVector.h" #include "TermVectorOffsetInfo.h" #include "SegmentTermPositionVector.h" #include "Random.h" #include "MiscUtils.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(StressIndexingTest, LuceneTestFixture) DECLARE_SHARED_PTR(DocsAndWriter) class DocsAndWriter : public LuceneObject { public: virtual ~DocsAndWriter() { } LUCENE_CLASS(DocsAndWriter); public: HashMap docs; IndexWriterPtr writer; }; class MockIndexWriter : public IndexWriter { public: MockIndexWriter(DirectoryPtr dir, AnalyzerPtr a, bool create, int32_t mfl) : IndexWriter(dir, a, create, mfl) { rand = newLucene(); } virtual ~MockIndexWriter() { } LUCENE_CLASS(MockIndexWriter); protected: RandomPtr rand; public: virtual bool testPoint(const String& 
name) { if (rand->nextInt(4) == 2) LuceneThread::threadYield(); return true; } }; static int32_t bigFieldSize = 10; static int32_t maxFields = 4; static bool sameFieldOrder = false; static int32_t mergeFactor = 3; static int32_t maxBufferedDocs = 3; static int32_t seed = 0; DECLARE_SHARED_PTR(IndexingThread) struct lessFieldName { inline bool operator()(const FieldablePtr& first, const FieldablePtr& second) const { return (first->name() < second->name()); } }; class IndexingThread : public LuceneThread { public: IndexingThread() { base = 0; range = 0; iterations = 0; docs = HashMap::newInstance(); buffer.resize(100); r = newLucene(); } virtual ~IndexingThread() { } LUCENE_CLASS(IndexingThread); public: IndexWriterPtr w; int32_t base; int32_t range; int32_t iterations; HashMap docs; CharArray buffer; RandomPtr r; public: int32_t nextInt(int32_t limit = INT_MAX) { return r->nextInt(limit); } /// start is inclusive and end is exclusive int32_t nextInt(int32_t start, int32_t end) { return start + r->nextInt(end - start); } int32_t addUTF8Token(int32_t start) { int32_t end = start + nextInt(20); if (buffer.size() < 1 + end) buffer.resize((int32_t)((double)(1 + end) * 1.25)); for (int32_t i = start; i < end; ++i) { int32_t t = nextInt(5); if (t == 0 && i < end - 1) { #ifdef LPP_UNICODE_CHAR_SIZE_2 // Make a surrogate pair // High surrogate buffer[i++] = (wchar_t)nextInt(0xd800, 0xdc00); // Low surrogate buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); #else buffer[i] = (wchar_t)nextInt(0x10dc00, 0x10e000); #endif } else if (t <= 1) buffer[i] = (wchar_t)nextInt(0x01, 0x80); else if (t == 2) buffer[i] = (wchar_t)nextInt(0x80, 0x800); else if (t == 3) buffer[i] = (wchar_t)nextInt(0x800, 0xd800); else if (t == 4) buffer[i] = (wchar_t)nextInt(0xe000, 0xfff0); } buffer[end] = L' '; return 1 + end; } String getString(int32_t tokens) { tokens = tokens != 0 ? 
tokens : r->nextInt(4) + 1; // Half the time make a random UTF8 string if (nextInt() % 2 == 1) return getUTF8String(tokens); CharArray arr(CharArray::newInstance(tokens * 2)); for (int32_t i = 0; i < tokens; ++i) { arr[i * 2] = (wchar_t)(L'A' + r->nextInt(10)); arr[i * 2 + 1] = L' '; } return String(arr.get(), arr.size()); } String getUTF8String(int32_t tokens) { int32_t upto = 0; MiscUtils::arrayFill(buffer.get(), 0, buffer.size(), (wchar_t)0); for (int32_t i = 0; i < tokens; ++i) upto = addUTF8Token(upto); return String(buffer.get(), upto); } String getIdString() { return StringUtils::toString(base + nextInt(range)); } void indexDoc() { DocumentPtr d = newLucene(); Collection fields = Collection::newInstance(); String idString = getIdString(); FieldPtr idField = newLucene(newLucene(L"id", L"")->field(), idString, Field::STORE_YES, Field::INDEX_ANALYZED_NO_NORMS); fields.add(idField); int32_t numFields = nextInt(maxFields); for (int32_t i = 0; i < numFields; ++i) { Field::TermVector tvVal = Field::TERM_VECTOR_NO; switch (nextInt(4)) { case 0: tvVal = Field::TERM_VECTOR_NO; break; case 1: tvVal = Field::TERM_VECTOR_YES; break; case 2: tvVal = Field::TERM_VECTOR_WITH_POSITIONS; break; case 3: tvVal = Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS; break; } switch (nextInt(4)) { case 0: fields.add(newLucene(L"f" + StringUtils::toString(nextInt(100)), getString(1), Field::STORE_YES, Field::INDEX_ANALYZED_NO_NORMS, tvVal)); break; case 1: fields.add(newLucene(L"f" + StringUtils::toString(nextInt(100)), getString(0), Field::STORE_NO, Field::INDEX_ANALYZED, tvVal)); break; case 2: fields.add(newLucene(L"f" + StringUtils::toString(nextInt(100)), getString(0), Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_NO)); break; case 3: fields.add(newLucene(L"f" + StringUtils::toString(nextInt(100)), getString(bigFieldSize), Field::STORE_YES, Field::INDEX_ANALYZED, tvVal)); break; } } if (sameFieldOrder) std::sort(fields.begin(), fields.end(), lessFieldName()); else { // random 
placement of id field also std::swap(*fields.begin(), *(fields.begin() + nextInt(fields.size()))); } for (int32_t i = 0; i < fields.size(); ++i) d->add(fields[i]); w->updateDocument(newLucene(L"id", L"")->createTerm(idString), d); docs.put(idString, d); } void deleteDoc() { String idString = getIdString(); w->deleteDocuments(newLucene(L"id", L"")->createTerm(idString)); docs.remove(idString); } void deleteByQuery() { String idString = getIdString(); w->deleteDocuments(newLucene(newLucene(L"id", L"")->createTerm(idString))); docs.remove(idString); } virtual void run() { try { r->setSeed(base + range + seed); for (int32_t i = 0; i < iterations; ++i) { int32_t what = nextInt(100); if (what < 5) deleteDoc(); else if (what < 10) deleteByQuery(); else indexDoc(); } } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; static void verifyEquals(IndexReaderPtr r1, DirectoryPtr dir2, const String& idField); static void verifyEquals(DirectoryPtr dir1, DirectoryPtr dir2, const String& idField); static void verifyEquals(IndexReaderPtr r1, IndexReaderPtr r2, const String& idField); static void verifyEquals(DocumentPtr d1, DocumentPtr d2); static void verifyEquals(Collection d1, Collection d2); static DocsAndWriterPtr indexRandomIWReader(int32_t numThreads, int32_t iterations, int32_t range, DirectoryPtr dir) { HashMap docs = HashMap::newInstance(); IndexWriterPtr w = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); w->setUseCompoundFile(false); // force many merges w->setMergeFactor(mergeFactor); w->setRAMBufferSizeMB(0.1); w->setMaxBufferedDocs(maxBufferedDocs); Collection threads = Collection::newInstance(numThreads); for (int32_t i = 0; i < threads.size(); ++i) { IndexingThreadPtr th = newLucene(); th->w = w; th->base = 1000000 * i; th->range = range; th->iterations = iterations; threads[i] = th; } for (int32_t i = 0; i < threads.size(); ++i) threads[i]->start(); for (int32_t i = 0; i < threads.size(); ++i) 
threads[i]->join(); for (int32_t i = 0; i < threads.size(); ++i) { IndexingThreadPtr th = threads[i]; SyncLock syncLock(th); docs.putAll(th->docs.begin(), th->docs.end()); } checkIndex(dir); DocsAndWriterPtr dw = newLucene(); dw->docs = docs; dw->writer = w; return dw; } static HashMap indexRandom(int32_t numThreads, int32_t iterations, int32_t range, DirectoryPtr dir) { HashMap docs = HashMap::newInstance(); for (int32_t iter = 0; iter < 3; ++iter) { IndexWriterPtr w = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); w->setUseCompoundFile(false); // force many merges w->setMergeFactor(mergeFactor); w->setRAMBufferSizeMB(0.1); w->setMaxBufferedDocs(maxBufferedDocs); Collection threads = Collection::newInstance(numThreads); for (int32_t i = 0; i < threads.size(); ++i) { IndexingThreadPtr th = newLucene(); th->w = w; th->base = 1000000 * i; th->range = range; th->iterations = iterations; threads[i] = th; } for (int32_t i = 0; i < threads.size(); ++i) threads[i]->start(); for (int32_t i = 0; i < threads.size(); ++i) threads[i]->join(); w->close(); for (int32_t i = 0; i < threads.size(); ++i) { IndexingThreadPtr th = threads[i]; SyncLock syncLock(th); docs.putAll(th->docs.begin(), th->docs.end()); } } checkIndex(dir); return docs; } static void indexSerial(HashMap docs, DirectoryPtr dir) { IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); // index all docs in a single thread for (HashMap::iterator iter = docs.begin(); iter != docs.end(); ++iter) { DocumentPtr d = iter->second; Collection fields = d->getFields(); // put fields in same order each time std::sort(fields.begin(), fields.end(), lessFieldName()); DocumentPtr d1 = newLucene(); d1->setBoost(d->getBoost()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) d1->add(*field); w->addDocument(d1); } w->close(); } static void verifyEquals(IndexReaderPtr r1, DirectoryPtr dir2, const String& idField) { IndexReaderPtr r2 = 
IndexReader::open(dir2, true); verifyEquals(r1, r2, idField); r2->close(); } static void verifyEquals(DirectoryPtr dir1, DirectoryPtr dir2, const String& idField) { IndexReaderPtr r1 = IndexReader::open(dir1, true); IndexReaderPtr r2 = IndexReader::open(dir2, true); verifyEquals(r1, r2, idField); r1->close(); r2->close(); } static void verifyEquals(IndexReaderPtr r1, IndexReaderPtr r2, const String& idField) { BOOST_CHECK_EQUAL(r1->numDocs(), r2->numDocs()); bool hasDeletes = !(r1->maxDoc() == r2->maxDoc() && r1->numDocs() == r1->maxDoc()); Collection r2r1 = Collection::newInstance(r2->maxDoc()); // r2 id to r1 id mapping TermDocsPtr termDocs1 = r1->termDocs(); TermDocsPtr termDocs2 = r2->termDocs(); // create mapping from id2 space to id2 based on idField TermEnumPtr termEnum = r1->terms(newLucene(idField, L"")); do { TermPtr term = termEnum->term(); if (!term || term->field() != idField) break; termDocs1->seek(termEnum); if (!termDocs1->next()) { // This doc is deleted and wasn't replaced termDocs2->seek(termEnum); BOOST_CHECK(!termDocs2->next()); continue; } int32_t id1 = termDocs1->doc(); BOOST_CHECK(!termDocs1->next()); termDocs2->seek(termEnum); BOOST_CHECK(termDocs2->next()); int32_t id2 = termDocs2->doc(); BOOST_CHECK(!termDocs2->next()); r2r1[id2] = id1; // verify stored fields are equivalent BOOST_CHECK_NO_THROW(verifyEquals(r1->document(id1), r2->document(id2))); // verify term vectors are equivalent BOOST_CHECK_NO_THROW(verifyEquals(r1->getTermFreqVectors(id1), r2->getTermFreqVectors(id2))); } while (termEnum->next()); termEnum->close(); // Verify postings TermEnumPtr termEnum1 = r1->terms(newLucene(L"", L"")); TermEnumPtr termEnum2 = r2->terms(newLucene(L"", L"")); // pack both doc and freq into single element for easy sorting Collection info1 = Collection::newInstance(r1->numDocs()); Collection info2 = Collection::newInstance(r2->numDocs()); while (true) { TermPtr term1; TermPtr term2; // iterate until we get some docs int32_t len1 = 0; while (true) { 
len1 = 0; term1 = termEnum1->term(); if (!term1) break; termDocs1->seek(termEnum1); while (termDocs1->next()) { int32_t d1 = termDocs1->doc(); int32_t f1 = termDocs1->freq(); info1[len1] = (((int64_t)d1) << 32) | f1; len1++; } if (len1 > 0) break; if (!termEnum1->next()) break; } // iterate until we get some docs int32_t len2 = 0; while (true) { len2 = 0; term2 = termEnum2->term(); if (!term2) break; termDocs2->seek(termEnum2); while (termDocs2->next()) { int32_t d2 = termDocs2->doc(); int32_t f2 = termDocs2->freq(); info2[len2] = (((int64_t)r2r1[d2]) << 32) | f2; len2++; } if (len2 > 0) break; if (!termEnum2->next()) break; } if (!hasDeletes) BOOST_CHECK_EQUAL(termEnum1->docFreq(), termEnum2->docFreq()); BOOST_CHECK_EQUAL(len1, len2); if (len1 == 0) break; // no more terms BOOST_CHECK_EQUAL(term1, term2); // sort info2 to get it into ascending docid std::sort(info2.begin(), info2.begin() + len2); // now compare for (int32_t i = 0; i < len1; ++i) BOOST_CHECK_EQUAL(info1[i], info2[i]); termEnum1->next(); termEnum2->next(); } } static void verifyEquals(DocumentPtr d1, DocumentPtr d2) { Collection ff1 = d1->getFields(); Collection ff2 = d2->getFields(); std::sort(ff1.begin(), ff1.end(), lessFieldName()); std::sort(ff2.begin(), ff2.end(), lessFieldName()); BOOST_CHECK_EQUAL(ff1.size(), ff2.size()); for (int32_t i = 0; i < ff1.size(); ++i) { FieldablePtr f1 = ff1[i]; FieldablePtr f2 = ff2[i]; if (f1->isBinary()) BOOST_CHECK(f2->isBinary()); else BOOST_CHECK_EQUAL(f1->stringValue(), f2->stringValue()); } } static void verifyEquals(Collection d1, Collection d2) { if (!d1) { BOOST_CHECK(!d2); return; } BOOST_CHECK(d2); BOOST_CHECK_EQUAL(d1.size(), d2.size()); for (int32_t i = 0; i < d1.size(); ++i) { TermFreqVectorPtr v1 = d1[i]; TermFreqVectorPtr v2 = d2[i]; BOOST_CHECK_EQUAL(v1->size(), v2->size()); int32_t numTerms = v1->size(); Collection terms1 = v1->getTerms(); Collection terms2 = v2->getTerms(); Collection freq1 = v1->getTermFrequencies(); Collection freq2 = 
v2->getTermFrequencies(); for (int32_t j = 0; j < numTerms; ++j) { BOOST_CHECK_EQUAL(terms1[j], terms2[j]); BOOST_CHECK_EQUAL(freq1[j], freq2[j]); } if (boost::dynamic_pointer_cast(v1)) { BOOST_CHECK(boost::dynamic_pointer_cast(v2)); SegmentTermPositionVectorPtr tpv1 = boost::dynamic_pointer_cast(v1); SegmentTermPositionVectorPtr tpv2 = boost::dynamic_pointer_cast(v2); for (int32_t j = 0; j < numTerms; ++j) { Collection pos1 = tpv1->getTermPositions(j); Collection pos2 = tpv2->getTermPositions(j); BOOST_CHECK_EQUAL(pos1.size(), pos2.size()); Collection offsets1 = tpv1->getOffsets(j); Collection offsets2 = tpv2->getOffsets(j); if (!offsets1) BOOST_CHECK(!offsets2); else BOOST_CHECK(offsets2); for (int32_t k = 0; k < pos1.size(); ++k) { BOOST_CHECK_EQUAL(pos1[k], pos2[k]); if (offsets1) { BOOST_CHECK_EQUAL(offsets1[k]->getStartOffset(), offsets2[k]->getStartOffset()); BOOST_CHECK_EQUAL(offsets1[k]->getEndOffset(), offsets2[k]->getEndOffset()); } } } } } } namespace RunStressTest { DECLARE_SHARED_PTR(TimedThread) DECLARE_SHARED_PTR(IndexerThread) DECLARE_SHARED_PTR(SearcherThread) class TimedThread : public LuceneThread { public: TimedThread() { this->failed = false; this->RUN_TIME_SEC = 6; this->rand = newLucene(); } virtual ~TimedThread() { } LUCENE_CLASS(TimedThread); public: bool failed; protected: int32_t RUN_TIME_SEC; RandomPtr rand; public: virtual void doWork() = 0; virtual void run() { int64_t stopTime = MiscUtils::currentTimeMillis() + 1000 * RUN_TIME_SEC; try { while ((int64_t)MiscUtils::currentTimeMillis() < stopTime && !failed) doWork(); } catch (LuceneException& e) { failed = true; BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; class IndexerThread : public TimedThread { public: IndexerThread(IndexWriterPtr writer) { this->writer = writer; this->nextID = 0; } virtual ~IndexerThread() { } LUCENE_CLASS(IndexerThread); public: IndexWriterPtr writer; int32_t nextID; public: virtual void doWork() { // Add 10 docs for (int32_t i = 0; i < 10; ++i) 
{ DocumentPtr d = newLucene(); d->add(newLucene(L"id", StringUtils::toString(nextID++), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d->add(newLucene(L"contents", intToEnglish(rand->nextInt()), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(d); } // Delete 5 docs int32_t deleteID = nextID - 1; for (int32_t i = 0; i < 5; ++i) { writer->deleteDocuments(newLucene(L"id", StringUtils::toString(deleteID))); deleteID -= 2; } } }; class SearcherThread : public TimedThread { public: SearcherThread(DirectoryPtr directory) { this->directory = directory; } virtual ~SearcherThread() { } LUCENE_CLASS(SearcherThread); protected: DirectoryPtr directory; public: virtual void doWork() { for (int32_t i = 0; i < 100; ++i) newLucene(directory, true)->close(); } }; } /// Run one indexer and 2 searchers against single index as stress test. static void runStressTest(DirectoryPtr directory, MergeSchedulerPtr mergeScheduler) { AnalyzerPtr analyzer = newLucene(); IndexWriterPtr modifier = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); modifier->setMaxBufferedDocs(10); Collection threads = Collection::newInstance(4); int32_t numThread = 0; if (mergeScheduler) modifier->setMergeScheduler(mergeScheduler); // One modifier that writes 10 docs then removes 5, over and over RunStressTest::IndexerThreadPtr indexerThread1 = newLucene(modifier); threads[numThread++] = indexerThread1; indexerThread1->start(); RunStressTest::IndexerThreadPtr indexerThread2 = newLucene(modifier); threads[numThread++] = indexerThread2; indexerThread2->start(); // Two searchers that constantly just re-instantiate the searcher RunStressTest::SearcherThreadPtr searcherThread1 = newLucene(directory); threads[numThread++] = searcherThread1; searcherThread1->start(); RunStressTest::SearcherThreadPtr searcherThread2 = newLucene(directory); threads[numThread++] = searcherThread2; searcherThread2->start(); for (int32_t i = 0; i < numThread; ++i) threads[i]->join(); modifier->close(); 
BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1 BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2 BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1 BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2 } BOOST_AUTO_TEST_CASE(testStressIndexAndSearching) { // With ConcurrentMergeScheduler, in RAMDir DirectoryPtr directory = newLucene(); runStressTest(directory, newLucene()); directory->close(); // With ConcurrentMergeScheduler, in FSDir String dirPath(FileUtils::joinPath(getTempDir(), L"lucene.test.stress")); directory = FSDirectory::open(dirPath); runStressTest(directory, newLucene()); directory->close(); FileUtils::removeDirectory(dirPath); } BOOST_AUTO_TEST_CASE(testRandomIWReader) { DirectoryPtr dir = newLucene(); DocsAndWriterPtr dw = indexRandomIWReader(10, 100, 100, dir); IndexReaderPtr r = dw->writer->getReader(); dw->writer->commit(); verifyEquals(r, dir, L"id"); r->close(); dw->writer->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testRandom) { DirectoryPtr dir1 = newLucene(); DirectoryPtr dir2 = newLucene(); HashMap docs = indexRandom(10, 100, 100, dir1); indexSerial(docs, dir2); verifyEquals(dir1, dir2, L"id"); } BOOST_AUTO_TEST_CASE(testMultiConfig) { RandomPtr r = newLucene(); // test lots of smaller different params together for (int32_t i = 0; i < 100; ++i) // increase iterations for better testing { sameFieldOrder = (r->nextInt() % 2 == 1); mergeFactor = r->nextInt(3) + 2; maxBufferedDocs = r->nextInt(3) + 2; seed++; int32_t numThreads = r->nextInt(5) + 1; int32_t iter = r->nextInt(10) + 1; int32_t range = r->nextInt(20) + 1; DirectoryPtr dir1 = newLucene(); DirectoryPtr dir2 = newLucene(); HashMap docs = indexRandom(numThreads, iter, range, dir1); indexSerial(docs, dir2); verifyEquals(dir1, dir2, L"id"); } } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/index/TermDocsPerfTest.cpp000066400000000000000000000077561217574114600241100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "RAMDirectory.h" #include "IndexReader.h" #include "TermEnum.h" #include "Term.h" #include "TermDocs.h" #include "TokenStream.h" #include "TermAttribute.h" #include "Analyzer.h" #include "Document.h" #include "Field.h" #include "IndexWriter.h" #include "Random.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(TermDocsPerfTest, LuceneTestFixture) DECLARE_SHARED_PTR(RepeatingTokenStream) class RepeatingTokenStream : public TokenStream { public: RepeatingTokenStream(const String& val) { this->num = 0; this->value = val; this->termAtt = addAttribute(); } virtual ~RepeatingTokenStream() { } LUCENE_CLASS(RepeatingTokenStream); public: int32_t num; TermAttributePtr termAtt; String value; public: virtual bool incrementToken() { --num; if (num >= 0) { clearAttributes(); termAtt->setTermBuffer(value); return true; } return false; } }; class TestAnalyzer : public Analyzer { public: TestAnalyzer(RepeatingTokenStreamPtr ts, RandomPtr random, int32_t maxTF, double percentDocs) { this->ts = ts; this->random = random; this->maxTF = maxTF; this->percentDocs = percentDocs; } virtual ~TestAnalyzer() { } LUCENE_CLASS(TestAnalyzer); protected: RepeatingTokenStreamPtr ts; RandomPtr random; int32_t maxTF; double percentDocs; public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { if (random->nextDouble() < percentDocs) ts->num = random->nextInt(maxTF) + 1; else ts->num = 0; return 
ts; } }; static void addDocs(DirectoryPtr dir, int32_t numDocs, const String& field, const String& val, int32_t maxTF, double percentDocs) { RepeatingTokenStreamPtr ts = newLucene(val); RandomPtr random = newLucene(); AnalyzerPtr analyzer = newLucene(ts, random, maxTF, percentDocs); DocumentPtr doc = newLucene(); doc->add(newLucene(field, val, Field::STORE_NO, Field::INDEX_NOT_ANALYZED_NO_NORMS)); IndexWriterPtr writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(100); writer->setMergeFactor(100); for (int32_t i = 0; i < numDocs; ++i) writer->addDocument(doc); writer->optimize(); writer->close(); } BOOST_AUTO_TEST_CASE(testTermDocsPerf) { static const int32_t iter = 100000; static const int32_t numDocs = 10000; static const int32_t maxTF = 3; static const double percentDocs = 0.1; DirectoryPtr dir = newLucene(); int64_t start = MiscUtils::currentTimeMillis(); addDocs(dir, numDocs, L"foo", L"val", maxTF, percentDocs); int64_t end = MiscUtils::currentTimeMillis(); BOOST_TEST_MESSAGE("Milliseconds for creation of " << numDocs << " docs = " << (end - start)); IndexReaderPtr reader = IndexReader::open(dir, true); TermEnumPtr termEnum = reader->terms(newLucene(L"foo", L"val")); TermDocsPtr termDocs = reader->termDocs(); start = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < iter; ++i) { termDocs->seek(termEnum); while (termDocs->next()) termDocs->doc(); } end = MiscUtils::currentTimeMillis(); BOOST_TEST_MESSAGE("Milliseconds for " << iter << " TermDocs iteration: " << (end - start)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/TermTest.cpp000066400000000000000000000017411217574114600224460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "Term.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(TermTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testEquals) { TermPtr base = newLucene(L"same", L"same"); TermPtr same = newLucene(L"same", L"same"); TermPtr differentField = newLucene(L"different", L"same"); TermPtr differentText = newLucene(L"same", L"different"); BOOST_CHECK(base->equals(base)); BOOST_CHECK(base->equals(same)); BOOST_CHECK(!base->equals(differentField)); BOOST_CHECK(!base->equals(differentText)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/TermVectorsReaderTest.cpp000066400000000000000000000425751217574114600251510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "TermVectorOffsetInfo.h" #include "MockRAMDirectory.h" #include "FieldInfos.h" #include "IndexWriter.h" #include "Analyzer.h" #include "TokenStream.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "Document.h" #include "Field.h" #include "IndexFileNames.h" #include "TermVectorsReader.h" #include "TermFreqVector.h" #include "SegmentInfo.h" #include "TermPositionVector.h" #include "SegmentTermVector.h" #include "SortedTermVectorMapper.h" #include "TermVectorEntryFreqSortedComparator.h" #include "FieldSortedTermVectorMapper.h" #include "TermVectorEntry.h" #include "IndexReader.h" #include "Random.h" using namespace Lucene; DECLARE_SHARED_PTR(TestToken) class TestToken : public LuceneObject { public: TestToken() { pos = 0; startOffset = 0; endOffset = 0; } virtual ~TestToken() { } LUCENE_CLASS(TestToken); public: String text; int32_t pos; int32_t startOffset; int32_t endOffset; public: int32_t compareTo(TestTokenPtr other) { return (pos - other->pos); } }; class MyTokenStream : public TokenStream { public: MyTokenStream(Collection tokens) { this->tokens = tokens; tokenUpto = 0; termAtt = addAttribute(); posIncrAtt = addAttribute(); offsetAtt = addAttribute(); } virtual ~MyTokenStream() { } LUCENE_CLASS(MyTokenStream); protected: Collection tokens; public: int32_t tokenUpto; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (tokenUpto >= tokens.size()) return false; else { TestTokenPtr testToken = tokens[tokenUpto++]; clearAttributes(); termAtt->setTermBuffer(testToken->text); offsetAtt->setOffset(testToken->startOffset, testToken->endOffset); if (tokenUpto > 1) posIncrAtt->setPositionIncrement(testToken->pos - tokens[tokenUpto - 2]->pos); else 
posIncrAtt->setPositionIncrement(testToken->pos + 1); } return true; } }; class MyAnalyzer : public Analyzer { public: MyAnalyzer(Collection tokens) { this->tokens = tokens; } virtual ~MyAnalyzer() { } LUCENE_CLASS(MyAnalyzer); protected: Collection tokens; public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(tokens); } }; DECLARE_SHARED_PTR(DocNumAwareMapper) class DocNumAwareMapper : public TermVectorMapper { public: DocNumAwareMapper() { documentNumber = -1; } virtual ~DocNumAwareMapper() { } LUCENE_CLASS(DocNumAwareMapper); protected: int32_t documentNumber; public: virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { if (documentNumber == -1) BOOST_FAIL("Documentnumber should be set at this point!"); } virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions) { if (documentNumber == -1) BOOST_FAIL("Documentnumber should be set at this point!"); } virtual int32_t getDocumentNumber() { return documentNumber; } virtual void setDocumentNumber(int32_t documentNumber) { this->documentNumber = documentNumber; } }; class TermVectorsReaderTestFixture : public LuceneTestFixture { public: TermVectorsReaderTestFixture() { // Must be lexicographically sorted, will do in setup, versus trying to maintain here testFields = newCollection(L"f1", L"f2", L"f3", L"f4"); testFieldsStorePos = newCollection(true, false, true, false); testFieldsStoreOff = newCollection(true, false, false, true); testTerms = newCollection(L"this", L"is", L"a", L"test"); positions = Collection< Collection >::newInstance(testTerms.size()); offsets = Collection< Collection >::newInstance(testTerms.size()); dir = newLucene(); tokens = Collection::newInstance(testTerms.size() * TERM_FREQ); RandomPtr random = newLucene(); std::sort(testTerms.begin(), testTerms.end()); int32_t tokenUpto = 0; for (int32_t i = 0; i < testTerms.size(); ++i) { positions[i] = 
Collection::newInstance(TERM_FREQ); offsets[i] = Collection::newInstance(TERM_FREQ); // first position must be 0 for (int32_t j = 0; j < TERM_FREQ; ++j) { // positions are always sorted in increasing order positions[i][j] = (int32_t)(j * 10 + (int32_t)((double)random->nextInt(100) / 100.0) * 10); // offsets are always sorted in increasing order offsets[i][j] = newLucene(j * 10, j * 10 + testTerms[i].size()); TestTokenPtr token = newLucene(); tokens[tokenUpto++] = token; token->text = testTerms[i]; token->pos = positions[i][j]; token->startOffset = offsets[i][j]->getStartOffset(); token->endOffset = offsets[i][j]->getEndOffset(); } } std::sort(tokens.begin(), tokens.end(), luceneCompare()); IndexWriterPtr writer = newLucene(dir, newLucene(tokens), true, IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(false); DocumentPtr doc = newLucene(); for (int32_t i = 0; i < testFields.size(); ++i) { Field::TermVector tv = Field::TERM_VECTOR_YES; if (testFieldsStorePos[i] && testFieldsStoreOff[i]) tv = Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS; else if (testFieldsStorePos[i] && !testFieldsStoreOff[i]) tv = Field::TERM_VECTOR_WITH_POSITIONS; else if (!testFieldsStorePos[i] && testFieldsStoreOff[i]) tv = Field::TERM_VECTOR_WITH_OFFSETS; doc->add(newLucene(testFields[i], L"", Field::STORE_NO, Field::INDEX_ANALYZED, tv)); } // Create 5 documents for testing, they all have the same terms for (int32_t j = 0; j < 5; ++j) writer->addDocument(doc); writer->commit(); seg = writer->newestSegment()->name; writer->close(); fieldInfos = newLucene(dir, seg + L"." 
+ IndexFileNames::FIELD_INFOS_EXTENSION()); } virtual ~TermVectorsReaderTestFixture() { } protected: Collection testFields; Collection testFieldsStorePos; Collection testFieldsStoreOff; Collection testTerms; Collection< Collection > positions; Collection< Collection > offsets; MockRAMDirectoryPtr dir; String seg; FieldInfosPtr fieldInfos; Collection tokens; static const int32_t TERM_FREQ; }; const int32_t TermVectorsReaderTestFixture::TERM_FREQ = 3; BOOST_FIXTURE_TEST_SUITE(TermVectorsReaderTest, TermVectorsReaderTestFixture) BOOST_AUTO_TEST_CASE(testReader) { // Check to see the files were created properly in setup BOOST_CHECK(dir->fileExists(seg + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION())); BOOST_CHECK(dir->fileExists(seg + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION())); TermVectorsReaderPtr reader = newLucene(dir, seg, fieldInfos); BOOST_CHECK(reader); for (int32_t j = 0; j < 5; ++j) { TermFreqVectorPtr vector = reader->get(j, testFields[0]); BOOST_CHECK(vector); Collection terms = vector->getTerms(); BOOST_CHECK(terms); BOOST_CHECK_EQUAL(terms.size(), testTerms.size()); for (int32_t i = 0; i < terms.size(); ++i) BOOST_CHECK_EQUAL(terms[i], testTerms[i]); } } BOOST_AUTO_TEST_CASE(testPositionReader) { TermVectorsReaderPtr reader = newLucene(dir, seg, fieldInfos); BOOST_CHECK(reader); TermPositionVectorPtr vector = boost::dynamic_pointer_cast(reader->get(0, testFields[0])); BOOST_CHECK(vector); Collection terms = vector->getTerms(); BOOST_CHECK(terms); BOOST_CHECK_EQUAL(terms.size(), testTerms.size()); for (int32_t i = 0; i < terms.size(); ++i) { String term = terms[i]; BOOST_CHECK_EQUAL(term, testTerms[i]); Collection positions = vector->getTermPositions(i); BOOST_CHECK(positions); BOOST_CHECK_EQUAL(positions.size(), this->positions[i].size()); for (int32_t j = 0; j < positions.size(); ++j) BOOST_CHECK_EQUAL(positions[j], this->positions[i][j]); Collection offset = vector->getOffsets(i); BOOST_CHECK(offset); BOOST_CHECK_EQUAL(offset.size(), 
this->offsets[i].size()); for (int32_t j = 0; j < offset.size(); ++j) { TermVectorOffsetInfoPtr termVectorOffsetInfo = offset[j]; BOOST_CHECK(termVectorOffsetInfo->equals(offsets[i][j])); } } TermFreqVectorPtr freqVector = reader->get(0, testFields[1]); // no pos, no offset BOOST_CHECK(freqVector); BOOST_CHECK(boost::dynamic_pointer_cast(freqVector)); terms = freqVector->getTerms(); BOOST_CHECK(terms); BOOST_CHECK_EQUAL(terms.size(), testTerms.size()); for (int32_t i = 0; i < terms.size(); ++i) BOOST_CHECK_EQUAL(terms[i], testTerms[i]); } BOOST_AUTO_TEST_CASE(testOffsetReader) { TermVectorsReaderPtr reader = newLucene(dir, seg, fieldInfos); BOOST_CHECK(reader); TermPositionVectorPtr vector = boost::dynamic_pointer_cast(reader->get(0, testFields[0])); BOOST_CHECK(vector); Collection terms = vector->getTerms(); BOOST_CHECK(terms); BOOST_CHECK_EQUAL(terms.size(), testTerms.size()); for (int32_t i = 0; i < terms.size(); ++i) { String term = terms[i]; BOOST_CHECK_EQUAL(term, testTerms[i]); Collection positions = vector->getTermPositions(i); BOOST_CHECK(positions); BOOST_CHECK_EQUAL(positions.size(), this->positions[i].size()); for (int32_t j = 0; j < positions.size(); ++j) BOOST_CHECK_EQUAL(positions[j], this->positions[i][j]); Collection offset = vector->getOffsets(i); BOOST_CHECK(offset); BOOST_CHECK_EQUAL(offset.size(), this->offsets[i].size()); for (int32_t j = 0; j < offset.size(); ++j) { TermVectorOffsetInfoPtr termVectorOffsetInfo = offset[j]; BOOST_CHECK(termVectorOffsetInfo->equals(offsets[i][j])); } } } BOOST_AUTO_TEST_CASE(testMapper) { TermVectorsReaderPtr reader = newLucene(dir, seg, fieldInfos); BOOST_CHECK(reader); SortedTermVectorMapperPtr mapper = newLucene(TermVectorEntryFreqSortedComparator::compare); reader->get(0, mapper); Collection entrySet = mapper->getTermVectorEntrySet(); BOOST_CHECK(entrySet); // three fields, 4 terms, all terms are the same BOOST_CHECK_EQUAL(entrySet.size(), 4); // check offsets and positions for (Collection::iterator tve = 
entrySet.begin(); tve != entrySet.end(); ++tve) { BOOST_CHECK(*tve); BOOST_CHECK((*tve)->getOffsets()); BOOST_CHECK((*tve)->getPositions()); } mapper = newLucene(TermVectorEntryFreqSortedComparator::compare); reader->get(1, mapper); entrySet = mapper->getTermVectorEntrySet(); BOOST_CHECK(entrySet); // three fields, 4 terms, all terms are the same BOOST_CHECK_EQUAL(entrySet.size(), 4); // should have offsets and positions because we are munging all the fields together for (Collection::iterator tve = entrySet.begin(); tve != entrySet.end(); ++tve) { BOOST_CHECK(*tve); BOOST_CHECK((*tve)->getOffsets()); BOOST_CHECK((*tve)->getPositions()); } FieldSortedTermVectorMapperPtr fsMapper = newLucene(TermVectorEntryFreqSortedComparator::compare); reader->get(0, fsMapper); MapStringCollectionTermVectorEntry map = fsMapper->getFieldToTerms(); BOOST_CHECK_EQUAL(map.size(), testFields.size()); for (MapStringCollectionTermVectorEntry::iterator entry = map.begin(); entry != map.end(); ++entry) { Collection termVectorEntries = entry->second; BOOST_CHECK_EQUAL(termVectorEntries.size(), 4); for (Collection::iterator tve = termVectorEntries.begin(); tve != termVectorEntries.end(); ++tve) { BOOST_CHECK(*tve); // Check offsets and positions. 
String field = (*tve)->getField(); if (field == testFields[0]) { // should have offsets BOOST_CHECK((*tve)->getOffsets()); BOOST_CHECK((*tve)->getPositions()); } else if (field == testFields[1]) { // should not have offsets BOOST_CHECK(!(*tve)->getOffsets()); BOOST_CHECK(!(*tve)->getPositions()); } } } // Try mapper that ignores offs and positions fsMapper = newLucene(true, true, TermVectorEntryFreqSortedComparator::compare); reader->get(0, fsMapper); map = fsMapper->getFieldToTerms(); BOOST_CHECK_EQUAL(map.size(), testFields.size()); for (MapStringCollectionTermVectorEntry::iterator entry = map.begin(); entry != map.end(); ++entry) { Collection termVectorEntries = entry->second; BOOST_CHECK_EQUAL(termVectorEntries.size(), 4); for (Collection::iterator tve = termVectorEntries.begin(); tve != termVectorEntries.end(); ++tve) { BOOST_CHECK(*tve); // Check offsets and positions. String field = (*tve)->getField(); if (field == testFields[0]) { // should have offsets BOOST_CHECK(!(*tve)->getOffsets()); BOOST_CHECK(!(*tve)->getPositions()); } else if (field == testFields[1]) { // should not have offsets BOOST_CHECK(!(*tve)->getOffsets()); BOOST_CHECK(!(*tve)->getPositions()); } } } // test setDocumentNumber() IndexReaderPtr ir = IndexReader::open(dir, true); DocNumAwareMapperPtr docNumAwareMapper = newLucene(); BOOST_CHECK_EQUAL(-1, docNumAwareMapper->getDocumentNumber()); ir->getTermFreqVector(0, docNumAwareMapper); BOOST_CHECK_EQUAL(0, docNumAwareMapper->getDocumentNumber()); docNumAwareMapper->setDocumentNumber(-1); ir->getTermFreqVector(1, docNumAwareMapper); BOOST_CHECK_EQUAL(1, docNumAwareMapper->getDocumentNumber()); docNumAwareMapper->setDocumentNumber(-1); ir->getTermFreqVector(0, L"f1", docNumAwareMapper); BOOST_CHECK_EQUAL(0, docNumAwareMapper->getDocumentNumber()); docNumAwareMapper->setDocumentNumber(-1); ir->getTermFreqVector(1, L"f2", docNumAwareMapper); BOOST_CHECK_EQUAL(1, docNumAwareMapper->getDocumentNumber()); docNumAwareMapper->setDocumentNumber(-1); 
ir->getTermFreqVector(0, L"f1", docNumAwareMapper); BOOST_CHECK_EQUAL(0, docNumAwareMapper->getDocumentNumber()); ir->close(); } /// Make sure exceptions and bad params are handled appropriately BOOST_AUTO_TEST_CASE(testBadParams) { { TermVectorsReaderPtr reader = newLucene(dir, seg, fieldInfos); BOOST_CHECK(reader); // Bad document number, good field number BOOST_CHECK_EXCEPTION(reader->get(50, testFields[0]), IOException, check_exception(LuceneException::IO)); } { TermVectorsReaderPtr reader = newLucene(dir, seg, fieldInfos); BOOST_CHECK(reader); // Bad document number, no field BOOST_CHECK_EXCEPTION(reader->get(50), IOException, check_exception(LuceneException::IO)); } { TermVectorsReaderPtr reader = newLucene(dir, seg, fieldInfos); BOOST_CHECK(reader); // Good document number, bad field number TermFreqVectorPtr vector; BOOST_CHECK_NO_THROW(vector = reader->get(0, L"f50")); BOOST_CHECK(!vector); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/ThreadedOptimizeTest.cpp000066400000000000000000000135701217574114600250030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "SimpleAnalyzer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "LuceneThread.h" #include "Term.h" #include "IndexReader.h" #include "MockRAMDirectory.h" #include "SerialMergeScheduler.h" #include "ConcurrentMergeScheduler.h" #include "FSDirectory.h" #include "FileUtils.h" using namespace Lucene; class OptimizeThread : public LuceneThread { public: OptimizeThread(int32_t numIter, int32_t iterFinal, int32_t iFinal, IndexWriterPtr writer, IndexWriterPtr writerFinal) { this->numIter = numIter; this->iterFinal = iterFinal; this->iFinal = iFinal; this->writer = writer; this->writerFinal = writerFinal; } virtual ~OptimizeThread() { } LUCENE_CLASS(OptimizeThread); protected: int32_t numIter; int32_t iterFinal; int32_t iFinal; IndexWriterPtr writer; IndexWriterPtr writerFinal; public: virtual void run() { try { for (int32_t j = 0; j < numIter; ++j) { writerFinal->optimize(false); for (int32_t k = 0; k < 17 * (1 + iFinal); ++k) { DocumentPtr d = newLucene(); d->add(newLucene(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d->add(newLucene(L"contents", intToEnglish(iFinal + k), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(d); } for (int32_t k = 0; k < 9 * (1 + iFinal); ++k) writerFinal->deleteDocuments(newLucene(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k))); writerFinal->optimize(); } } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; class ThreadedOptimizeTestFixture : public LuceneTestFixture { public: ThreadedOptimizeTestFixture() { analyzer = newLucene(); } virtual 
~ThreadedOptimizeTestFixture() { } protected: static const int32_t NUM_THREADS; static const int32_t NUM_ITER; static const int32_t NUM_ITER2; AnalyzerPtr analyzer; public: void runTest(DirectoryPtr directory, MergeSchedulerPtr merger) { IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); if (merger) writer->setMergeScheduler(merger); for (int32_t iter = 0; iter < NUM_ITER; ++iter) { int32_t iterFinal = iter; writer->setMergeFactor(1000); for (int32_t i = 0; i < 200; ++i) { DocumentPtr d = newLucene(); d->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d->add(newLucene(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(d); } writer->setMergeFactor(4); Collection threads = Collection::newInstance(NUM_THREADS); for (int32_t i = 0; i < NUM_THREADS; ++i) { int32_t iFinal = i; IndexWriterPtr writerFinal = writer; threads[i] = newLucene(NUM_ITER2, iterFinal, iFinal, writer, writerFinal); } for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->start(); for (int32_t i = 0; i < NUM_THREADS; ++i) threads[i]->join(); int32_t expectedDocCount = (int32_t)((1 + iter) * (200 + 8 * NUM_ITER2 * (int32_t)(((double)NUM_THREADS / 2.0) * (double)(1 + NUM_THREADS)))); BOOST_CHECK_EQUAL(expectedDocCount, writer->maxDoc()); writer->close(); writer = newLucene(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(2); IndexReaderPtr reader = IndexReader::open(directory, true); BOOST_CHECK(reader->isOptimized()); BOOST_CHECK_EQUAL(expectedDocCount, reader->numDocs()); reader->close(); } writer->close(); } }; const int32_t ThreadedOptimizeTestFixture::NUM_THREADS = 3; const int32_t ThreadedOptimizeTestFixture::NUM_ITER = 1; const int32_t ThreadedOptimizeTestFixture::NUM_ITER2 = 1; BOOST_FIXTURE_TEST_SUITE(ThreadedOptimizeTest, ThreadedOptimizeTestFixture) 
BOOST_AUTO_TEST_CASE(testThreadedOptimize) { DirectoryPtr directory = newLucene(); runTest(directory, newLucene()); runTest(directory, newLucene()); directory->close(); String dirName(FileUtils::joinPath(getTempDir(), L"luceneTestThreadedOptimize")); directory = FSDirectory::open(dirName); runTest(directory, newLucene()); runTest(directory, newLucene()); directory->close(); FileUtils::removeDirectory(dirName); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/index/TransactionRollbackTest.cpp000066400000000000000000000154431217574114600255020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexDeletionPolicy.h" #include "IndexWriter.h" #include "IndexReader.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "BitSet.h" #include "IndexCommit.h" using namespace Lucene; /// Keeps all commit points (used to build index) class KeepAllDeletionPolicy : public IndexDeletionPolicy { public: virtual ~KeepAllDeletionPolicy() { } LUCENE_CLASS(KeepAllDeletionPolicy); public: virtual void onInit(Collection commits) { } virtual void onCommit(Collection commits) { } }; /// Rolls back to previous commit point class RollbackDeletionPolicy : public IndexDeletionPolicy { public: RollbackDeletionPolicy(int32_t rollbackPoint) { this->rollbackPoint = rollbackPoint; } virtual ~RollbackDeletionPolicy() { } LUCENE_CLASS(RollbackDeletionPolicy); protected: int32_t rollbackPoint; public: virtual void onInit(Collection commits) { for (Collection::iterator commit = commits.begin(); commit != commits.end(); 
++commit) { MapStringString userData = (*commit)->getUserData(); if (!userData.empty()) { // Label for a commit point is "Records 1-30" // This code reads the last id ("30" in this example) and deletes it if it is after the desired rollback point String x = userData.get(L"index"); String lastVal = x.substr(x.find_last_of(L"-") + 1); int32_t last = StringUtils::toInt(lastVal); if (last > rollbackPoint) (*commit)->deleteCommit(); } } } virtual void onCommit(Collection commits) { } }; class DeleteLastCommitPolicy : public IndexDeletionPolicy { public: virtual ~DeleteLastCommitPolicy() { } LUCENE_CLASS(DeleteLastCommitPolicy); public: virtual void onInit(Collection commits) { commits[commits.size() - 1]->deleteCommit(); } virtual void onCommit(Collection commits) { } }; /// Test class to illustrate using IndexDeletionPolicy to provide multi-level rollback capability. /// This test case creates an index of records 1 to 100, introducing a commit point every 10 records. /// /// A "keep all" deletion policy is used to ensure we keep all commit points for testing purposes class TransactionRollbackTestFixture : public LuceneTestFixture { public: TransactionRollbackTestFixture() { FIELD_RECORD_ID = L"record_id"; dir = newLucene(); // Build index, of records 1 to 100, committing after each batch of 10 IndexDeletionPolicyPtr sdp = newLucene(); IndexWriterPtr w = newLucene(dir, newLucene(), sdp, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t currentRecordId = 1; currentRecordId <= 100; ++currentRecordId) { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD_RECORD_ID, StringUtils::toString(currentRecordId), Field::STORE_YES, Field::INDEX_ANALYZED)); w->addDocument(doc); if (currentRecordId % 10 == 0) { MapStringString data = MapStringString::newInstance(); data.put(L"index", L"records 1-" + StringUtils::toString(currentRecordId)); w->commit(data); } } w->close(); } virtual ~TransactionRollbackTestFixture() { } protected: String FIELD_RECORD_ID; DirectoryPtr dir; public: 
/// Rolls back index to a chosen ID void rollBackLast(int32_t id) { String ids = L"-" + StringUtils::toString(id); IndexCommitPtr last; Collection commits = IndexReader::listCommits(dir); for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) { MapStringString ud = (*commit)->getUserData(); if (!ud.empty()) { if (boost::ends_with(ud.get(L"index"), ids)) last = *commit; } } BOOST_CHECK(last); IndexWriterPtr w = newLucene(dir, newLucene(), newLucene(id), IndexWriter::MaxFieldLengthUNLIMITED, last); MapStringString data = MapStringString::newInstance(); data.put(L"index", L"Rolled back to 1-" + StringUtils::toString(id)); w->commit(data); w->close(); } void checkExpecteds(BitSetPtr expecteds) { IndexReaderPtr r = IndexReader::open(dir, true); // Perhaps not the most efficient approach but meets our needs here. for (int32_t i = 0; i < r->maxDoc(); ++i) { if (!r->isDeleted(i)) { String sval = r->document(i)->get(FIELD_RECORD_ID); if (!sval.empty()) { int32_t val = StringUtils::toInt(sval); BOOST_CHECK(expecteds->get(val)); expecteds->set(val, false); } } } r->close(); BOOST_CHECK_EQUAL(0, expecteds->cardinality()); } }; BOOST_FIXTURE_TEST_SUITE(TransactionRollbackTest, TransactionRollbackTestFixture) BOOST_AUTO_TEST_CASE(testRepeatedRollBacks) { int32_t expectedLastRecordId = 100; while (expectedLastRecordId > 10) { expectedLastRecordId -= 10; rollBackLast(expectedLastRecordId); BitSetPtr expecteds = newLucene(100); expecteds->set(1, (expectedLastRecordId + 1), true); checkExpecteds(expecteds); } } BOOST_AUTO_TEST_CASE(testRollbackDeletionPolicy) { for (int32_t i = 0; i < 2; ++i) { // Unless you specify a prior commit point, rollback should not work newLucene(dir, newLucene(), (IndexDeletionPolicyPtr)newLucene(), IndexWriter::MaxFieldLengthUNLIMITED)->close(); IndexReaderPtr r = IndexReader::open(dir, true); BOOST_CHECK_EQUAL(100, r->numDocs()); r->close(); } } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/index/TransactionsTest.cpp000066400000000000000000000165251217574114600242150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "ConcurrentMergeScheduler.h" #include "Document.h" #include "Field.h" #include "Term.h" #include "IndexReader.h" #include "MockRAMDirectory.h" #include "Random.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(TransactionsTest, LuceneTestFixture) static bool doFail = false; DECLARE_SHARED_PTR(TimedThread) DECLARE_SHARED_PTR(IndexerThread) DECLARE_SHARED_PTR(SearcherThread) class TimedThread : public LuceneThread { public: TimedThread() { } virtual ~TimedThread() { } LUCENE_CLASS(TimedThread); protected: static const int32_t RUN_TIME_SEC; public: virtual void doWork() = 0; virtual void run() { int64_t stopTime = MiscUtils::currentTimeMillis() + 1000 * RUN_TIME_SEC; try { while ((int64_t)MiscUtils::currentTimeMillis() < stopTime) doWork(); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } }; const int32_t TimedThread::RUN_TIME_SEC = 6; class IndexerThread : public TimedThread { public: IndexerThread(SynchronizePtr lock, DirectoryPtr dir1, DirectoryPtr dir2) { this->lock = lock; this->dir1 = dir1; this->dir2 = dir2; this->nextID = 0; this->random = newLucene(); } virtual ~IndexerThread() { } LUCENE_CLASS(IndexerThread); public: DirectoryPtr dir1; DirectoryPtr dir2; SynchronizePtr lock; int32_t nextID; RandomPtr random; public: virtual void doWork() { IndexWriterPtr writer1 = newLucene(dir1, 
newLucene(), IndexWriter::MaxFieldLengthLIMITED); writer1->setMaxBufferedDocs(3); writer1->setMergeFactor(2); boost::dynamic_pointer_cast(writer1->getMergeScheduler())->setSuppressExceptions(); IndexWriterPtr writer2 = newLucene(dir2, newLucene(), IndexWriter::MaxFieldLengthLIMITED); // Intentionally use different params so flush/merge happen at different times writer2->setMaxBufferedDocs(2); writer2->setMergeFactor(3); boost::dynamic_pointer_cast(writer2->getMergeScheduler())->setSuppressExceptions(); update(writer1); update(writer2); doFail = true; bool continueWork = true; LuceneException finally; try { SyncLock syncLock(lock); try { writer1->prepareCommit(); } catch (...) { writer1->rollback(); writer2->rollback(); continueWork = false; } try { writer2->prepareCommit(); } catch (...) { writer1->rollback(); writer2->rollback(); continueWork = false; } writer1->commit(); writer2->commit(); } catch (LuceneException& e) { finally = e; } doFail = false; finally.throwException(); if (!continueWork) return; writer1->close(); writer2->close(); } void update(IndexWriterPtr writer) { // Add 10 docs for (int32_t j = 0; j < 10; ++j) { DocumentPtr d = newLucene(); d->add(newLucene(L"id", StringUtils::toString(nextID++), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d->add(newLucene(L"contents", intToEnglish(random->nextInt()), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(d); } // Delete 5 docs int32_t deleteID = nextID - 1; for (int32_t j = 0; j < 5; ++j) { writer->deleteDocuments(newLucene(L"id", StringUtils::toString(deleteID))); deleteID -= 2; } } }; class SearcherThread : public TimedThread { public: SearcherThread(SynchronizePtr lock, DirectoryPtr dir1, DirectoryPtr dir2) { this->lock = lock; this->dir1 = dir1; this->dir2 = dir2; } virtual ~SearcherThread() { } LUCENE_CLASS(SearcherThread); protected: DirectoryPtr dir1; DirectoryPtr dir2; SynchronizePtr lock; public: virtual void doWork() { IndexReaderPtr r1; IndexReaderPtr r2; { SyncLock 
syncLock(lock); r1 = IndexReader::open(dir1, true); r2 = IndexReader::open(dir2, true); } if (r1->numDocs() != r2->numDocs()) BOOST_FAIL("doc counts differ"); r1->close(); r2->close(); } }; DECLARE_SHARED_PTR(RandomFailure) class RandomFailure : public MockDirectoryFailure { public: RandomFailure() { random = newLucene(); } virtual ~RandomFailure() { } protected: RandomPtr random; public: virtual void eval(MockRAMDirectoryPtr dir) { if (doFail && random->nextInt() % 10 <= 3) boost::throw_exception(IOException(L"now failing randomly but on purpose")); } }; static void initIndex(DirectoryPtr dir) { IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); RandomPtr random = newLucene(); for (int32_t j = 0; j < 7; ++j) { DocumentPtr d = newLucene(); d->add(newLucene(L"contents", intToEnglish(random->nextInt()), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(d); } writer->close(); } BOOST_AUTO_TEST_CASE(testTransactions) { MockRAMDirectoryPtr dir1 = newLucene(); MockRAMDirectoryPtr dir2 = newLucene(); dir1->setPreventDoubleWrite(false); dir2->setPreventDoubleWrite(false); dir1->failOn(newLucene()); dir2->failOn(newLucene()); initIndex(dir1); initIndex(dir2); Collection threads = Collection::newInstance(3); int32_t numThread = 0; SynchronizePtr lock = newInstance(); IndexerThreadPtr indexerThread = newLucene(lock, dir1, dir2); threads[numThread++] = indexerThread; indexerThread->start(); SearcherThreadPtr searcherThread1 = newLucene(lock, dir1, dir2); threads[numThread++] = searcherThread1; searcherThread1->start(); SearcherThreadPtr searcherThread2 = newLucene(lock, dir1, dir2); threads[numThread++] = searcherThread2; searcherThread2->start(); for (int32_t i = 0; i < numThread; ++i) threads[i]->join(); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/index/WordlistLoaderTest.cpp000066400000000000000000000031631217574114600244750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "WordlistLoader.h" #include "StringReader.h" #include "BufferedReader.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(WordlistLoaderTest, LuceneTestFixture) static void checkSet(HashSet wordset) { BOOST_CHECK_EQUAL(3, wordset.size()); BOOST_CHECK(wordset.contains(L"ONE")); // case is not modified BOOST_CHECK(!wordset.contains(L"one")); // case is not modified BOOST_CHECK(wordset.contains(L"two")); // surrounding whitespace is removed BOOST_CHECK(wordset.contains(L"three")); BOOST_CHECK(!wordset.contains(L"four")); } BOOST_AUTO_TEST_CASE(testWordlistLoading) { String s = L"ONE\n two \nthree"; HashSet wordSet1 = WordlistLoader::getWordSet(newLucene(s)); checkSet(wordSet1); HashSet wordSet2 = WordlistLoader::getWordSet(newLucene(newLucene(s))); checkSet(wordSet2); } BOOST_AUTO_TEST_CASE(testComments) { String s = L"ONE\n two \nthree\n#comment"; HashSet wordSet1 = WordlistLoader::getWordSet(newLucene(s), L"#"); checkSet(wordSet1); BOOST_CHECK(!wordSet1.contains(L"#comment")); BOOST_CHECK(!wordSet1.contains(L"comment")); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/main/000077500000000000000000000000001217574114600200055ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/main/main.cpp000066400000000000000000000040571217574114600214430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifdef _WIN32 #pragma once #include "targetver.h" #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include #endif #include "TestUtils.h" #include "MiscUtils.h" #include "FileUtils.h" #include "StringUtils.h" #define BOOST_TEST_MODULE "Lucene" #define BOOST_TEST_NO_MAIN #include #include using namespace Lucene; int main(int argc, char* argv[]) { String testDir; uint64_t startTime = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < argc; ++i) { if (strncmp(argv[i], "--test_dir", 9) == 0) { String testParam = StringUtils::toUnicode(argv[i]); Collection vals = StringUtils::split(testParam, L"="); if (vals.size() == 2) { testDir = vals[1]; boost::replace_all(testDir, L"\"", L""); boost::trim(testDir); break; } } } if (testDir.empty()) { testDir = L"../../src/test/testfiles"; if (!FileUtils::isDirectory(testDir)) { testDir = L"../src/test/testfiles"; if (!FileUtils::isDirectory(testDir)) testDir = L"./src/test/testfiles"; } } if (!FileUtils::isDirectory(testDir)) { std::wcout << L"Test directory not found. (override default by using --test_dir=\"./src/test/testfiles\")\n"; return 1; } setTestDir(testDir); int testMain = boost::unit_test::unit_test_main(init_unit_test_suite, argc, argv); std::wcout << L"*** Test duration: " << (MiscUtils::currentTimeMillis() - startTime) / 1000 << L" sec\n"; return testMain; } LucenePlusPlus-rel_3.0.4/src/test/msvc/000077500000000000000000000000001217574114600200315ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/msvc/TestInc.cpp000066400000000000000000000005501217574114600221060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" LucenePlusPlus-rel_3.0.4/src/test/msvc/lucene_tester.vcproj000066400000000000000000000755201217574114600241300ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/queryparser/000077500000000000000000000000001217574114600214435ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/queryparser/MultiAnalyzerTest.cpp000066400000000000000000000231101217574114600256040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "QueryParser.h" #include "Analyzer.h" #include "StandardTokenizer.h" #include "LowerCaseFilter.h" #include "TokenFilter.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "TypeAttribute.h" #include "Query.h" using namespace Lucene; /// Test QueryParser's ability to deal with Analyzers that return more than one token per /// position or that return tokens with a position increment > 1. 
BOOST_FIXTURE_TEST_SUITE(MultiAnalyzerTest, BaseTokenStreamFixture) static int32_t multiToken = 0; DECLARE_SHARED_PTR(MultiAnalyzer) DECLARE_SHARED_PTR(TestFilter) DECLARE_SHARED_PTR(DumbQueryWrapper) DECLARE_SHARED_PTR(DumbQueryParser) DECLARE_SHARED_PTR(TestPosIncrementFilter) class TestFilter : public TokenFilter { public: TestFilter(TokenStreamPtr in) : TokenFilter(in) { prevStartOffset = 0; prevEndOffset = 0; termAtt = addAttribute(); posIncrAtt = addAttribute(); offsetAtt = addAttribute(); typeAtt = addAttribute(); } virtual ~TestFilter() { } LUCENE_CLASS(TestFilter); protected: String prevType; int32_t prevStartOffset; int32_t prevEndOffset; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; OffsetAttributePtr offsetAtt; TypeAttributePtr typeAtt; public: virtual bool incrementToken() { if (multiToken > 0) { termAtt->setTermBuffer(L"multi" + StringUtils::toString(multiToken + 1)); offsetAtt->setOffset(prevStartOffset, prevEndOffset); typeAtt->setType(prevType); posIncrAtt->setPositionIncrement(0); --multiToken; return true; } else { bool next = input->incrementToken(); if (!next) return false; prevType = typeAtt->type(); prevStartOffset = offsetAtt->startOffset(); prevEndOffset = offsetAtt->endOffset(); String text = termAtt->term(); if (text == L"triplemulti") { multiToken = 2; return true; } else if (text == L"multi") { multiToken = 1; return true; } else return true; } } }; class MultiAnalyzer : public Analyzer { public: virtual ~MultiAnalyzer() { } LUCENE_CLASS(MultiAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(LuceneVersion::LUCENE_CURRENT, reader); result = newLucene(result); result = newLucene(result); return result; } }; /// A very simple wrapper to prevent typeOf checks but uses the toString of the query it wraps. 
class DumbQueryWrapper : public Query { public: DumbQueryWrapper(QueryPtr q) { this->q = q; } virtual ~DumbQueryWrapper() { } LUCENE_CLASS(DumbQueryWrapper); protected: QueryPtr q; public: virtual String toString(const String& field) { return q->toString(field); } }; /// A very simple subclass of QueryParser class DumbQueryParser : public QueryParser { public: DumbQueryParser(const String& f, AnalyzerPtr a) : QueryParser(LuceneVersion::LUCENE_CURRENT, f, a) { } virtual ~DumbQueryParser() { } LUCENE_CLASS(DumbQueryParser); public: virtual QueryPtr getFieldQuery(const String& field, const String& queryText) { return newLucene(QueryParser::getFieldQuery(field, queryText)); } }; class TestPosIncrementFilter : public TokenFilter { public: TestPosIncrementFilter(TokenStreamPtr in) : TokenFilter(in) { termAtt = addAttribute(); posIncrAtt = addAttribute(); } virtual ~TestPosIncrementFilter() { } LUCENE_CLASS(TestPosIncrementFilter); protected: TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; public: virtual bool incrementToken() { while (input->incrementToken()) { if (termAtt->term() == L"the") { // stopword, do nothing } else if (termAtt->term() == L"quick") { posIncrAtt->setPositionIncrement(2); return true; } else { posIncrAtt->setPositionIncrement(1); return true; } } return false; } }; /// Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). /// Does not work correctly for input other than "the quick brown ...". 
class PosIncrementAnalyzer : public Analyzer { public: virtual ~PosIncrementAnalyzer() { } LUCENE_CLASS(PosIncrementAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(LuceneVersion::LUCENE_CURRENT, reader); result = newLucene(result); result = newLucene(result); return result; } }; BOOST_AUTO_TEST_CASE(testMultiAnalyzer) { QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"", newLucene()); // trivial, no multiple tokens BOOST_CHECK_EQUAL(L"foo", qp->parse(L"foo")->toString()); BOOST_CHECK_EQUAL(L"foo", qp->parse(L"\"foo\"")->toString()); BOOST_CHECK_EQUAL(L"foo foobar", qp->parse(L"foo foobar")->toString()); BOOST_CHECK_EQUAL(L"\"foo foobar\"", qp->parse(L"\"foo foobar\"")->toString()); BOOST_CHECK_EQUAL(L"\"foo foobar blah\"", qp->parse(L"\"foo foobar blah\"")->toString()); // two tokens at the same position BOOST_CHECK_EQUAL(L"(multi multi2) foo", qp->parse(L"multi foo")->toString()); BOOST_CHECK_EQUAL(L"foo (multi multi2)", qp->parse(L"foo multi")->toString()); BOOST_CHECK_EQUAL(L"(multi multi2) (multi multi2)", qp->parse(L"multi multi")->toString()); BOOST_CHECK_EQUAL(L"+(foo (multi multi2)) +(bar (multi multi2))", qp->parse(L"+(foo multi) +(bar multi)")->toString()); BOOST_CHECK_EQUAL(L"+(foo (multi multi2)) field:\"bar (multi multi2)\"", qp->parse(L"+(foo multi) field:\"bar multi\"")->toString()); // phrases BOOST_CHECK_EQUAL(L"\"(multi multi2) foo\"", qp->parse(L"\"multi foo\"")->toString()); BOOST_CHECK_EQUAL(L"\"foo (multi multi2)\"", qp->parse(L"\"foo multi\"")->toString()); BOOST_CHECK_EQUAL(L"\"foo (multi multi2) foobar (multi multi2)\"", qp->parse(L"\"foo multi foobar multi\"")->toString()); // fields BOOST_CHECK_EQUAL(L"(field:multi field:multi2) field:foo", qp->parse(L"field:multi field:foo")->toString()); BOOST_CHECK_EQUAL(L"field:\"(multi multi2) foo\"", qp->parse(L"field:\"multi foo\"")->toString()); // three tokens at one position 
BOOST_CHECK_EQUAL(L"triplemulti multi3 multi2", qp->parse(L"triplemulti")->toString()); BOOST_CHECK_EQUAL(L"foo (triplemulti multi3 multi2) foobar", qp->parse(L"foo triplemulti foobar")->toString()); // phrase with non-default slop BOOST_CHECK_EQUAL(L"\"(multi multi2) foo\"~10", qp->parse(L"\"multi foo\"~10")->toString()); // phrase with non-default boost BOOST_CHECK_EQUAL(L"\"(multi multi2) foo\"^2.0", qp->parse(L"\"multi foo\"^2")->toString()); // phrase after changing default slop qp->setPhraseSlop(99); BOOST_CHECK_EQUAL(L"\"(multi multi2) foo\"~99 bar", qp->parse(L"\"multi foo\" bar")->toString()); BOOST_CHECK_EQUAL(L"\"(multi multi2) foo\"~99 \"foo bar\"~2", qp->parse(L"\"multi foo\" \"foo bar\"~2")->toString()); qp->setPhraseSlop(0); // non-default operator qp->setDefaultOperator(QueryParser::AND_OPERATOR); BOOST_CHECK_EQUAL(L"+(multi multi2) +foo", qp->parse(L"multi foo")->toString()); } BOOST_AUTO_TEST_CASE(testMultiAnalyzerWithSubclassOfQueryParser) { DumbQueryParserPtr qp = newLucene(L"", newLucene()); qp->setPhraseSlop(99); // modified default slop // direct call to getFieldQuery to demonstrate difference between phrase and multiphrase with modified default slop BOOST_CHECK_EQUAL(L"\"foo bar\"~99", qp->getFieldQuery(L"", L"foo bar")->toString()); BOOST_CHECK_EQUAL(L"\"(multi multi2) bar\"~99", qp->getFieldQuery(L"", L"multi bar")->toString()); // ask subclass to parse phrase with modified default slop BOOST_CHECK_EQUAL(L"\"(multi multi2) foo\"~99 bar", qp->parse(L"\"multi foo\" bar")->toString()); } BOOST_AUTO_TEST_CASE(testPosIncrementAnalyzer) { QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_24, L"", newLucene()); BOOST_CHECK_EQUAL(L"quick brown", qp->parse(L"the quick brown")->toString()); BOOST_CHECK_EQUAL(L"\"quick brown\"", qp->parse(L"\"the quick brown\"")->toString()); BOOST_CHECK_EQUAL(L"quick brown fox", qp->parse(L"the quick brown fox")->toString()); BOOST_CHECK_EQUAL(L"\"quick brown fox\"", qp->parse(L"\"the quick brown 
fox\"")->toString()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/queryparser/MultiFieldQueryParserTest.cpp000066400000000000000000000361161217574114600272570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Analyzer.h" #include "LowerCaseTokenizer.h" #include "TokenFilter.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "MultiFieldQueryParser.h" #include "Query.h" #include "StandardAnalyzer.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(MultiFieldQueryParserTest, LuceneTestFixture) DECLARE_SHARED_PTR(TestAnalyzer) DECLARE_SHARED_PTR(TestFilter) /// Filter which discards the token 'stop' and which expands the token 'phrase' into 'phrase1 phrase2' class TestFilter : public TokenFilter { public: TestFilter(TokenStreamPtr in) : TokenFilter(in) { termAtt = addAttribute(); offsetAtt = addAttribute(); inPhrase = false; savedStart = 0; savedEnd = 0; } virtual ~TestFilter() { } LUCENE_CLASS(TestFilter); public: bool inPhrase; int32_t savedStart; int32_t savedEnd; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (inPhrase) { inPhrase = false; termAtt->setTermBuffer(L"phrase2"); offsetAtt->setOffset(savedStart, savedEnd); return true; } else { while (input->incrementToken()) { if (termAtt->term() == L"phrase") { inPhrase = true; savedStart = offsetAtt->startOffset(); savedEnd = offsetAtt->endOffset(); termAtt->setTermBuffer(L"phrase1"); 
offsetAtt->setOffset(savedStart, savedEnd); return true; } else if (termAtt->term() != L"stop") return true; } } return false; } }; class TestAnalyzer : public Analyzer { public: virtual ~TestAnalyzer() { } LUCENE_CLASS(TestAnalyzer); public: // Filters LowerCaseTokenizer with StopFilter. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(newLucene(reader)); } }; class EmptyTokenStream : public TokenStream { public: virtual ~EmptyTokenStream() { } LUCENE_CLASS(EmptyTokenStream); public: virtual bool incrementToken() { return false; } }; /// Return empty tokens for field "f1". class AnalyzerReturningNull : public Analyzer { public: AnalyzerReturningNull() { standardAnalyzer = newLucene(LuceneVersion::LUCENE_CURRENT); } virtual ~AnalyzerReturningNull() { } LUCENE_CLASS(AnalyzerReturningNull); protected: StandardAnalyzerPtr standardAnalyzer; public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { if (fieldName == L"f1") return newLucene(); else return standardAnalyzer->tokenStream(fieldName, reader); } }; /// verify parsing of query using a stopping analyzer static void checkStopQueryEquals(const String& qtxt, const String& expectedRes) { Collection fields = newCollection(L"b", L"t"); Collection occur = newCollection(BooleanClause::SHOULD, BooleanClause::SHOULD); TestAnalyzerPtr a = newLucene(); MultiFieldQueryParserPtr mfqp = newLucene(LuceneVersion::LUCENE_CURRENT, fields, a); QueryPtr q = mfqp->parse(qtxt); BOOST_CHECK_EQUAL(expectedRes, q->toString()); q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, qtxt, fields, occur, a); BOOST_CHECK_EQUAL(expectedRes, q->toString()); } /// test stop words arising for both the non static form, and for the corresponding static form (qtxt, fields[]). 
BOOST_AUTO_TEST_CASE(testStopwordsParsing) { checkStopQueryEquals(L"one", L"b:one t:one"); checkStopQueryEquals(L"one stop", L"b:one t:one"); checkStopQueryEquals(L"one (stop)", L"b:one t:one"); checkStopQueryEquals(L"one ((stop))", L"b:one t:one"); checkStopQueryEquals(L"stop", L""); checkStopQueryEquals(L"(stop)", L""); checkStopQueryEquals(L"((stop))", L""); } BOOST_AUTO_TEST_CASE(testSimple) { Collection fields = newCollection(L"b", L"t"); MultiFieldQueryParserPtr mfqp = newLucene(LuceneVersion::LUCENE_CURRENT, fields, newLucene(LuceneVersion::LUCENE_CURRENT)); QueryPtr q = mfqp->parse(L"one"); BOOST_CHECK_EQUAL(L"b:one t:one", q->toString()); q = mfqp->parse(L"one two"); BOOST_CHECK_EQUAL(L"(b:one t:one) (b:two t:two)", q->toString()); q = mfqp->parse(L"+one +two"); BOOST_CHECK_EQUAL(L"+(b:one t:one) +(b:two t:two)", q->toString()); q = mfqp->parse(L"+one -two -three"); BOOST_CHECK_EQUAL(L"+(b:one t:one) -(b:two t:two) -(b:three t:three)", q->toString()); q = mfqp->parse(L"one^2 two"); BOOST_CHECK_EQUAL(L"((b:one t:one)^2.0) (b:two t:two)", q->toString()); q = mfqp->parse(L"one~ two"); BOOST_CHECK_EQUAL(L"(b:one~0.5 t:one~0.5) (b:two t:two)", q->toString()); q = mfqp->parse(L"one~0.8 two^2"); BOOST_CHECK_EQUAL(L"(b:one~0.8 t:one~0.8) ((b:two t:two)^2.0)", q->toString()); q = mfqp->parse(L"one* two*"); BOOST_CHECK_EQUAL(L"(b:one* t:one*) (b:two* t:two*)", q->toString()); q = mfqp->parse(L"[a TO c] two"); BOOST_CHECK_EQUAL(L"(b:[a TO c] t:[a TO c]) (b:two t:two)", q->toString()); q = mfqp->parse(L"w?ldcard"); BOOST_CHECK_EQUAL(L"b:w?ldcard t:w?ldcard", q->toString()); q = mfqp->parse(L"\"foo bar\""); BOOST_CHECK_EQUAL(L"b:\"foo bar\" t:\"foo bar\"", q->toString()); q = mfqp->parse(L"\"aa bb cc\" \"dd ee\""); BOOST_CHECK_EQUAL(L"(b:\"aa bb cc\" t:\"aa bb cc\") (b:\"dd ee\" t:\"dd ee\")", q->toString()); q = mfqp->parse(L"\"foo bar\"~4"); BOOST_CHECK_EQUAL(L"b:\"foo bar\"~4 t:\"foo bar\"~4", q->toString()); q = mfqp->parse(L"b:\"foo bar\"~4"); 
BOOST_CHECK_EQUAL(L"b:\"foo bar\"~4", q->toString()); // make sure that terms which have a field are not touched q = mfqp->parse(L"one f:two"); BOOST_CHECK_EQUAL(L"(b:one t:one) f:two", q->toString()); // AND mode mfqp->setDefaultOperator(QueryParser::AND_OPERATOR); q = mfqp->parse(L"one two"); BOOST_CHECK_EQUAL(L"+(b:one t:one) +(b:two t:two)", q->toString()); q = mfqp->parse(L"\"aa bb cc\" \"dd ee\""); BOOST_CHECK_EQUAL(L"+(b:\"aa bb cc\" t:\"aa bb cc\") +(b:\"dd ee\" t:\"dd ee\")", q->toString()); } BOOST_AUTO_TEST_CASE(testBoostsSimple) { MapStringDouble boosts = MapStringDouble::newInstance(); boosts.put(L"b", 5.0); boosts.put(L"t", 10.0); Collection fields = newCollection(L"b", L"t"); MultiFieldQueryParserPtr mfqp = newLucene(LuceneVersion::LUCENE_CURRENT, fields, newLucene(LuceneVersion::LUCENE_CURRENT), boosts); // Check for simple QueryPtr q = mfqp->parse(L"one"); BOOST_CHECK_EQUAL(L"b:one^5.0 t:one^10.0", q->toString()); // Check for AND q = mfqp->parse(L"one AND two"); BOOST_CHECK_EQUAL(L"+(b:one^5.0 t:one^10.0) +(b:two^5.0 t:two^10.0)", q->toString()); // Check for OR q = mfqp->parse(L"one OR two"); BOOST_CHECK_EQUAL(L"(b:one^5.0 t:one^10.0) (b:two^5.0 t:two^10.0)", q->toString()); // Check for AND and a field q = mfqp->parse(L"one AND two AND foo:test"); BOOST_CHECK_EQUAL(L"+(b:one^5.0 t:one^10.0) +(b:two^5.0 t:two^10.0) +foo:test", q->toString()); q = mfqp->parse(L"one^3 AND two^4"); BOOST_CHECK_EQUAL(L"+((b:one^5.0 t:one^10.0)^3.0) +((b:two^5.0 t:two^10.0)^4.0)", q->toString()); } BOOST_AUTO_TEST_CASE(testStaticMethod1) { Collection fields = newCollection(L"b", L"t"); Collection queries = newCollection(L"one", L"two"); QueryPtr q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries, fields, newLucene(LuceneVersion::LUCENE_CURRENT)); BOOST_CHECK_EQUAL(L"b:one t:two", q->toString()); Collection queries2 = newCollection(L"+one", L"+two"); q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries2, fields, 
newLucene(LuceneVersion::LUCENE_CURRENT)); BOOST_CHECK_EQUAL(L"(+b:one) (+t:two)", q->toString()); Collection queries3 = newCollection(L"one", L"+two"); q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries3, fields, newLucene(LuceneVersion::LUCENE_CURRENT)); BOOST_CHECK_EQUAL(L"b:one (+t:two)", q->toString()); Collection queries4 = newCollection(L"one +more", L"+two"); q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries4, fields, newLucene(LuceneVersion::LUCENE_CURRENT)); BOOST_CHECK_EQUAL(L"(b:one +b:more) (+t:two)", q->toString()); Collection queries5 = newCollection(L"blah"); // expected exception, array length differs BOOST_CHECK_EXCEPTION(q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries5, fields, newLucene(LuceneVersion::LUCENE_CURRENT)), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); // check also with stop words for this static form (qtxts[], fields[]). TestAnalyzerPtr stopA = newLucene(); Collection queries6 = newCollection(L"((+stop))", L"+((stop))"); q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries6, fields, stopA); BOOST_CHECK_EQUAL(L"", q->toString()); Collection queries7 = newCollection(L"one ((+stop)) +more", L"+((stop)) +two"); q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries7, fields, stopA); BOOST_CHECK_EQUAL(L"(b:one +b:more) (+t:two)", q->toString()); } BOOST_AUTO_TEST_CASE(testStaticMethod2) { Collection fields = newCollection(L"b", L"t"); Collection flags = newCollection(BooleanClause::MUST, BooleanClause::MUST_NOT); QueryPtr q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, L"one", fields, flags, newLucene(LuceneVersion::LUCENE_CURRENT)); BOOST_CHECK_EQUAL(L"+b:one -t:one", q->toString()); q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, L"one two", fields, flags, newLucene(LuceneVersion::LUCENE_CURRENT)); BOOST_CHECK_EQUAL(L"+(b:one b:two) -(t:one t:two)", q->toString()); 
Collection flags2 = newCollection(BooleanClause::MUST); // expected exception, array length differs BOOST_CHECK_EXCEPTION(q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, L"blah", fields, flags2, newLucene(LuceneVersion::LUCENE_CURRENT)), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } BOOST_AUTO_TEST_CASE(testStaticMethod3) { Collection queries = newCollection(L"one", L"two", L"three"); Collection fields = newCollection(L"f1", L"f2", L"f3"); Collection flags = newCollection(BooleanClause::MUST, BooleanClause::MUST_NOT, BooleanClause::SHOULD); QueryPtr q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries, fields, flags, newLucene(LuceneVersion::LUCENE_CURRENT)); BOOST_CHECK_EQUAL(L"+f1:one -f2:two f3:three", q->toString()); Collection flags2 = newCollection(BooleanClause::MUST); // expected exception, array length differs BOOST_CHECK_EXCEPTION(q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries, fields, flags2, newLucene(LuceneVersion::LUCENE_CURRENT)), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } BOOST_AUTO_TEST_CASE(testStaticMethod3Old) { Collection queries = newCollection(L"one", L"two"); Collection fields = newCollection(L"b", L"t"); Collection flags = newCollection(BooleanClause::MUST, BooleanClause::MUST_NOT); QueryPtr q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries, fields, flags, newLucene(LuceneVersion::LUCENE_CURRENT)); BOOST_CHECK_EQUAL(L"+b:one -t:two", q->toString()); Collection flags2 = newCollection(BooleanClause::MUST); // expected exception, array length differs BOOST_CHECK_EXCEPTION(q = MultiFieldQueryParser::parse(LuceneVersion::LUCENE_CURRENT, queries, fields, flags2, newLucene(LuceneVersion::LUCENE_CURRENT)), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } BOOST_AUTO_TEST_CASE(testAnalyzerReturningNull) { Collection fields = newCollection(L"f1", L"f2", L"f3"); 
MultiFieldQueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, fields, newLucene()); QueryPtr q = parser->parse(L"bla AND blo"); BOOST_CHECK_EQUAL(L"+(f2:bla f3:bla) +(f2:blo f3:blo)", q->toString()); // the following queries are not affected as their terms are not analyzed anyway q = parser->parse(L"bla*"); BOOST_CHECK_EQUAL(L"f1:bla* f2:bla* f3:bla*", q->toString()); q = parser->parse(L"bla~"); BOOST_CHECK_EQUAL(L"f1:bla~0.5 f2:bla~0.5 f3:bla~0.5", q->toString()); q = parser->parse(L"[a TO c]"); BOOST_CHECK_EQUAL(L"f1:[a TO c] f2:[a TO c] f3:[a TO c]", q->toString()); } BOOST_AUTO_TEST_CASE(testStopWordSearching) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); DirectoryPtr ramDir = newLucene(); IndexWriterPtr iw = newLucene(ramDir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"body", L"blah the footest blah", Field::STORE_NO, Field::INDEX_ANALYZED)); iw->addDocument(doc); iw->close(); MultiFieldQueryParserPtr mfqp = newLucene(LuceneVersion::LUCENE_CURRENT, newCollection(L"body"), analyzer); mfqp->setDefaultOperator(QueryParser::AND_OPERATOR); QueryPtr q = mfqp->parse(L"the footest"); IndexSearcherPtr is = newLucene(ramDir, true); Collection hits = is->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); is->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/queryparser/QueryParserTest.cpp000066400000000000000000001127021217574114600252740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "QueryParser.h" #include "WhitespaceAnalyzer.h" #include "KeywordAnalyzer.h" #include "Query.h" #include "SimpleAnalyzer.h" #include "TermQuery.h" #include "PhraseQuery.h" #include "FuzzyQuery.h" #include "PrefixQuery.h" #include "StandardAnalyzer.h" #include "WildcardQuery.h" #include "BooleanQuery.h" #include "TokenFilter.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "LowerCaseTokenizer.h" #include "MultiTermQuery.h" #include "TermRangeQuery.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "IndexSearcher.h" #include "Document.h" #include "Field.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "DateField.h" #include "Term.h" #include "StopAnalyzer.h" #include "StopFilter.h" #include "MatchAllDocsQuery.h" #include "IndexReader.h" #include "MiscUtils.h" using namespace Lucene; using namespace boost::posix_time; using namespace boost::gregorian; DECLARE_SHARED_PTR(TestAnalyzer) DECLARE_SHARED_PTR(TestFilter) DECLARE_SHARED_PTR(TestParser) /// Filter which discards the token 'stop' and which expands the token 'phrase' into 'phrase1 phrase2' class TestFilter : public TokenFilter { public: TestFilter(TokenStreamPtr in) : TokenFilter(in) { termAtt = addAttribute(); offsetAtt = addAttribute(); inPhrase = false; savedStart = 0; savedEnd = 0; } virtual ~TestFilter() { } LUCENE_CLASS(TestFilter); public: bool inPhrase; int32_t savedStart; int32_t savedEnd; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (inPhrase) { inPhrase = false; clearAttributes(); termAtt->setTermBuffer(L"phrase2"); offsetAtt->setOffset(savedStart, savedEnd); return true; } else { while (input->incrementToken()) { if (termAtt->term() == L"phrase") { inPhrase = true; savedStart = offsetAtt->startOffset(); savedEnd = offsetAtt->endOffset(); 
termAtt->setTermBuffer(L"phrase1"); offsetAtt->setOffset(savedStart, savedEnd); return true; } else if (termAtt->term() != L"stop") return true; } } return false; } }; class TestAnalyzer : public Analyzer { public: virtual ~TestAnalyzer() { } LUCENE_CLASS(TestAnalyzer); public: // Filters LowerCaseTokenizer with StopFilter. virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(newLucene(reader)); } }; class TestParser : public QueryParser { public: TestParser(const String& f, AnalyzerPtr a) : QueryParser(LuceneVersion::LUCENE_CURRENT, f, a) { } virtual ~TestParser() { } LUCENE_CLASS(TestParser); public: virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) { boost::throw_exception(QueryParserError(L"Fuzzy queries not allowed")); return QueryPtr(); } virtual QueryPtr getWildcardQuery(const String& field, const String& termStr) { boost::throw_exception(QueryParserError(L"Wildcard queries not allowed")); return QueryPtr(); } }; class QueryParserTestFixture : public LuceneTestFixture { public: QueryParserTestFixture() { originalMaxClauses = BooleanQuery::getMaxClauseCount(); DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); } virtual ~QueryParserTestFixture() { BooleanQuery::setMaxClauseCount(originalMaxClauses); } protected: int32_t originalMaxClauses; QueryParserPtr getParser(AnalyzerPtr a) { if (!a) a = newLucene(); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", a); qp->setDefaultOperator(QueryParser::OR_OPERATOR); return qp; } QueryPtr getQuery(const String& query, AnalyzerPtr a) { return getParser(a)->parse(query); } void checkQueryEquals(const String& query, AnalyzerPtr a, const String& result) { QueryPtr q = getQuery(query, a); String s = q->toString(L"field"); if (s != result) BOOST_FAIL("Query \"" << StringUtils::toUTF8(query) << "\" yielded \"" << StringUtils::toUTF8(s) << "\", expecting \"" << StringUtils::toUTF8(result) << "\""); } void 
checkQueryEquals(QueryParserPtr qp, const String& field, const String& query, const String& result) { QueryPtr q = qp->parse(query); String s = q->toString(field); if (s != result) BOOST_FAIL("Query \"" << StringUtils::toUTF8(query) << "\" yielded \"" << StringUtils::toUTF8(s) << "\", expecting \"" << StringUtils::toUTF8(result) << "\""); } void checkParseException(const String& queryString) { BOOST_CHECK_EXCEPTION(getQuery(queryString, AnalyzerPtr()), QueryParserError, check_exception(LuceneException::QueryParser)); } void checkWildcardQueryEquals(const String& query, bool lowercase, const String& result, bool allowLeadingWildcard = false) { QueryParserPtr qp = getParser(AnalyzerPtr()); qp->setLowercaseExpandedTerms(lowercase); qp->setAllowLeadingWildcard(allowLeadingWildcard); QueryPtr q = qp->parse(query); String s = q->toString(L"field"); if (s != result) BOOST_FAIL("WildcardQuery \"" << StringUtils::toUTF8(query) << "\" yielded \"" << StringUtils::toUTF8(s) << "\", expecting \"" << StringUtils::toUTF8(result) << "\""); } void checkWildcardQueryEquals(const String& query, const String& result) { QueryParserPtr qp = getParser(AnalyzerPtr()); QueryPtr q = qp->parse(query); String s = q->toString(L"field"); if (s != result) BOOST_FAIL("WildcardQuery \"" << StringUtils::toUTF8(query) << "\" yielded \"" << StringUtils::toUTF8(s) << "\", expecting \"" << StringUtils::toUTF8(result) << "\""); } void checkEscapedQueryEquals(const String& query, AnalyzerPtr a, const String& result) { class TestableQueryParser : public QueryParser { public: using QueryParser::escape; }; String escapedQuery = TestableQueryParser::escape(query); if (escapedQuery != result) BOOST_FAIL("Query \"" << StringUtils::toUTF8(query) << "\" yielded \"" << StringUtils::toUTF8(escapedQuery) << "\", expecting \"" << StringUtils::toUTF8(result) << "\""); } QueryPtr getQueryDOA(const String& query, AnalyzerPtr a) { if (!a) a = newLucene(); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, 
L"field", a); qp->setDefaultOperator(QueryParser::AND_OPERATOR); return qp->parse(query); } void checkQueryEqualsDOA(const String& query, AnalyzerPtr a, const String& result) { QueryPtr q = getQueryDOA(query, a); String s = q->toString(L"field"); if (s != result) BOOST_FAIL("Query \"" << StringUtils::toUTF8(query) << "\" yielded \"" << StringUtils::toUTF8(s) << "\", expecting \"" << StringUtils::toUTF8(result) << "\""); } void addDateDoc(const String& content, boost::posix_time::ptime date, IndexWriterPtr iw) { DocumentPtr d = newLucene(); d->add(newLucene(L"f", content, Field::STORE_YES, Field::INDEX_ANALYZED)); d->add(newLucene(L"date", DateField::dateToString(date), Field::STORE_YES, Field::INDEX_ANALYZED)); iw->addDocument(d); } void checkHits(int32_t expected, const String& query, IndexSearcherPtr is) { QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"date", newLucene()); qp->setLocale(std::locale()); QueryPtr q = qp->parse(query); Collection hits = is->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(expected, hits.size()); } }; BOOST_FIXTURE_TEST_SUITE(QueryParserTest, QueryParserTestFixture) BOOST_AUTO_TEST_CASE(testSimple) { checkQueryEquals(L"term term term", AnalyzerPtr(), L"term term term"); const uint8_t term[] = {0x74, 0xc3, 0xbc, 0x72, 0x6d, 0x20, 0x74, 0x65, 0x72, 0x6d, 0x20, 0x74, 0x65, 0x72, 0x6d}; String termText = UTF8_TO_STRING(term); checkQueryEquals(termText, newLucene(), termText); const uint8_t umlaut[] = {0xc3, 0xbc, 0x6d, 0x6c, 0x61, 0x75, 0x74}; String umlautText = UTF8_TO_STRING(umlaut); checkQueryEquals(umlautText, newLucene(), umlautText); checkQueryEquals(L"\"\"", newLucene(), L""); checkQueryEquals(L"foo:\"\"", newLucene(), L"foo:"); checkQueryEquals(L"a AND b", AnalyzerPtr(), L"+a +b"); checkQueryEquals(L"(a AND b)", AnalyzerPtr(), L"+a +b"); checkQueryEquals(L"c OR (a AND b)", AnalyzerPtr(), L"c (+a +b)"); checkQueryEquals(L"a AND NOT b", AnalyzerPtr(), L"+a -b"); checkQueryEquals(L"a AND -b", 
AnalyzerPtr(), L"+a -b"); checkQueryEquals(L"a AND !b", AnalyzerPtr(), L"+a -b"); checkQueryEquals(L"a && b", AnalyzerPtr(), L"+a +b"); checkQueryEquals(L"a && ! b", AnalyzerPtr(), L"+a -b"); checkQueryEquals(L"a OR b", AnalyzerPtr(), L"a b"); checkQueryEquals(L"a || b", AnalyzerPtr(), L"a b"); checkQueryEquals(L"a OR !b", AnalyzerPtr(), L"a -b"); checkQueryEquals(L"a OR ! b", AnalyzerPtr(), L"a -b"); checkQueryEquals(L"a OR -b", AnalyzerPtr(), L"a -b"); checkQueryEquals(L"+term -term term", AnalyzerPtr(), L"+term -term term"); checkQueryEquals(L"foo:term AND field:anotherTerm", AnalyzerPtr(), L"+foo:term +anotherterm"); checkQueryEquals(L"term AND \"phrase phrase\"", AnalyzerPtr(), L"+term +\"phrase phrase\""); checkQueryEquals(L"\"hello there\"", AnalyzerPtr(), L"\"hello there\""); BOOST_CHECK(MiscUtils::typeOf(getQuery(L"a AND b", AnalyzerPtr()))); BOOST_CHECK(MiscUtils::typeOf(getQuery(L"hello", AnalyzerPtr()))); BOOST_CHECK(MiscUtils::typeOf(getQuery(L"\"hello there\"", AnalyzerPtr()))); checkQueryEquals(L"germ term^2.0", AnalyzerPtr(), L"germ term^2.0"); checkQueryEquals(L"(term)^2.0", AnalyzerPtr(), L"term^2.0"); checkQueryEquals(L"(germ term)^2.0", AnalyzerPtr(), L"(germ term)^2.0"); checkQueryEquals(L"term^2.0", AnalyzerPtr(), L"term^2.0"); checkQueryEquals(L"term^2", AnalyzerPtr(), L"term^2.0"); checkQueryEquals(L"\"germ term\"^2.0", AnalyzerPtr(), L"\"germ term\"^2.0"); checkQueryEquals(L"\"term germ\"^2", AnalyzerPtr(), L"\"term germ\"^2.0"); checkQueryEquals(L"(foo OR bar) AND (baz OR boo)", AnalyzerPtr(), L"+(foo bar) +(baz boo)"); checkQueryEquals(L"((a OR b) AND NOT c) OR d", AnalyzerPtr(), L"(+(a b) -c) d"); checkQueryEquals(L"+(apple \"steve jobs\") -(foo bar baz)", AnalyzerPtr(), L"+(apple \"steve jobs\") -(foo bar baz)"); checkQueryEquals(L"+title:(dog OR cat) -author:\"bob dole\"", AnalyzerPtr(), L"+(title:dog title:cat) -author:\"bob dole\""); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", 
newLucene(LuceneVersion::LUCENE_CURRENT)); // make sure OR is the default BOOST_CHECK_EQUAL(QueryParser::OR_OPERATOR, qp->getDefaultOperator()); qp->setDefaultOperator(QueryParser::AND_OPERATOR); BOOST_CHECK_EQUAL(QueryParser::AND_OPERATOR, qp->getDefaultOperator()); qp->setDefaultOperator(QueryParser::OR_OPERATOR); BOOST_CHECK_EQUAL(QueryParser::OR_OPERATOR, qp->getDefaultOperator()); } BOOST_AUTO_TEST_CASE(testPunct) { AnalyzerPtr a = newLucene(); checkQueryEquals(L"a&b", a, L"a&b"); checkQueryEquals(L"a&&b", a, L"a&&b"); checkQueryEquals(L".NET", a, L".NET"); } BOOST_AUTO_TEST_CASE(testSlop) { checkQueryEquals(L"\"term germ\"~2", AnalyzerPtr(), L"\"term germ\"~2"); checkQueryEquals(L"\"term germ\"~2 flork", AnalyzerPtr(), L"\"term germ\"~2 flork"); checkQueryEquals(L"\"term\"~2", AnalyzerPtr(), L"term"); checkQueryEquals(L"\" \"~2 germ", AnalyzerPtr(), L"germ"); checkQueryEquals(L"\"term germ\"~2^2", AnalyzerPtr(), L"\"term germ\"~2^2.0"); } BOOST_AUTO_TEST_CASE(testNumber) { // The numbers go away because SimpleAnalzyer ignores them checkQueryEquals(L"3", AnalyzerPtr(), L""); checkQueryEquals(L"term 1.0 1 2", AnalyzerPtr(), L"term"); checkQueryEquals(L"term term1 term2", AnalyzerPtr(), L"term term term"); AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkQueryEquals(L"3", a, L"3"); checkQueryEquals(L"term 1.0 1 2", a, L"term 1.0 1 2"); checkQueryEquals(L"term term1 term2", a, L"term term1 term2"); } BOOST_AUTO_TEST_CASE(testWildcard) { checkQueryEquals(L"term*", AnalyzerPtr(), L"term*"); checkQueryEquals(L"term*^2", AnalyzerPtr(), L"term*^2.0"); checkQueryEquals(L"term~", AnalyzerPtr(), L"term~0.5"); checkQueryEquals(L"term~0.7", AnalyzerPtr(), L"term~0.7"); checkQueryEquals(L"term~^2", AnalyzerPtr(), L"term~0.5^2.0"); checkQueryEquals(L"term^2~", AnalyzerPtr(), L"term~0.5^2.0"); checkQueryEquals(L"term*germ", AnalyzerPtr(), L"term*germ"); checkQueryEquals(L"term*germ^3", AnalyzerPtr(), L"term*germ^3.0"); 
BOOST_CHECK(MiscUtils::typeOf(getQuery(L"term*", AnalyzerPtr()))); BOOST_CHECK(MiscUtils::typeOf(getQuery(L"term*^2", AnalyzerPtr()))); BOOST_CHECK(MiscUtils::typeOf(getQuery(L"term~", AnalyzerPtr()))); BOOST_CHECK(MiscUtils::typeOf(getQuery(L"term~0.7", AnalyzerPtr()))); FuzzyQueryPtr fq = boost::dynamic_pointer_cast(getQuery(L"term~0.7", AnalyzerPtr())); BOOST_CHECK_CLOSE_FRACTION(0.7, fq->getMinSimilarity(), 0.1); BOOST_CHECK_EQUAL(FuzzyQuery::defaultPrefixLength, fq->getPrefixLength()); fq = boost::dynamic_pointer_cast(getQuery(L"term~", AnalyzerPtr())); BOOST_CHECK_CLOSE_FRACTION(0.5, fq->getMinSimilarity(), 0.1); BOOST_CHECK_EQUAL(FuzzyQuery::defaultPrefixLength, fq->getPrefixLength()); checkParseException(L"term~1.1"); // value > 1, throws exception BOOST_CHECK(MiscUtils::typeOf(getQuery(L"term*germ", AnalyzerPtr()))); // Tests to see that wild card terms are (or are not) properly lower-cased with propery parser configuration // First prefix queries // by default, convert to lowercase checkWildcardQueryEquals(L"Term*", true, L"term*"); // explicitly set lowercase checkWildcardQueryEquals(L"term*", true, L"term*"); checkWildcardQueryEquals(L"Term*", true, L"term*"); checkWildcardQueryEquals(L"TERM*", true, L"term*"); // explicitly disable lowercase conversion checkWildcardQueryEquals(L"term*", false, L"term*"); checkWildcardQueryEquals(L"Term*", false, L"Term*"); checkWildcardQueryEquals(L"TERM*", false, L"TERM*"); // Then 'full' wildcard queries // by default, convert to lowercase checkWildcardQueryEquals(L"Te?m", L"te?m"); // explicitly set lowercase checkWildcardQueryEquals(L"te?m", true, L"te?m"); checkWildcardQueryEquals(L"Te?m", true, L"te?m"); checkWildcardQueryEquals(L"TE?M", true, L"te?m"); checkWildcardQueryEquals(L"Te?m*gerM", true, L"te?m*germ"); // explicitly disable lowercase conversion checkWildcardQueryEquals(L"te?m", false, L"te?m"); checkWildcardQueryEquals(L"Te?m", false, L"Te?m"); checkWildcardQueryEquals(L"TE?M", false, L"TE?M"); 
checkWildcardQueryEquals(L"Te?m*gerM", false, L"Te?m*gerM"); // Fuzzy queries checkWildcardQueryEquals(L"Term~", L"term~0.5"); checkWildcardQueryEquals(L"Term~", true, L"term~0.5"); checkWildcardQueryEquals(L"Term~", false, L"Term~0.5"); // Range queries checkWildcardQueryEquals(L"[A TO C]", L"[a TO c]"); checkWildcardQueryEquals(L"[A TO C]", true, L"[a TO c]"); checkWildcardQueryEquals(L"[A TO C]", false, L"[A TO C]"); // Test suffix queries: first disallow BOOST_CHECK_EXCEPTION(checkWildcardQueryEquals(L"*Term", true, L"*term"), QueryParserError, check_exception(LuceneException::QueryParser)); BOOST_CHECK_EXCEPTION(checkWildcardQueryEquals(L"?Term", true, L"?term"), QueryParserError, check_exception(LuceneException::QueryParser)); // Test suffix queries: then allow checkWildcardQueryEquals(L"*Term", true, L"*term", true); checkWildcardQueryEquals(L"?Term", true, L"?term", true); } BOOST_AUTO_TEST_CASE(testLeadingWildcardType) { QueryParserPtr qp = getParser(AnalyzerPtr()); qp->setAllowLeadingWildcard(true); BOOST_CHECK(MiscUtils::typeOf(qp->parse(L"t*erm*"))); BOOST_CHECK(MiscUtils::typeOf(qp->parse(L"?term*"))); BOOST_CHECK(MiscUtils::typeOf(qp->parse(L"*term*"))); } BOOST_AUTO_TEST_CASE(testQPA) { AnalyzerPtr qpAnalyzer = newLucene(); checkQueryEquals(L"term term^3.0 term", qpAnalyzer, L"term term^3.0 term"); checkQueryEquals(L"term stop^3.0 term", qpAnalyzer, L"term term"); checkQueryEquals(L"term term term", qpAnalyzer, L"term term term"); checkQueryEquals(L"term +stop term", qpAnalyzer, L"term term"); checkQueryEquals(L"term -stop term", qpAnalyzer, L"term term"); checkQueryEquals(L"drop AND (stop) AND roll", qpAnalyzer, L"+drop +roll"); checkQueryEquals(L"term +(stop) term", qpAnalyzer, L"term term"); checkQueryEquals(L"term -(stop) term", qpAnalyzer, L"term term"); checkQueryEquals(L"drop AND stop AND roll", qpAnalyzer, L"+drop +roll"); checkQueryEquals(L"term phrase term", qpAnalyzer, L"term \"phrase1 phrase2\" term"); checkQueryEquals(L"term AND NOT 
phrase term", qpAnalyzer, L"+term -\"phrase1 phrase2\" term"); checkQueryEquals(L"stop^3", qpAnalyzer, L""); checkQueryEquals(L"stop", qpAnalyzer, L""); checkQueryEquals(L"(stop)^3", qpAnalyzer, L""); checkQueryEquals(L"((stop))^3", qpAnalyzer, L""); checkQueryEquals(L"(stop^3)", qpAnalyzer, L""); checkQueryEquals(L"((stop)^3)", qpAnalyzer, L""); checkQueryEquals(L"(stop)", qpAnalyzer, L""); checkQueryEquals(L"((stop))", qpAnalyzer, L""); BOOST_CHECK(MiscUtils::typeOf(getQuery(L"term term term", qpAnalyzer))); BOOST_CHECK(MiscUtils::typeOf(getQuery(L"term +stop", qpAnalyzer))); } BOOST_AUTO_TEST_CASE(testRange) { checkQueryEquals(L"[ a TO z]", AnalyzerPtr(), L"[a TO z]"); BOOST_CHECK_EQUAL(MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(), boost::dynamic_pointer_cast(getQuery(L"[ a TO z]", AnalyzerPtr()))->getRewriteMethod()); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene()); qp->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); BOOST_CHECK_EQUAL(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE(), boost::dynamic_pointer_cast(qp->parse(L"[ a TO z]"))->getRewriteMethod()); checkQueryEquals(L"[ a TO z ]", AnalyzerPtr(), L"[a TO z]"); checkQueryEquals(L"{ a TO z}", AnalyzerPtr(), L"{a TO z}"); checkQueryEquals(L"{ a TO z }", AnalyzerPtr(), L"{a TO z}"); checkQueryEquals(L"{ a TO z }^2.0", AnalyzerPtr(), L"{a TO z}^2.0"); checkQueryEquals(L"[ a TO z] OR bar", AnalyzerPtr(), L"[a TO z] bar"); checkQueryEquals(L"[ a TO z] AND bar", AnalyzerPtr(), L"+[a TO z] +bar"); checkQueryEquals(L"( bar blar { a TO z}) ", AnalyzerPtr(), L"bar blar {a TO z}"); checkQueryEquals(L"gack ( bar blar { a TO z}) ", AnalyzerPtr(), L"gack (bar blar {a TO z})"); } BOOST_AUTO_TEST_CASE(testLegacyDateRange) { DateTools::setDateOrder(DateTools::DATEORDER_DMY); checkQueryEquals(L"[01/02/02 TO 04/02/02]", AnalyzerPtr(), L"[0cx597uo0 TO 0cxayz9bz]"); checkQueryEquals(L"{01/02/02 04/02/02}", AnalyzerPtr(), L"{0cx597uo0 TO 0cx9jjeo0}"); } 
BOOST_AUTO_TEST_CASE(testDateRange) { DateTools::setDateOrder(DateTools::DATEORDER_DMY); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene()); // Don't set any date resolution and verify if DateField is used checkQueryEquals(qp, L"default", L"default:[01/02/02 TO 04/02/02]", L"[0cx597uo0 TO 0cxayz9bz]"); checkQueryEquals(qp, L"default", L"default:{01/02/02 TO 04/02/02}", L"{0cx597uo0 TO 0cx9jjeo0}"); // set a field specific date resolution qp->setDateResolution(L"month", DateTools::RESOLUTION_MONTH); // DateField should still be used for defaultField checkQueryEquals(qp, L"default", L"default:[01/02/02 TO 04/02/02]", L"[0cx597uo0 TO 0cxayz9bz]"); checkQueryEquals(qp, L"default", L"default:{01/02/02 TO 04/02/02}", L"{0cx597uo0 TO 0cx9jjeo0}"); // set default date resolution to MILLISECOND qp->setDateResolution(DateTools::RESOLUTION_MILLISECOND); // set second field specific date resolution qp->setDateResolution(L"hour", DateTools::RESOLUTION_HOUR); // for this field no field specific date resolution has been set, so verify if the default resolution is used checkQueryEquals(qp, L"default", L"default:[01/02/02 TO 04/02/02]", L"[20020201000000000 TO 20020204235959999]"); checkQueryEquals(qp, L"default", L"default:{01/02/02 TO 04/02/02}", L"{20020201000000000 TO 20020204000000000}"); // verify if field specific date resolutions are used for these two fields checkQueryEquals(qp, L"month", L"month:[01/02/02 TO 04/02/02]", L"[200202 TO 200202]"); checkQueryEquals(qp, L"month", L"month:{01/02/02 TO 04/02/02}", L"{200202 TO 200202}"); checkQueryEquals(qp, L"hour", L"hour:[01/02/02 TO 04/02/02]", L"[2002020100 TO 2002020423]"); checkQueryEquals(qp, L"hour", L"hour:{01/02/02 TO 04/02/02}", L"{2002020100 TO 2002020400}"); } BOOST_AUTO_TEST_CASE(testEscaped) { AnalyzerPtr a = newLucene(); checkQueryEquals(L"\\a", a, L"a"); checkQueryEquals(L"a\\-b:c", a, L"a-b:c"); checkQueryEquals(L"a\\+b:c", a, L"a+b:c"); checkQueryEquals(L"a\\:b:c", a, 
L"a:b:c"); checkQueryEquals(L"a\\\\b:c", a, L"a\\b:c"); checkQueryEquals(L"a:b\\-c", a, L"a:b-c"); checkQueryEquals(L"a:b\\+c", a, L"a:b+c"); checkQueryEquals(L"a:b\\:c", a, L"a:b:c"); checkQueryEquals(L"a:b\\\\c", a, L"a:b\\c"); checkQueryEquals(L"a:b\\-c*", a, L"a:b-c*"); checkQueryEquals(L"a:b\\+c*", a, L"a:b+c*"); checkQueryEquals(L"a:b\\:c*", a, L"a:b:c*"); checkQueryEquals(L"a:b\\\\c*", a, L"a:b\\c*"); checkQueryEquals(L"a:b\\-?c", a, L"a:b-?c"); checkQueryEquals(L"a:b\\+?c", a, L"a:b+?c"); checkQueryEquals(L"a:b\\:?c", a, L"a:b:?c"); checkQueryEquals(L"a:b\\\\?c", a, L"a:b\\?c"); checkQueryEquals(L"a:b\\-c~", a, L"a:b-c~0.5"); checkQueryEquals(L"a:b\\+c~", a, L"a:b+c~0.5"); checkQueryEquals(L"a:b\\:c~", a, L"a:b:c~0.5"); checkQueryEquals(L"a:b\\\\c~", a, L"a:b\\c~0.5"); checkQueryEquals(L"[ a\\- TO a\\+ ]", AnalyzerPtr(), L"[a- TO a+]"); checkQueryEquals(L"[ a\\: TO a\\~ ]", AnalyzerPtr(), L"[a: TO a~]"); checkQueryEquals(L"[ a\\\\ TO a\\* ]", AnalyzerPtr(), L"[a\\ TO a*]"); checkQueryEquals(L"[\"c\\:\\\\temp\\\\\\~foo0.txt\" TO \"c\\:\\\\temp\\\\\\~foo9.txt\"]", a, L"[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]"); checkQueryEquals(L"a\\\\\\+b", a, L"a\\+b"); checkQueryEquals(L"a \\\"b c\\\" d", a, L"a \"b c\" d"); checkQueryEquals(L"\"a \\\"b c\\\" d\"", a, L"\"a \"b c\" d\""); checkQueryEquals(L"\"a \\+b c d\"", a, L"\"a +b c d\""); checkQueryEquals(L"c\\:\\\\temp\\\\\\~foo.txt", a, L"c:\\temp\\~foo.txt"); checkParseException(L"XY\\"); // there must be a character after the escape char // test unicode escaping checkQueryEquals(L"a\\u0062c", a, L"abc"); checkQueryEquals(L"XY\\u005a", a, L"XYZ"); checkQueryEquals(L"XY\\u005A", a, L"XYZ"); checkQueryEquals(L"\"a \\\\\\u0028\\u0062\\\" c\"", a, L"\"a \\(b\" c\""); checkParseException(L"XY\\u005G"); // test non-hex character in escaped unicode sequence checkParseException(L"XY\\u005"); // test incomplete escaped unicode sequence checkQueryEquals(L"(item:\\\\ item:ABCD\\\\)", a, L"item:\\ item:ABCD\\"); 
checkParseException(L"(item:\\\\ item:ABCD\\\\))"); // unmatched closing parenthesis checkQueryEquals(L"\\*", a, L"*"); checkQueryEquals(L"\\\\", a, L"\\"); // escaped backslash checkParseException(L"\\"); // a backslash must always be escaped checkQueryEquals(L"(\"a\\\\\") or (\"b\")", a, L"a\\ or b"); } BOOST_AUTO_TEST_CASE(testQueryStringEscaping) { AnalyzerPtr a = newLucene(); checkEscapedQueryEquals(L"a-b:c", a, L"a\\-b\\:c"); checkEscapedQueryEquals(L"a+b:c", a, L"a\\+b\\:c"); checkEscapedQueryEquals(L"a:b:c", a, L"a\\:b\\:c"); checkEscapedQueryEquals(L"a\\b:c", a, L"a\\\\b\\:c"); checkEscapedQueryEquals(L"a:b-c", a, L"a\\:b\\-c"); checkEscapedQueryEquals(L"a:b+c", a, L"a\\:b\\+c"); checkEscapedQueryEquals(L"a:b:c", a, L"a\\:b\\:c"); checkEscapedQueryEquals(L"a:b\\c", a, L"a\\:b\\\\c"); checkEscapedQueryEquals(L"a:b-c*", a, L"a\\:b\\-c\\*"); checkEscapedQueryEquals(L"a:b+c*", a, L"a\\:b\\+c\\*"); checkEscapedQueryEquals(L"a:b:c*", a, L"a\\:b\\:c\\*"); checkEscapedQueryEquals(L"a:b\\\\c*", a, L"a\\:b\\\\\\\\c\\*"); checkEscapedQueryEquals(L"a:b-?c", a, L"a\\:b\\-\\?c"); checkEscapedQueryEquals(L"a:b+?c", a, L"a\\:b\\+\\?c"); checkEscapedQueryEquals(L"a:b:?c", a, L"a\\:b\\:\\?c"); checkEscapedQueryEquals(L"a:b?c", a, L"a\\:b\\?c"); checkEscapedQueryEquals(L"a:b-c~", a, L"a\\:b\\-c\\~"); checkEscapedQueryEquals(L"a:b+c~", a, L"a\\:b\\+c\\~"); checkEscapedQueryEquals(L"a:b:c~", a, L"a\\:b\\:c\\~"); checkEscapedQueryEquals(L"a:b\\c~", a, L"a\\:b\\\\c\\~"); checkEscapedQueryEquals(L"[ a - TO a+ ]", AnalyzerPtr(), L"\\[ a \\- TO a\\+ \\]"); checkEscapedQueryEquals(L"[ a : TO a~ ]", AnalyzerPtr(), L"\\[ a \\: TO a\\~ \\]"); checkEscapedQueryEquals(L"[ a\\ TO a* ]", AnalyzerPtr(), L"\\[ a\\\\ TO a\\* \\]"); checkEscapedQueryEquals(L"|| abc ||", a, L"\\|\\| abc \\|\\|"); checkEscapedQueryEquals(L"&& abc &&", a, L"\\&\\& abc \\&\\&"); } BOOST_AUTO_TEST_CASE(testTabNewlineCarriageReturn) { checkQueryEqualsDOA(L"+weltbank +worlbank", AnalyzerPtr(), L"+weltbank 
+worlbank"); checkQueryEqualsDOA(L"+weltbank\n+worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \n+worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \n +worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"+weltbank\r+worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \r+worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \r +worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"+weltbank\r\n+worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \r\n+worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \r\n +worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \r \n +worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"+weltbank\t+worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \t+worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); checkQueryEqualsDOA(L"weltbank \t +worlbank", AnalyzerPtr(), L"+weltbank +worlbank"); } BOOST_AUTO_TEST_CASE(testSimpleDAO) { checkQueryEqualsDOA(L"term term term", AnalyzerPtr(), L"+term +term +term"); checkQueryEqualsDOA(L"term +term term", AnalyzerPtr(), L"+term +term +term"); checkQueryEqualsDOA(L"term term +term", AnalyzerPtr(), L"+term +term +term"); checkQueryEqualsDOA(L"term +term +term", AnalyzerPtr(), L"+term +term +term"); checkQueryEqualsDOA(L"-term term term", AnalyzerPtr(), L"-term +term +term"); } BOOST_AUTO_TEST_CASE(testBoost) { HashSet stopWords = HashSet::newInstance(); stopWords.add(L"on"); StandardAnalyzerPtr oneStopAnalyzer = newLucene(LuceneVersion::LUCENE_CURRENT, stopWords); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", oneStopAnalyzer); QueryPtr q = qp->parse(L"on^1.0"); BOOST_CHECK(q); q = qp->parse(L"\"hello\"^2.0"); BOOST_CHECK(q); BOOST_CHECK_CLOSE_FRACTION(q->getBoost(), 2.0, 0.5); q 
= qp->parse(L"hello^2.0"); BOOST_CHECK(q); BOOST_CHECK_CLOSE_FRACTION(q->getBoost(), 2.0, 0.5); q = qp->parse(L"\"on\"^1.0"); BOOST_CHECK(q); QueryParserPtr qp2 = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene(LuceneVersion::LUCENE_CURRENT)); q = qp2->parse(L"the^3"); // "the" is a stop word so the result is an empty query BOOST_CHECK(q); BOOST_CHECK(q->toString().empty()); BOOST_CHECK_CLOSE_FRACTION(1.0, q->getBoost(), 0.01); } BOOST_AUTO_TEST_CASE(testException) { checkParseException(L"\"some phrase"); checkParseException(L"(foo bar"); checkParseException(L"foo bar))"); checkParseException(L"field:term:with:colon some more terms"); checkParseException(L"(sub query)^5.0^2.0 plus more"); checkParseException(L"secret AND illegal) AND access:confidential"); } BOOST_AUTO_TEST_CASE(testCustomQueryParserWildcard) { BOOST_CHECK_EXCEPTION(newLucene(L"contents", newLucene())->parse(L"a?t"), QueryParserError, check_exception(LuceneException::QueryParser)); } BOOST_AUTO_TEST_CASE(testCustomQueryParserFuzzy) { BOOST_CHECK_EXCEPTION(newLucene(L"contents", newLucene())->parse(L"xunit~"), QueryParserError, check_exception(LuceneException::QueryParser)); } BOOST_AUTO_TEST_CASE(testBooleanQuery) { BooleanQuery::setMaxClauseCount(2); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene()); // too many boolean clauses, so ParseException is expected BOOST_CHECK_EXCEPTION(qp->parse(L"one two three"), QueryParserError, check_exception(LuceneException::QueryParser)); } BOOST_AUTO_TEST_CASE(testPrecedence) { QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene()); QueryPtr query1 = qp->parse(L"A AND B OR C AND D"); QueryPtr query2 = qp->parse(L"+A +B +C +D"); BOOST_CHECK(query1->equals(query2)); } BOOST_AUTO_TEST_CASE(testLocalDateFormat) { DateTools::setDateOrder(DateTools::DATEORDER_DMY); RAMDirectoryPtr ramDir = newLucene(); IndexWriterPtr iw = newLucene(ramDir, newLucene(), true, 
IndexWriter::MaxFieldLengthLIMITED); addDateDoc(L"a", ptime(date(2005, Dec, 2), hours(10) + minutes(15) + seconds(33)), iw); addDateDoc(L"a", ptime(date(2005, Dec, 4), hours(22) + minutes(15) + seconds(00)), iw); iw->close(); IndexSearcherPtr is = newLucene(ramDir, true); checkHits(1, L"[1/12/2005 TO 3/12/2005]", is); checkHits(2, L"[1/12/2005 TO 4/12/2005]", is); checkHits(1, L"[3/12/2005 TO 4/12/2005]", is); checkHits(1, L"{1/12/2005 TO 3/12/2005}", is); checkHits(1, L"{1/12/2005 TO 4/12/2005}", is); checkHits(0, L"{3/12/2005 TO 4/12/2005}", is); is->close(); } namespace TestStarParsing { DECLARE_SHARED_PTR(StarParser) class StarParser : public QueryParser { public: StarParser(const String& f, AnalyzerPtr a) : QueryParser(LuceneVersion::LUCENE_CURRENT, f, a) { type = Collection::newInstance(1); } virtual ~StarParser() { } LUCENE_CLASS(StarParser); public: Collection type; public: virtual QueryPtr getWildcardQuery(const String& field, const String& termStr) { // override error checking of superclass type[0] = 1; return newLucene(newLucene(field, termStr)); } virtual QueryPtr getPrefixQuery(const String& field, const String& termStr) { // override error checking of superclass type[0] = 2; return newLucene(newLucene(field, termStr)); } virtual QueryPtr getFieldQuery(const String& field, const String& queryText) { type[0] = 3; return QueryParser::getFieldQuery(field, queryText); } }; } BOOST_AUTO_TEST_CASE(testStarParsing) { TestStarParsing::StarParserPtr qp = newLucene(L"field", newLucene()); TermQueryPtr tq = boost::dynamic_pointer_cast(qp->parse(L"foo:zoo*")); BOOST_CHECK_EQUAL(L"zoo", tq->getTerm()->text()); BOOST_CHECK_EQUAL(2, qp->type[0]); tq = boost::dynamic_pointer_cast(qp->parse(L"foo:zoo*^2")); BOOST_CHECK_EQUAL(L"zoo", tq->getTerm()->text()); BOOST_CHECK_EQUAL(2, qp->type[0]); BOOST_CHECK_EQUAL(tq->getBoost(), 2); tq = boost::dynamic_pointer_cast(qp->parse(L"foo:*")); BOOST_CHECK_EQUAL(L"*", tq->getTerm()->text()); BOOST_CHECK_EQUAL(1, qp->type[0]); // 
could be a valid prefix query in the future too tq = boost::dynamic_pointer_cast(qp->parse(L"foo:*^2")); BOOST_CHECK_EQUAL(L"*", tq->getTerm()->text()); BOOST_CHECK_EQUAL(1, qp->type[0]); BOOST_CHECK_EQUAL(tq->getBoost(), 2); tq = boost::dynamic_pointer_cast(qp->parse(L"*:foo")); BOOST_CHECK_EQUAL(L"*", tq->getTerm()->field()); BOOST_CHECK_EQUAL(L"foo", tq->getTerm()->text()); BOOST_CHECK_EQUAL(3, qp->type[0]); tq = boost::dynamic_pointer_cast(qp->parse(L"*:*")); BOOST_CHECK_EQUAL(L"*", tq->getTerm()->field()); BOOST_CHECK_EQUAL(L"*", tq->getTerm()->text()); BOOST_CHECK_EQUAL(1, qp->type[0]); // could be handled as a prefix query in the future tq = boost::dynamic_pointer_cast(qp->parse(L"(*:*)")); BOOST_CHECK_EQUAL(L"*", tq->getTerm()->field()); BOOST_CHECK_EQUAL(L"*", tq->getTerm()->text()); BOOST_CHECK_EQUAL(1, qp->type[0]); } BOOST_AUTO_TEST_CASE(testStopwords) { QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"a", newLucene(LuceneVersion::LUCENE_CURRENT, StopFilter::makeStopSet(newCollection(L"the", L"foo")))); QueryPtr result = qp->parse(L"a:the OR a:foo"); BOOST_CHECK(result); BOOST_CHECK(MiscUtils::typeOf(result)); BOOST_CHECK(boost::dynamic_pointer_cast(result)->getClauses().empty()); result = qp->parse(L"a:woo OR a:the"); BOOST_CHECK(result); BOOST_CHECK(MiscUtils::typeOf(result)); result = qp->parse(L"(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)"); BOOST_CHECK(result); BOOST_CHECK(MiscUtils::typeOf(result)); BOOST_CHECK_EQUAL(boost::dynamic_pointer_cast(result)->getClauses().size(), 2); } BOOST_AUTO_TEST_CASE(testPositionIncrement) { QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"a", newLucene(LuceneVersion::LUCENE_CURRENT, StopFilter::makeStopSet(newCollection(L"the", L"in", L"are", L"this")))); qp->setEnablePositionIncrements(true); String qtxt = L"\"the words in positions pos02578 are stopped in this phrasequery\""; // 0 2 5 7 8 Collection expectedPositions = newCollection(1, 3, 4, 6, 9); 
PhraseQueryPtr pq = boost::dynamic_pointer_cast(qp->parse(qtxt)); Collection t = pq->getTerms(); Collection pos = pq->getPositions(); for (int32_t i = 0; i < t.size(); ++i) BOOST_CHECK_EQUAL(expectedPositions[i], pos[i]); } BOOST_AUTO_TEST_CASE(testMatchAllDocs) { QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene()); BOOST_CHECK(newLucene()->equals(qp->parse(L"*:*"))); BOOST_CHECK(newLucene()->equals(qp->parse(L"(*:*)"))); BooleanQueryPtr bq = boost::dynamic_pointer_cast(qp->parse(L"+*:* -*:*")); BOOST_CHECK(MiscUtils::typeOf(bq->getClauses()[0]->getQuery())); BOOST_CHECK(MiscUtils::typeOf(bq->getClauses()[1]->getQuery())); } BOOST_AUTO_TEST_CASE(testPositionIncrements) { DirectoryPtr dir = newLucene(); AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); IndexWriterPtr w = newLucene(dir, a, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"f", L"the wizard of ozzy", Field::STORE_NO, Field::INDEX_ANALYZED)); w->addDocument(doc); IndexReaderPtr r = w->getReader(); w->close(); IndexSearcherPtr s = newLucene(r); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"f", a); QueryPtr q = qp->parse(L"\"wizard of ozzy\""); BOOST_CHECK_EQUAL(1, s->search(q, 1)->totalHits); r->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/000077500000000000000000000000001217574114600203265ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/search/BaseTestRangeFilterFixture.cpp000066400000000000000000000052561217574114600262460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTestRangeFilterFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "Random.h" namespace Lucene { TestIndex::TestIndex(int32_t minR, int32_t maxR, bool allowNegativeRandomInts) { this->minR = minR; this->maxR = maxR; this->allowNegativeRandomInts = allowNegativeRandomInts; this->index = newLucene(); } TestIndex::~TestIndex() { } BaseTestRangeFilterFixture::BaseTestRangeFilterFixture() { signedIndex = newLucene(INT_MAX, INT_MIN, true); unsignedIndex = newLucene(INT_MAX, 0, false); minId = 0; maxId = 10000; intLength = StringUtils::toString(INT_MAX).length(); random = newLucene(); build(signedIndex); build(unsignedIndex); } BaseTestRangeFilterFixture::~BaseTestRangeFilterFixture() { } void BaseTestRangeFilterFixture::build(TestIndexPtr index) { IndexWriterPtr writer = newLucene(index->index, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t d = minId; d <= maxId; ++d) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", pad(d), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); int32_t r = index->allowNegativeRandomInts ? 
random->nextInt() : random->nextInt(INT_MAX); index->maxR = std::max(index->maxR, r); index->minR = std::min(index->minR, r); doc->add(newLucene(L"rand", pad(r), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"body", L"body", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->optimize(); writer->close(); } String BaseTestRangeFilterFixture::pad(int32_t n) { StringStream buf; String p = L"0"; if (n < 0) { p = L"-"; n = INT_MAX + n + 1; } buf << p; String s = StringUtils::toString(n); for (int32_t i = s.length(); i <= intLength; ++i) buf << L"0"; buf << s; return buf.str(); } } LucenePlusPlus-rel_3.0.4/src/test/search/BaseTestRangeFilterTest.cpp000066400000000000000000000016771217574114600255420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTestRangeFilterFixture.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(BaseTestRangeFilterTest, BaseTestRangeFilterFixture) BOOST_AUTO_TEST_CASE(testPad) { Collection tests = newCollection(-9999999, -99560, -100, -1, 0, 3, 9, 10, 1000, 999999999); for (int32_t i = 0; i < tests.size() - 1; ++i) { int32_t a = tests[i]; int32_t b = tests[i + 1]; String aa = pad(a); String bb = pad(b); BOOST_CHECK_EQUAL(aa.length(), bb.length()); BOOST_CHECK(aa.compare(bb) < 0); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/Boolean2Test.cpp000066400000000000000000000233701217574114600233400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexSearcher.h" #include "IndexReader.h" #include "MockRAMDirectory.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexWriter.h" #include "TopScoreDocCollector.h" #include "TopFieldCollector.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "CheckHits.h" #include "QueryParser.h" #include "DefaultSimilarity.h" #include "BooleanQuery.h" #include "TermQuery.h" #include "Term.h" #include "WildcardQuery.h" #include "Sort.h" #include "QueryUtils.h" #include "PrefixQuery.h" #include "Random.h" using namespace Lucene; /// Test BooleanQuery2 against BooleanQuery by overriding the standard query parser. /// This also tests the scoring order of BooleanQuery. class Boolean2Fixture : public LuceneTestFixture { public: Boolean2Fixture() { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); docFields = newCollection(L"w1 w2 w3 w4 w5", L"w1 w3 w2 w3", L"w1 xx w2 yy w3", L"w1 w3 xx w2 yy w3"); for (int32_t i = 0; i < docFields.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(field, docFields[i], Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); searcher = newLucene(directory, true); // Make big index dir2 = newLucene(directory); // First multiply small test index mulFactor = 1; int32_t docCount = 0; do { DirectoryPtr copy = newLucene(dir2); IndexWriterPtr w = newLucene(dir2, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); w->addIndexesNoOptimize(newCollection(copy)); docCount = w->maxDoc(); w->close(); mulFactor *= 2; } while (docCount < 3000); IndexWriterPtr w = newLucene(dir2, newLucene(), 
IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field2", L"xxx", Field::STORE_NO, Field::INDEX_ANALYZED)); for (int32_t i = 0; i addDocument(doc); doc = newLucene(); doc->add(newLucene(L"field2", L"big bad bug", Field::STORE_NO, Field::INDEX_ANALYZED)); for (int32_t i = 0; i addDocument(doc); // optimize to 1 segment w->optimize(); reader = w->getReader(); w->close(); bigSearcher = newLucene(reader); } virtual ~Boolean2Fixture() { reader->close(); dir2->close(); } protected: IndexSearcherPtr searcher; IndexSearcherPtr bigSearcher; IndexReaderPtr reader; DirectoryPtr dir2; int32_t mulFactor; Collection docFields; public: static const int32_t NUM_EXTRA_DOCS; static const String field; public: QueryPtr makeQuery(const String& queryText) { return newLucene(LuceneVersion::LUCENE_CURRENT, field, newLucene())->parse(queryText); } void queriesTest(const String& queryText, Collection expDocNrs) { QueryPtr query1 = makeQuery(queryText); TopScoreDocCollectorPtr collector = TopScoreDocCollector::create(1000, false); searcher->search(query1, FilterPtr(), collector); Collection hits1 = collector->topDocs()->scoreDocs; QueryPtr query2 = makeQuery(queryText); // there should be no need to parse again... collector = TopScoreDocCollector::create(1000, true); searcher->search(query2, FilterPtr(), collector); Collection hits2 = collector->topDocs()->scoreDocs; BOOST_CHECK_EQUAL(mulFactor * collector->getTotalHits(), bigSearcher->search(query1, 1)->totalHits); CheckHits::checkHitsQuery(query2, hits1, hits2, expDocNrs); } /// Random rnd is passed in so that the exact same random query may be created more than once. 
BooleanQueryPtr randBoolQuery(RandomPtr rnd, bool allowMust, int32_t level, const String& field, Collection vals) { BooleanQueryPtr current = newLucene(rnd->nextInt() < 0); for (int32_t i = 0; i < rnd->nextInt(vals.size()) + 1; ++i) { int32_t qType = 0; // term query if (level > 0) qType = rnd->nextInt(10); QueryPtr q; if (qType < 3) q = newLucene(newLucene(field, vals[rnd->nextInt(vals.size())])); else if (qType < 7) q = newLucene(newLucene(field, L"w*")); else q = randBoolQuery(rnd, allowMust, level - 1, field, vals); int32_t r = rnd->nextInt(10); BooleanClause::Occur occur = BooleanClause::SHOULD; if (r < 2) occur = BooleanClause::MUST_NOT; else if (r < 5) { if (allowMust) occur = BooleanClause::MUST; else occur = BooleanClause::SHOULD; } current->add(q, occur); } return current; } }; const String Boolean2Fixture::field = L"field"; const int32_t Boolean2Fixture::NUM_EXTRA_DOCS = 6000; BOOST_FIXTURE_TEST_SUITE(Boolean2Test, Boolean2Fixture) BOOST_AUTO_TEST_CASE(testQueries01) { String queryText = L"+w3 +xx"; Collection expDocNrs = newCollection(2, 3); queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testQueries02) { String queryText = L"+w3 xx"; Collection expDocNrs = newCollection(2, 3, 1, 0); queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testQueries03) { String queryText = L"w3 xx"; Collection expDocNrs = newCollection(2, 3, 1, 0); queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testQueries04) { String queryText = L"w3 -xx"; Collection expDocNrs = newCollection(1, 0); queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testQueries05) { String queryText = L"+w3 -xx"; Collection expDocNrs = newCollection(1, 0); queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testQueries06) { String queryText = L"+w3 -xx -w5"; Collection expDocNrs = newCollection(1); queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testQueries07) { String queryText = L"-w3 -xx -w5"; Collection expDocNrs = Collection::newInstance(); 
queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testQueries08) { String queryText = L"+w3 xx -w5"; Collection expDocNrs = newCollection(2, 3, 1); queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testQueries09) { String queryText = L"+w3 +xx +w2 zz"; Collection expDocNrs = newCollection(2, 3); queriesTest(queryText, expDocNrs); } namespace TestQueries10 { class OverlapSimilarity : public DefaultSimilarity { public: virtual ~OverlapSimilarity() { } public: virtual double coord(int32_t overlap, int32_t maxOverlap) { return (double)overlap / ((double)maxOverlap - 1.0); } }; } BOOST_AUTO_TEST_CASE(testQueries10) { String queryText = L"+w3 +xx +w2 zz"; Collection expDocNrs = newCollection(2, 3); searcher->setSimilarity(newLucene()); queriesTest(queryText, expDocNrs); } BOOST_AUTO_TEST_CASE(testRandomQueries) { RandomPtr rnd = newLucene(17); Collection vals = newCollection(L"w1", L"w2", L"w3", L"w4", L"w5", L"xx", L"yy", L"zzz"); int32_t tot = 0; // increase number of iterations for more complete testing for (int32_t i = 0; i < 1000; ++i) { int32_t level = rnd->nextInt(3); BooleanQueryPtr q1 = randBoolQuery(rnd, rnd->nextInt() % 2 == 0, level, field, vals); // Can't sort by relevance since floating point numbers may not quite match up. 
SortPtr sort = Sort::INDEXORDER(); QueryUtils::check(q1, searcher); TopFieldCollectorPtr collector = TopFieldCollector::create(sort, 1000, false, true, true, true); searcher->search(q1, FilterPtr(), collector); Collection hits1 = collector->topDocs()->scoreDocs; collector = TopFieldCollector::create(sort, 1000, false, true, true, false); searcher->search(q1, FilterPtr(), collector); Collection hits2 = collector->topDocs()->scoreDocs; tot += hits2.size(); CheckHits::checkEqual(q1, hits1, hits2); BooleanQueryPtr q3 = newLucene(); q3->add(q1, BooleanClause::SHOULD); q3->add(newLucene(newLucene(L"field2", L"b")), BooleanClause::SHOULD); TopDocsPtr hits4 = bigSearcher->search(q3, 1); BOOST_CHECK_EQUAL(mulFactor * collector->getTotalHits() + NUM_EXTRA_DOCS / 2, hits4->totalHits); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/BooleanMinShouldMatchTest.cpp000066400000000000000000000364701217574114600260630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "BooleanQuery.h" #include "TermQuery.h" #include "Term.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "QueryUtils.h" #include "WildcardQuery.h" #include "Random.h" using namespace Lucene; class BooleanMinShouldMatchFixture : public LuceneTestFixture { public: BooleanMinShouldMatchFixture() { Collection data = newCollection( L"A 1 2 3 4 5 6", L"Z 4 5 6", L"", L"B 2 4 5 6", L"Y 3 5 6", L"", L"C 3 6", L"X 4 5 6" ); index = newLucene(); IndexWriterPtr writer = newLucene(index, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < data.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"all", L"all", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); if (!data[i].empty()) doc->add(newLucene(L"data", data[i], Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->optimize(); writer->close(); r = IndexReader::open(index, true); s = newLucene(r); } virtual ~BooleanMinShouldMatchFixture() { } public: DirectoryPtr index; IndexReaderPtr r; IndexSearcherPtr s; public: void verifyNrHits(QueryPtr q, int32_t expected) { Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(expected, h.size()); QueryUtils::check(q, s); } /// Random rnd is passed in so that the exact same random query may be created more than once. 
BooleanQueryPtr randBoolQuery(RandomPtr rnd, bool allowMust, int32_t level, const String& field, Collection vals) { BooleanQueryPtr current = newLucene(rnd->nextInt() < 0); for (int32_t i = 0; i < rnd->nextInt(vals.size()) + 1; ++i) { int32_t qType = 0; // term query if (level > 0) qType = rnd->nextInt(10); QueryPtr q; if (qType < 3) q = newLucene(newLucene(field, vals[rnd->nextInt(vals.size())])); else if (qType < 7) q = newLucene(newLucene(field, L"w*")); else q = randBoolQuery(rnd, allowMust, level - 1, field, vals); int32_t r = rnd->nextInt(10); BooleanClause::Occur occur = BooleanClause::SHOULD; if (r < 2) occur = BooleanClause::MUST_NOT; else if (r < 5) { if (allowMust) occur = BooleanClause::MUST; else occur = BooleanClause::SHOULD; } current->add(q, occur); } return current; } void minNrCB(RandomPtr rnd, BooleanQueryPtr q) { Collection c = q->getClauses(); int32_t opt = 0; for (int32_t i = 0; i < c.size(); ++i) { if (c[i]->getOccur() == BooleanClause::SHOULD) ++opt; } q->setMinimumNumberShouldMatch(rnd->nextInt(opt + 2)); } }; BOOST_FIXTURE_TEST_SUITE(BooleanMinShouldMatchTest, BooleanMinShouldMatchFixture) BOOST_AUTO_TEST_CASE(testAllOptional) { BooleanQueryPtr q = newLucene(); for (int32_t i = 1; i <= 4; ++i) q->add(newLucene(newLucene(L"data", StringUtils::toString(i))), BooleanClause::SHOULD); q->setMinimumNumberShouldMatch(2); // match at least two of 4 verifyNrHits(q, 2); } BOOST_AUTO_TEST_CASE(testOneReqAndSomeOptional) { // one required, some optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"all", L"all")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"5")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::SHOULD); q->setMinimumNumberShouldMatch(2); // 2 of 3 optional verifyNrHits(q, 5); } BOOST_AUTO_TEST_CASE(testSomeReqAndSomeOptional) { // two required, some optional BooleanQueryPtr q = newLucene(); 
q->add(newLucene(newLucene(L"all", L"all")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"6")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"5")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::SHOULD); q->setMinimumNumberShouldMatch(2); // 2 of 3 optional verifyNrHits(q, 5); } BOOST_AUTO_TEST_CASE(testOneProhibAndSomeOptional) { // one prohibited, some optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"data", L"1")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::MUST_NOT); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->setMinimumNumberShouldMatch(2); // 2 of 3 optional verifyNrHits(q, 1); } BOOST_AUTO_TEST_CASE(testSomeProhibAndSomeOptional) { // two prohibited, some optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"data", L"1")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::MUST_NOT); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"C")), BooleanClause::MUST_NOT); q->setMinimumNumberShouldMatch(2); // 2 of 3 optional verifyNrHits(q, 1); } BOOST_AUTO_TEST_CASE(testOneReqOneProhibAndSomeOptional) { // one required, one prohibited, some optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"data", L"6")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"5")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::MUST_NOT); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"1")), BooleanClause::SHOULD); 
q->setMinimumNumberShouldMatch(3); // 3 of 4 optional verifyNrHits(q, 1); } BOOST_AUTO_TEST_CASE(testSomeReqOneProhibAndSomeOptional) { // two required, one prohibited, some optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"all", L"all")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"6")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"5")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::MUST_NOT); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"1")), BooleanClause::SHOULD); q->setMinimumNumberShouldMatch(3); // 3 of 4 optional verifyNrHits(q, 1); } BOOST_AUTO_TEST_CASE(testOneReqSomeProhibAndSomeOptional) { // one required, two prohibited, some optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"data", L"6")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"5")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::MUST_NOT); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"1")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"C")), BooleanClause::MUST_NOT); q->setMinimumNumberShouldMatch(3); // 3 of 4 optional verifyNrHits(q, 1); } BOOST_AUTO_TEST_CASE(testSomeReqSomeProhibAndSomeOptional) { // two required, two prohibited, some optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"all", L"all")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"6")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"5")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::MUST_NOT); q->add(newLucene(newLucene(L"data", L"2")), 
BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"1")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"C")), BooleanClause::MUST_NOT); q->setMinimumNumberShouldMatch(3); // 3 of 4 optional verifyNrHits(q, 1); } BOOST_AUTO_TEST_CASE(testMinHigherThenNumOptional) { // two required, two prohibited, some optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"all", L"all")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"6")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"5")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"4")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::MUST_NOT); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"1")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"C")), BooleanClause::MUST_NOT); q->setMinimumNumberShouldMatch(90); // 90 of 4 optional verifyNrHits(q, 0); } BOOST_AUTO_TEST_CASE(testMinEqualToNumOptional) { // two required, two optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"all", L"all")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"6")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::SHOULD); q->setMinimumNumberShouldMatch(2); // 2 of 2 optional verifyNrHits(q, 1); } BOOST_AUTO_TEST_CASE(testOneOptionalEqualToMin) { // two required, one optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"all", L"all")), BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"3")), BooleanClause::SHOULD); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::MUST); q->setMinimumNumberShouldMatch(1); // 1 of 1 optional verifyNrHits(q, 1); } BOOST_AUTO_TEST_CASE(testNoOptionalButMin) { // two required, no optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"all", L"all")), 
BooleanClause::MUST); q->add(newLucene(newLucene(L"data", L"2")), BooleanClause::MUST); q->setMinimumNumberShouldMatch(1); // 1 of 0 optional verifyNrHits(q, 0); } BOOST_AUTO_TEST_CASE(testNoOptionalButMin2) { // one required, no optional BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"all", L"all")), BooleanClause::MUST); q->setMinimumNumberShouldMatch(1); // 1 of 0 optional verifyNrHits(q, 0); } BOOST_AUTO_TEST_CASE(testRandomQueries) { RandomPtr rnd = newLucene(17); String field = L"data"; Collection vals = Collection::newInstance(); vals.add(L"1"); vals.add(L"2"); vals.add(L"3"); vals.add(L"4"); vals.add(L"5"); vals.add(L"6"); vals.add(L"A"); vals.add(L"Z"); vals.add(L"B"); vals.add(L"Y"); vals.add(L"Z"); vals.add(L"X"); vals.add(L"foo"); int32_t maxLev = 4; // increase number of iterations for more complete testing for (int32_t i = 0; i < 1000; ++i) { int32_t lev = rnd->nextInt(maxLev); int32_t seed = rnd->nextInt(); RandomPtr rndQuery = newLucene(); rndQuery->setSeed(seed); BooleanQueryPtr q1 = randBoolQuery(rndQuery, true, lev, field, vals); rndQuery->setSeed(seed); BooleanQueryPtr q2 = randBoolQuery(rndQuery, true, lev, field, vals); // only set minimumNumberShouldMatch on the top level query since setting at a lower level can change the score. minNrCB(rnd, q2); // Can't use Hits because normalized scores will mess things up. // The non-sorting version of search() that returns TopDocs will not normalize scores. TopDocsPtr top1 = s->search(q1, FilterPtr(), 100); TopDocsPtr top2 = s->search(q2, FilterPtr(), 100); QueryUtils::check(q1, s); QueryUtils::check(q2, s); // The constrained query should be a superset to the unconstrained query. 
BOOST_CHECK(top2->totalHits <= top1->totalHits); for (int32_t hit = 0; hit < top2->totalHits; ++hit) { int32_t id = top2->scoreDocs[hit]->doc; double score = top2->scoreDocs[hit]->score; bool found = false; // find this doc in other hits for (int32_t other = 0; other < top1->totalHits; ++other) { if (top1->scoreDocs[other]->doc == id) { found = true; double otherScore = top1->scoreDocs[other]->score; // check if scores match BOOST_CHECK_CLOSE_FRACTION(otherScore, score, 1.0e-6f); } } // check if subset BOOST_CHECK(found); } } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/BooleanOrTest.cpp000066400000000000000000000110461217574114600235540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TermQuery.h" #include "IndexSearcher.h" #include "Term.h" #include "BooleanQuery.h" #include "QueryUtils.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "TopDocs.h" using namespace Lucene; class BooleanOrFixture : public LuceneTestFixture { public: BooleanOrFixture() { t1 = newLucene(newLucene(FIELD_T, L"files")); t2 = newLucene(newLucene(FIELD_T, L"deleting")); c1 = newLucene(newLucene(FIELD_C, L"production")); c2 = newLucene(newLucene(FIELD_C, L"optimize")); RAMDirectoryPtr rd = newLucene(); IndexWriterPtr writer = newLucene(rd, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d = newLucene(); d->add(newLucene(FIELD_T, L"Optimize not deleting all files", Field::STORE_YES, Field::INDEX_ANALYZED)); d->add(newLucene(FIELD_C, L"Deleted when I run an 
optimize in our production environment.", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d); writer->close(); searcher = newLucene(rd, true); } virtual ~BooleanOrFixture() { } protected: static const String FIELD_T; static const String FIELD_C; TermQueryPtr t1; TermQueryPtr t2; TermQueryPtr c1; TermQueryPtr c2; IndexSearcherPtr searcher; public: int32_t search(QueryPtr q) { QueryUtils::check(q, searcher); return searcher->search(q, FilterPtr(), 1000)->totalHits; } }; const String BooleanOrFixture::FIELD_T = L"T"; const String BooleanOrFixture::FIELD_C = L"C"; BOOST_FIXTURE_TEST_SUITE(BooleanOrTest, BooleanOrFixture) BOOST_AUTO_TEST_CASE(testElements) { BOOST_CHECK_EQUAL(1, search(t1)); BOOST_CHECK_EQUAL(1, search(t2)); BOOST_CHECK_EQUAL(1, search(c1)); BOOST_CHECK_EQUAL(1, search(c2)); } BOOST_AUTO_TEST_CASE(testFlat) { BooleanQueryPtr q = newLucene(); q->add(newLucene(t1, BooleanClause::SHOULD)); q->add(newLucene(t2, BooleanClause::SHOULD)); q->add(newLucene(c1, BooleanClause::SHOULD)); q->add(newLucene(c2, BooleanClause::SHOULD)); BOOST_CHECK_EQUAL(1, search(q)); } BOOST_AUTO_TEST_CASE(testParenthesisMust) { BooleanQueryPtr q3 = newLucene(); q3->add(newLucene(t1, BooleanClause::SHOULD)); q3->add(newLucene(t2, BooleanClause::SHOULD)); BooleanQueryPtr q4 = newLucene(); q4->add(newLucene(c1, BooleanClause::MUST)); q4->add(newLucene(c2, BooleanClause::MUST)); BooleanQueryPtr q2 = newLucene(); q2->add(q3, BooleanClause::SHOULD); q2->add(q4, BooleanClause::SHOULD); BOOST_CHECK_EQUAL(1, search(q2)); } BOOST_AUTO_TEST_CASE(testParenthesisMust2) { BooleanQueryPtr q3 = newLucene(); q3->add(newLucene(t1, BooleanClause::SHOULD)); q3->add(newLucene(t2, BooleanClause::SHOULD)); BooleanQueryPtr q4 = newLucene(); q4->add(newLucene(c1, BooleanClause::SHOULD)); q4->add(newLucene(c2, BooleanClause::SHOULD)); BooleanQueryPtr q2 = newLucene(); q2->add(q3, BooleanClause::SHOULD); q2->add(q4, BooleanClause::MUST); BOOST_CHECK_EQUAL(1, search(q2)); } 
BOOST_AUTO_TEST_CASE(testParenthesisShould) { BooleanQueryPtr q3 = newLucene(); q3->add(newLucene(t1, BooleanClause::SHOULD)); q3->add(newLucene(t2, BooleanClause::SHOULD)); BooleanQueryPtr q4 = newLucene(); q4->add(newLucene(c1, BooleanClause::SHOULD)); q4->add(newLucene(c2, BooleanClause::SHOULD)); BooleanQueryPtr q2 = newLucene(); q2->add(q3, BooleanClause::SHOULD); q2->add(q4, BooleanClause::SHOULD); BOOST_CHECK_EQUAL(1, search(q2)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/BooleanPrefixQueryTest.cpp000066400000000000000000000047661217574114600254720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "PrefixQuery.h" #include "Term.h" #include "BooleanQuery.h" #include "ConstantScoreQuery.h" #include "Filter.h" #include "DocIdSetIterator.h" #include "DocIdSet.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(BooleanPrefixQueryTest, LuceneTestFixture) static int32_t getCount(IndexReaderPtr r, QueryPtr q) { if (MiscUtils::typeOf(q)) return boost::dynamic_pointer_cast(q)->getClauses().size(); else if (MiscUtils::typeOf(q)) { DocIdSetIteratorPtr iter = boost::dynamic_pointer_cast(q)->getFilter()->getDocIdSet(r)->iterator(); int32_t count = 0; while (iter->nextDoc() != DocIdSetIterator::NO_MORE_DOCS) ++count; return count; } else { BOOST_FAIL("unexpected query"); return 0; } } BOOST_AUTO_TEST_CASE(testMethod) { RAMDirectoryPtr directory = newLucene(); Collection categories = newCollection(L"food", 
L"foodanddrink", L"foodanddrinkandgoodtimes", L"food and drink"); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < categories.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"category", categories[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->close(); IndexReaderPtr reader = IndexReader::open(directory, true); PrefixQueryPtr query = newLucene(newLucene(L"category", L"foo")); QueryPtr rw1 = query->rewrite(reader); BooleanQueryPtr bq = newLucene(); bq->add(query, BooleanClause::MUST); QueryPtr rw2 = bq->rewrite(reader); BOOST_CHECK_EQUAL(getCount(reader, rw1), getCount(reader, rw2)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/BooleanQueryTest.cpp000066400000000000000000000111731217574114600243020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "BooleanQuery.h" #include "TermQuery.h" #include "Term.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "PhraseQuery.h" #include "DisjunctionMaxQuery.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(BooleanQueryTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testEquality) { BooleanQueryPtr bq1 = newLucene(); bq1->add(newLucene(newLucene(L"field", L"value1")), BooleanClause::SHOULD); bq1->add(newLucene(newLucene(L"field", L"value2")), BooleanClause::SHOULD); BooleanQueryPtr nested1 = newLucene(); nested1->add(newLucene(newLucene(L"field", L"nestedvalue1")), BooleanClause::SHOULD); nested1->add(newLucene(newLucene(L"field", L"nestedvalue2")), BooleanClause::SHOULD); bq1->add(nested1, BooleanClause::SHOULD); BooleanQueryPtr bq2 = newLucene(); bq2->add(newLucene(newLucene(L"field", L"value1")), BooleanClause::SHOULD); bq2->add(newLucene(newLucene(L"field", L"value2")), BooleanClause::SHOULD); BooleanQueryPtr nested2 = newLucene(); nested2->add(newLucene(newLucene(L"field", L"nestedvalue1")), BooleanClause::SHOULD); nested2->add(newLucene(newLucene(L"field", L"nestedvalue2")), BooleanClause::SHOULD); bq2->add(nested2, BooleanClause::SHOULD); BOOST_CHECK(bq1->equals(bq2)); } BOOST_AUTO_TEST_CASE(testException) { BOOST_CHECK_EXCEPTION(BooleanQuery::setMaxClauseCount(0), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } BOOST_AUTO_TEST_CASE(testNullOrSubScorer) { DirectoryPtr dir = newLucene(); IndexWriterPtr w = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"a b c d", Field::STORE_NO, Field::INDEX_ANALYZED)); w->addDocument(doc); IndexReaderPtr r = w->getReader(); 
IndexSearcherPtr s = newLucene(r); BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"field", L"a")), BooleanClause::SHOULD); double score = s->search(q, 10)->getMaxScore(); QueryPtr subQuery = newLucene(newLucene(L"field", L"not_in_index")); subQuery->setBoost(0); q->add(subQuery, BooleanClause::SHOULD); double score2 = s->search(q, 10)->getMaxScore(); BOOST_CHECK_CLOSE_FRACTION(score * 0.5, score2, 1e-6); BooleanQueryPtr qq = boost::dynamic_pointer_cast(q->clone()); PhraseQueryPtr phrase = newLucene(); phrase->add(newLucene(L"field", L"not_in_index")); phrase->add(newLucene(L"field", L"another_not_in_index")); phrase->setBoost(0); qq->add(phrase, BooleanClause::SHOULD); score2 = s->search(qq, 10)->getMaxScore(); BOOST_CHECK_CLOSE_FRACTION(score * (1.0 / 3), score2, 1e-6); // now test BooleanScorer2 subQuery = newLucene(newLucene(L"field", L"b")); subQuery->setBoost(0); q->add(subQuery, BooleanClause::MUST); score2 = s->search(q, 10)->getMaxScore(); BOOST_CHECK_CLOSE_FRACTION(score * (2.0 / 3), score2, 1e-6); // PhraseQuery with no terms added returns a null scorer PhraseQueryPtr pq = newLucene(); q->add(pq, BooleanClause::SHOULD); BOOST_CHECK_EQUAL(1, s->search(q, 10)->totalHits); // A required clause which returns null scorer should return null scorer to IndexSearcher. q = newLucene(); pq = newLucene(); q->add(newLucene(newLucene(L"field", L"a")), BooleanClause::SHOULD); q->add(pq, BooleanClause::MUST); BOOST_CHECK_EQUAL(0, s->search(q, 10)->totalHits); DisjunctionMaxQueryPtr dmq = newLucene(1.0); dmq->add(newLucene(newLucene(L"field", L"a"))); dmq->add(pq); BOOST_CHECK_EQUAL(1, s->search(dmq, 10)->totalHits); r->close(); w->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/BooleanScorerTest.cpp000066400000000000000000000071451217574114600244360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "BooleanQuery.h" #include "TermQuery.h" #include "Term.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "Similarity.h" #include "BooleanScorer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(BooleanScorerTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testMethod) { static const String FIELD = L"category"; RAMDirectoryPtr directory = newLucene(); Collection values = newCollection(L"1", L"2", L"3", L"4"); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < values.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, values[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->close(); BooleanQueryPtr booleanQuery1 = newLucene(); booleanQuery1->add(newLucene(newLucene(FIELD, L"1")), BooleanClause::SHOULD); booleanQuery1->add(newLucene(newLucene(FIELD, L"2")), BooleanClause::SHOULD); BooleanQueryPtr query = newLucene(); query->add(booleanQuery1, BooleanClause::MUST); query->add(newLucene(newLucene(FIELD, L"9")), BooleanClause::MUST_NOT); IndexSearcherPtr indexSearcher = newLucene(directory, true); Collection hits = indexSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); } namespace TestEmptyBucketWithMoreDocs { class EmptyScorer : public Scorer { public: EmptyScorer(SimilarityPtr similarity) : Scorer(similarity) { doc = -1; } virtual ~EmptyScorer() { } protected: int32_t doc; public: virtual double score() { return 0.0; } virtual int32_t docID() { return doc; } virtual int32_t 
nextDoc() { doc = doc == -1 ? 3000 : NO_MORE_DOCS; return doc; } virtual int32_t advance(int32_t target) { doc = target <= 3000 ? 3000 : NO_MORE_DOCS; return doc; } }; } BOOST_AUTO_TEST_CASE(testEmptyBucketWithMoreDocs) { // This test checks the logic of nextDoc() when all sub scorers have docs beyond the first bucket // (for example). Currently, the code relies on the 'more' variable to work properly, and this // test ensures that if the logic changes, we have a test to back it up. SimilarityPtr sim = Similarity::getDefault(); Collection scorers = newCollection(newLucene(sim)); BooleanScorerPtr bs = newLucene(sim, 1, scorers, Collection()); BOOST_CHECK_EQUAL(3000, bs->nextDoc()); BOOST_CHECK_EQUAL(DocIdSetIterator::NO_MORE_DOCS, bs->nextDoc()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/CachingSpanFilterTest.cpp000066400000000000000000000103471217574114600252230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "MockRAMDirectory.h" #include "CachingSpanFilter.h" #include "CachingWrapperFilter.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "Field.h" #include "Document.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "MatchAllDocsQuery.h" #include "ConstantScoreQuery.h" #include "SpanFilter.h" #include "SpanTermQuery.h" #include "SpanQueryFilter.h" #include "TermQuery.h" #include "Term.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CachingSpanFilterTest, LuceneTestFixture) static IndexReaderPtr refreshReader(IndexReaderPtr reader) { IndexReaderPtr oldReader = reader; reader = reader->reopen(); if (reader != oldReader) oldReader->close(); return reader; } BOOST_AUTO_TEST_CASE(testEnforceDeletions) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); IndexReaderPtr reader = writer->getReader(); IndexSearcherPtr searcher = newLucene(reader); // add a doc, refresh the reader, and check that its there DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", L"1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); reader = refreshReader(reader); searcher = newLucene(reader); TopDocsPtr docs = searcher->search(newLucene(), 1); BOOST_CHECK_EQUAL(1, docs->totalHits); SpanFilterPtr startFilter = newLucene(newLucene(newLucene(L"id", L"1"))); // ignore deletions CachingSpanFilterPtr filter = newLucene(startFilter, CachingWrapperFilter::DELETES_IGNORE); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); ConstantScoreQueryPtr constantScore = newLucene(filter); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); // now delete the doc, refresh the reader, and see that it's not there 
writer->deleteDocuments(newLucene(L"id", L"1")); reader = refreshReader(reader); searcher = newLucene(reader); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(0, docs->totalHits); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); // force cache to regenerate filter = newLucene(startFilter, CachingWrapperFilter::DELETES_RECACHE); writer->addDocument(doc); reader = refreshReader(reader); searcher = newLucene(reader); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); constantScore = newLucene(filter); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); // make sure we get a cache hit when we reopen readers that had no new deletions IndexReaderPtr newReader = refreshReader(reader); BOOST_CHECK_NE(reader, newReader); reader = newReader; searcher = newLucene(reader); int32_t missCount = filter->missCount; docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); BOOST_CHECK_EQUAL(missCount, filter->missCount); // now delete the doc, refresh the reader, and see that it's not there writer->deleteDocuments(newLucene(L"id", L"1")); reader = refreshReader(reader); searcher = newLucene(reader); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(0, docs->totalHits); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(0, docs->totalHits); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/CachingWrapperFilterTest.cpp000066400000000000000000000242561217574114600257460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "KeywordAnalyzer.h" #include "WhitespaceAnalyzer.h" #include "IndexReader.h" #include "MockFilter.h" #include "CachingWrapperFilter.h" #include "QueryWrapperFilter.h" #include "TermQuery.h" #include "Term.h" #include "NumericRangeFilter.h" #include "FieldCacheRangeFilter.h" #include "OpenBitSet.h" #include "DocIdSet.h" #include "OpenBitSetDISI.h" #include "IndexSearcher.h" #include "Field.h" #include "Document.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "MatchAllDocsQuery.h" #include "ConstantScoreQuery.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CachingWrapperFilterTest, LuceneTestFixture) static void checkDocIdSetCacheable(IndexReaderPtr reader, FilterPtr filter, bool shouldCacheable) { CachingWrapperFilterPtr cacher = newLucene(filter); DocIdSetPtr originalSet = filter->getDocIdSet(reader); DocIdSetPtr cachedSet = cacher->getDocIdSet(reader); BOOST_CHECK(cachedSet->isCacheable()); BOOST_CHECK_EQUAL(shouldCacheable, originalSet->isCacheable()); if (originalSet->isCacheable()) BOOST_CHECK(MiscUtils::equalTypes(originalSet, cachedSet)); else BOOST_CHECK(MiscUtils::typeOf(cachedSet)); } static IndexReaderPtr refreshReader(IndexReaderPtr reader) { IndexReaderPtr oldReader = reader; reader = reader->reopen(); if (reader != oldReader) oldReader->close(); return reader; } BOOST_AUTO_TEST_CASE(testCachingWorks) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); MockFilterPtr filter = newLucene(); CachingWrapperFilterPtr cacher = newLucene(filter); // first time, nested filter is called cacher->getDocIdSet(reader); BOOST_CHECK(filter->wasCalled()); // make sure no exception if cache is holding 
the wrong docIdSet cacher->getDocIdSet(reader); // second time, nested filter should not be called filter->clear(); cacher->getDocIdSet(reader); BOOST_CHECK(!filter->wasCalled()); reader->close(); } namespace TestNullDocIdSet { class NullDocIdSetFilter : public Filter { public: virtual ~NullDocIdSetFilter() { } public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { return DocIdSetPtr(); } }; } BOOST_AUTO_TEST_CASE(testNullDocIdSet) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); FilterPtr filter = newLucene(); CachingWrapperFilterPtr cacher = newLucene(filter); // the caching filter should return the empty set constant BOOST_CHECK_EQUAL(DocIdSet::EMPTY_DOCIDSET(), cacher->getDocIdSet(reader)); reader->close(); } namespace TestNullDocIdSetIterator { class NullDocIdSetIterator : public DocIdSet { public: virtual ~NullDocIdSetIterator() { } public: virtual DocIdSetIteratorPtr iterator() { return DocIdSetIteratorPtr(); } }; class NullDocIdSetIteratorFilter : public Filter { public: virtual ~NullDocIdSetIteratorFilter() { } public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { return newLucene(); } }; } BOOST_AUTO_TEST_CASE(testNullDocIdSetIterator) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); FilterPtr filter = newLucene(); CachingWrapperFilterPtr cacher = newLucene(filter); // the caching filter should return the empty set constant BOOST_CHECK_EQUAL(DocIdSet::EMPTY_DOCIDSET(), cacher->getDocIdSet(reader)); reader->close(); } namespace TestIsCacheable { class OpenBitSetFilter : public Filter { public: virtual ~OpenBitSetFilter() { } public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { return newLucene(); } }; } 
BOOST_AUTO_TEST_CASE(testIsCacheable) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); // not cacheable checkDocIdSetCacheable(reader, newLucene(newLucene(newLucene(L"test", L"value"))), false); // returns default empty docidset, always cacheable checkDocIdSetCacheable(reader, NumericRangeFilter::newIntRange(L"test", 10000, -10000, true, true), true); // is cacheable checkDocIdSetCacheable(reader, FieldCacheRangeFilter::newIntRange(L"test", 10, 20, true, true), true); // a openbitset filter is always cacheable checkDocIdSetCacheable(reader, newLucene(), true); } BOOST_AUTO_TEST_CASE(testEnforceDeletions) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); IndexReaderPtr reader = writer->getReader(); IndexSearcherPtr searcher = newLucene(reader); // add a doc, refresh the reader, and check that its there DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", L"1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); reader = refreshReader(reader); searcher = newLucene(reader); TopDocsPtr docs = searcher->search(newLucene(), 1); BOOST_CHECK_EQUAL(1, docs->totalHits); FilterPtr startFilter = newLucene(newLucene(newLucene(L"id", L"1"))); // ignore deletions CachingWrapperFilterPtr filter = newLucene(startFilter, CachingWrapperFilter::DELETES_IGNORE); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); ConstantScoreQueryPtr constantScore = newLucene(filter); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); // now delete the doc, refresh the reader, and see that it's not there writer->deleteDocuments(newLucene(L"id", L"1")); reader = refreshReader(reader); searcher = newLucene(reader); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(0, 
docs->totalHits); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); // force cache to regenerate filter = newLucene(startFilter, CachingWrapperFilter::DELETES_RECACHE); writer->addDocument(doc); reader = refreshReader(reader); searcher = newLucene(reader); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); constantScore = newLucene(filter); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); // make sure we get a cache hit when we reopen reader that had no change to deletions IndexReaderPtr newReader = refreshReader(reader); BOOST_CHECK_NE(reader, newReader); reader = newReader; searcher = newLucene(reader); int32_t missCount = filter->missCount; docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); BOOST_CHECK_EQUAL(missCount, filter->missCount); // now delete the doc, refresh the reader, and see that it's not there writer->deleteDocuments(newLucene(L"id", L"1")); reader = refreshReader(reader); searcher = newLucene(reader); missCount = filter->missCount; docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(missCount + 1, filter->missCount); BOOST_CHECK_EQUAL(0, docs->totalHits); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(0, docs->totalHits); // apply deletions dynamically filter = newLucene(startFilter, CachingWrapperFilter::DELETES_DYNAMIC); writer->addDocument(doc); reader = refreshReader(reader); searcher = newLucene(reader); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); constantScore = newLucene(filter); docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(1, docs->totalHits); // now delete the doc, refresh the reader, and see that it's not there writer->deleteDocuments(newLucene(L"id", L"1")); reader = refreshReader(reader); searcher = newLucene(reader); docs = searcher->search(newLucene(), filter, 1); BOOST_CHECK_EQUAL(0, docs->totalHits); missCount = 
filter->missCount; docs = searcher->search(constantScore, 1); BOOST_CHECK_EQUAL(0, docs->totalHits); // doesn't count as a miss BOOST_CHECK_EQUAL(missCount, filter->missCount); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/CheckHits.cpp000066400000000000000000000234511217574114600227040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "CheckHits.h" #include "Searcher.h" #include "Explanation.h" #include "QueryUtils.h" #include "Collector.h" #include "Query.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "MultiSearcher.h" #include "IndexSearcher.h" #include "Scorer.h" #include "MiscUtils.h" namespace Lucene { /// Some explains methods calculate their values though a slightly different order of operations /// from the actual scoring method - this allows for a small amount of variation const double CheckHits::EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005; class SetCollector : public Collector { public: SetCollector(Set bag) { this->bag = bag; this->base = 0; } virtual ~SetCollector() { } public: Set bag; protected: int32_t base; public: virtual void setScorer(ScorerPtr scorer) { } virtual void collect(int32_t doc) { bag.add(doc + base); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { base = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; /// Asserts that the score explanation for every document matching a query corresponds with the true score. /// /// NOTE: this HitCollector should only be used with the Query and Searcher specified at when it is constructed. 
class ExplanationAsserter : public Collector { public: ExplanationAsserter(QueryPtr q, const String& defaultFieldName, SearcherPtr s, bool deep = false) { this->q=q; this->s=s; this->d = q->toString(defaultFieldName); this->deep=deep; this->base = 0; } virtual ~ExplanationAsserter() { } public: QueryPtr q; SearcherPtr s; String d; bool deep; ScorerPtr scorer; protected: int32_t base; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { doc = doc + base; ExplanationPtr exp = s->explain(q, doc); BOOST_CHECK(exp); CheckHits::verifyExplanation(d, doc, scorer->score(), deep, exp); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { base = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; CheckHits::~CheckHits() { } void CheckHits::checkNoMatchExplanations(QueryPtr q, const String& defaultFieldName, SearcherPtr searcher, Collection results) { String d = q->toString(defaultFieldName); Set ignore = Set::newInstance(); for (int32_t i = 0; i < results.size(); ++i) ignore.add(results[i]); int32_t maxDoc = searcher->maxDoc(); for (int32_t doc = 0; doc < maxDoc; ++doc) { if (ignore.contains(doc)) continue; ExplanationPtr exp = searcher->explain(q, doc); BOOST_CHECK(exp); BOOST_CHECK_EQUAL(0.0, exp->getValue()); } } void CheckHits::checkHitCollector(QueryPtr query, const String& defaultFieldName, SearcherPtr searcher, Collection results) { QueryUtils::check(query, searcher); Set correct = Set::newInstance(); for (int32_t i = 0; i < results.size(); ++i) correct.add(results[i]); Set actual = Set::newInstance(); CollectorPtr c = newLucene(actual); searcher->search(query, c); BOOST_CHECK(correct.equals(actual)); for (int32_t i = -1; i < 2; ++i) { actual.clear(); QueryUtils::wrapSearcher(searcher, i)->search(query, c); BOOST_CHECK(correct.equals(actual)); } if (!MiscUtils::typeOf(searcher)) return; for (int32_t i = -1; i < 2; ++i) { actual.clear(); 
QueryUtils::wrapUnderlyingReader(boost::dynamic_pointer_cast(searcher), i)->search(query, c); BOOST_CHECK(correct.equals(actual)); } } void CheckHits::checkHits(QueryPtr query, const String& defaultFieldName, SearcherPtr searcher, Collection results) { if (!MiscUtils::typeOf(searcher)) QueryUtils::check(query, searcher); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; Set correct = Set::newInstance(); for (int32_t i = 0; i < results.size(); ++i) correct.add(results[i]); Set actual = Set::newInstance(); for (int32_t i = 0; i < hits.size(); ++i) actual.add(hits[i]->doc); BOOST_CHECK(correct.equals(actual)); QueryUtils::check(query, searcher); } void CheckHits::checkDocIds(Collection results, Collection hits) { BOOST_CHECK_EQUAL(hits.size(), results.size()); for (int32_t i = 0; i < results.size(); ++i) BOOST_CHECK_EQUAL(results[i], hits[i]->doc); } void CheckHits::checkHitsQuery(QueryPtr query, Collection hits1, Collection hits2, Collection results) { checkDocIds(results, hits1); checkDocIds(results, hits2); checkEqual(query, hits1, hits2); } void CheckHits::checkEqual(QueryPtr query, Collection hits1, Collection hits2) { double scoreTolerance = 1.0e-6; BOOST_CHECK_EQUAL(hits1.size(), hits2.size()); for (int32_t i = 0; i < hits1.size(); ++i) { BOOST_CHECK_EQUAL(hits1[i]->doc, hits2[i]->doc); BOOST_CHECK_CLOSE_FRACTION(hits1[i]->score, hits2[i]->score, scoreTolerance); } } void CheckHits::checkExplanations(QueryPtr query, const String& defaultFieldName, SearcherPtr searcher, bool deep) { searcher->search(query, newLucene(query, defaultFieldName, searcher, deep)); } void CheckHits::verifyExplanation(const String& q, int32_t doc, double score, bool deep, ExplanationPtr expl) { double value = expl->getValue(); BOOST_CHECK_CLOSE_FRACTION(score, value, EXPLAIN_SCORE_TOLERANCE_DELTA); if (!deep) return; Collection detail = expl->getDetails(); if (detail) { if (detail.size() == 1) { // simple containment, no matter what the description says, just 
verify contained expl has same score verifyExplanation(q, doc, score, deep, detail[0]); } else { // explanation must either: // - end with one of: "product of:", "sum of:", "max of:", or // - have "max plus times others" (where is float). double x = 0; String descr = StringUtils::toLower(expl->getDescription()); bool productOf = boost::ends_with(descr, L"product of:"); bool sumOf = boost::ends_with(descr, L"sum of:"); bool maxOf = boost::ends_with(descr, L"max of:"); bool maxTimesOthers = false; if (!(productOf || sumOf || maxOf)) { // maybe 'max plus x times others' String::size_type k1 = descr.find(L"max plus "); if (k1 != String::npos) { k1 += String(L"max plus ").length(); String::size_type k2 = descr.find(L" ", k1); x = StringUtils::toDouble(descr.substr(k1)); String max(descr.substr(k2)); boost::trim(max); if (max == L"times others of:") maxTimesOthers = true; } } BOOST_CHECK(productOf || sumOf || maxOf || maxTimesOthers); double sum = 0.0; double product = 1.0; double max = 0.0; for (int32_t i = 0; i < detail.size(); ++i) { double dval = detail[i]->getValue(); verifyExplanation(q, doc, dval, deep, detail[i]); product *= dval; sum += dval; max = std::max(max, dval); } double combined = 0.0; if (productOf) combined = product; else if (sumOf) combined = sum; else if (maxOf) combined = max; else if (maxTimesOthers) combined = max + x * (sum - max); else BOOST_FAIL("should never get here!"); BOOST_CHECK_CLOSE_FRACTION(combined, value, EXPLAIN_SCORE_TOLERANCE_DELTA); } } } } LucenePlusPlus-rel_3.0.4/src/test/search/ComplexExplanationsOfNonMatchesTest.cpp000066400000000000000000000205621217574114600301410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "ExplanationsFixture.h" #include "DefaultSimilarity.h" #include "BooleanQuery.h" #include "QueryParser.h" #include "FilteredQuery.h" #include "FieldCacheTermsFilter.h" #include "DisjunctionMaxQuery.h" #include "IndexSearcher.h" #include "SpanFirstQuery.h" #include "SpanOrQuery.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" #include "SpanTermQuery.h" #include "ConstantScoreQuery.h" #include "MatchAllDocsQuery.h" #include "MultiPhraseQuery.h" #include "CheckHits.h" using namespace Lucene; class Qnorm1Similarity : public DefaultSimilarity { public: virtual ~Qnorm1Similarity() { } public: virtual double queryNorm(double sumOfSquaredWeights) { return 1.0; } }; class ItemizedFilter : public FieldCacheTermsFilter { public: ItemizedFilter(const String& field, Collection terms) : FieldCacheTermsFilter(field, int2str(terms)) { } ItemizedFilter(Collection terms) : FieldCacheTermsFilter(L"KEY", int2str(terms)) { } virtual ~ItemizedFilter() { } public: Collection int2str(Collection terms) { Collection out = Collection::newInstance(terms.size()); for (int32_t i = 0; i < terms.size(); ++i) out[i] = StringUtils::toString(terms[i]); return out; } }; /// TestExplanations subclass that builds up super crazy complex queries on the assumption that /// if the explanations work out right for them, they should work for anything. 
class ComplexExplanationsOfNonMatchesFixture : public ExplanationsFixture { public: ComplexExplanationsOfNonMatchesFixture() { searcher->setSimilarity(createQnorm1Similarity()); } virtual ~ComplexExplanationsOfNonMatchesFixture() { } protected: DefaultSimilarityPtr createQnorm1Similarity() { return newLucene(); } public: using ExplanationsFixture::qtest; /// ignore matches and focus on non-matches virtual void qtest(QueryPtr q, Collection expDocNrs) { CheckHits::checkNoMatchExplanations(q, FIELD, searcher, expDocNrs); } }; BOOST_FIXTURE_TEST_SUITE(ComplexExplanationsOfNonMatchesTest, ComplexExplanationsOfNonMatchesFixture) BOOST_AUTO_TEST_CASE(test1) { BooleanQueryPtr q = newLucene(); q->add(qp->parse(L"\"w1 w2\"~1"), BooleanClause::MUST); q->add(snear(st(L"w2"), sor(L"w5", L"zz"), 4, true), BooleanClause::SHOULD); q->add(snear(sf(L"w3", 2), st(L"w2"), st(L"w3"), 5, true), BooleanClause::SHOULD); QueryPtr t = newLucene(qp->parse(L"xx"), newLucene(newCollection(1, 3))); t->setBoost(1000); q->add(t, BooleanClause::SHOULD); t = newLucene(newLucene(newCollection(0, 2))); t->setBoost(30); q->add(t, BooleanClause::SHOULD); DisjunctionMaxQueryPtr dm = newLucene(0.2); dm->add(snear(st(L"w2"), sor(L"w5", L"zz"), 4, true)); dm->add(qp->parse(L"QQ")); dm->add(qp->parse(L"xx yy -zz")); dm->add(qp->parse(L"-xx -w1")); DisjunctionMaxQueryPtr dm2 = newLucene(0.5); dm2->add(qp->parse(L"w1")); dm2->add(qp->parse(L"w2")); dm2->add(qp->parse(L"w3")); dm->add(dm2); q->add(dm, BooleanClause::SHOULD); BooleanQueryPtr b = newLucene(); b->setMinimumNumberShouldMatch(2); b->add(snear(L"w1", L"w2", 1, true), BooleanClause::SHOULD); b->add(snear(L"w2", L"w3", 1, true), BooleanClause::SHOULD); b->add(snear(L"w1", L"w3", 3, true), BooleanClause::SHOULD); q->add(b, BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(test2) { BooleanQueryPtr q = newLucene(); q->add(qp->parse(L"\"w1 w2\"~1"), BooleanClause::MUST); q->add(snear(st(L"w2"), sor(L"w5", L"zz"), 4, true), 
BooleanClause::SHOULD); q->add(snear(sf(L"w3", 2), st(L"w2"), st(L"w3"), 5, true), BooleanClause::SHOULD); QueryPtr t = newLucene(qp->parse(L"xx"), newLucene(newCollection(1, 3))); t->setBoost(1000); q->add(t, BooleanClause::SHOULD); t = newLucene(newLucene(newCollection(0, 2))); t->setBoost(-20); q->add(t, BooleanClause::SHOULD); DisjunctionMaxQueryPtr dm = newLucene(0.2); dm->add(snear(st(L"w2"), sor(L"w5", L"zz"), 4, true)); dm->add(qp->parse(L"QQ")); dm->add(qp->parse(L"xx yy -zz")); dm->add(qp->parse(L"-xx -w1")); DisjunctionMaxQueryPtr dm2 = newLucene(0.5); dm2->add(qp->parse(L"w1")); dm2->add(qp->parse(L"w2")); dm2->add(qp->parse(L"w3")); dm->add(dm2); q->add(dm, BooleanClause::SHOULD); BooleanQueryPtr b = newLucene(); b->setMinimumNumberShouldMatch(2); b->add(snear(L"w1", L"w2", 1, true), BooleanClause::SHOULD); b->add(snear(L"w2", L"w3", 1, true), BooleanClause::SHOULD); b->add(snear(L"w1", L"w3", 3, true), BooleanClause::SHOULD); b->setBoost(0.0); q->add(b, BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testT3) { bqtest(L"w1^0.0", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMA3) { QueryPtr q = newLucene(); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testFQ5) { bqtest(newLucene(qp->parse(L"xx^0"), newLucene(newCollection(1, 3))), newCollection(3)); } BOOST_AUTO_TEST_CASE(testCSQ4) { QueryPtr q = newLucene(newLucene(newCollection(3))); q->setBoost(0); bqtest(q, newCollection(3)); } BOOST_AUTO_TEST_CASE(testDMQ10) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"yy w5^100")); q->add(qp->parse(L"xx^0")); q->setBoost(0.0); bqtest(q, newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testMPQ7) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2"))); q->setSlop(1); q->setBoost(0.0); bqtest(q, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testBQ12) { qtest(L"w1 w2^0.0", newCollection(0, 1, 2, 3)); } 
BOOST_AUTO_TEST_CASE(testBQ13) { qtest(L"w1 -w5^0.0", newCollection(1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ18) { qtest(L"+w1^0.0 w2", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ21) { bqtest(L"(+w1 w2)^0.0", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ22) { bqtest(L"(+w1^0.0 w2)^0.0", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testST3) { SpanQueryPtr q = st(L"w1"); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testST6) { SpanQueryPtr q = st(L"xx"); q->setBoost(0); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSF3) { SpanQueryPtr q = sf(L"w1", 1); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSF7) { SpanQueryPtr q = sf(L"xx", 3); q->setBoost(0); bqtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSNot3) { SpanQueryPtr q = snot(sf(L"w1", 10), st(L"QQ")); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNot6) { SpanQueryPtr q = snot(sf(L"w1", 10), st(L"xx")); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNot8) { SpanQueryPtr f = snear(L"w1", L"w3", 10, true); f->setBoost(0); SpanQueryPtr q = snot(f, st(L"xx")); qtest(q, newCollection(0, 1, 3)); } BOOST_AUTO_TEST_CASE(testSNot9) { SpanQueryPtr t = st(L"xx"); t->setBoost(0); SpanQueryPtr q = snot(snear(L"w1", L"w3", 10, true), t); qtest(q, newCollection(0, 1, 3)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/ComplexExplanationsTest.cpp000066400000000000000000000200411217574114600256640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "ExplanationsFixture.h" #include "DefaultSimilarity.h" #include "BooleanQuery.h" #include "QueryParser.h" #include "FilteredQuery.h" #include "FieldCacheTermsFilter.h" #include "DisjunctionMaxQuery.h" #include "IndexSearcher.h" #include "SpanFirstQuery.h" #include "SpanOrQuery.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" #include "SpanTermQuery.h" #include "ConstantScoreQuery.h" #include "MatchAllDocsQuery.h" #include "MultiPhraseQuery.h" using namespace Lucene; class Qnorm1Similarity : public DefaultSimilarity { public: virtual ~Qnorm1Similarity() { } public: virtual double queryNorm(double sumOfSquaredWeights) { return 1.0; } }; class ItemizedFilter : public FieldCacheTermsFilter { public: ItemizedFilter(const String& field, Collection terms) : FieldCacheTermsFilter(field, int2str(terms)) { } ItemizedFilter(Collection terms) : FieldCacheTermsFilter(L"KEY", int2str(terms)) { } virtual ~ItemizedFilter() { } public: Collection int2str(Collection terms) { Collection out = Collection::newInstance(terms.size()); for (int32_t i = 0; i < terms.size(); ++i) out[i] = StringUtils::toString(terms[i]); return out; } }; /// TestExplanations subclass that builds up super crazy complex queries on the assumption that /// if the explanations work out right for them, they should work for anything. 
class ComplexExplanationsFixture : public ExplanationsFixture { public: ComplexExplanationsFixture() { searcher->setSimilarity(createQnorm1Similarity()); } virtual ~ComplexExplanationsFixture() { } protected: DefaultSimilarityPtr createQnorm1Similarity() { return newLucene(); } }; BOOST_FIXTURE_TEST_SUITE(ComplexExplanationsTest, ComplexExplanationsFixture) BOOST_AUTO_TEST_CASE(test1) { BooleanQueryPtr q = newLucene(); q->add(qp->parse(L"\"w1 w2\"~1"), BooleanClause::MUST); q->add(snear(st(L"w2"), sor(L"w5", L"zz"), 4, true), BooleanClause::SHOULD); q->add(snear(sf(L"w3", 2), st(L"w2"), st(L"w3"), 5, true), BooleanClause::SHOULD); QueryPtr t = newLucene(qp->parse(L"xx"), newLucene(newCollection(1, 3))); t->setBoost(1000); q->add(t, BooleanClause::SHOULD); t = newLucene(newLucene(newCollection(0, 2))); t->setBoost(30); q->add(t, BooleanClause::SHOULD); DisjunctionMaxQueryPtr dm = newLucene(0.2); dm->add(snear(st(L"w2"), sor(L"w5", L"zz"), 4, true)); dm->add(qp->parse(L"QQ")); dm->add(qp->parse(L"xx yy -zz")); dm->add(qp->parse(L"-xx -w1")); DisjunctionMaxQueryPtr dm2 = newLucene(0.5); dm2->add(qp->parse(L"w1")); dm2->add(qp->parse(L"w2")); dm2->add(qp->parse(L"w3")); dm->add(dm2); q->add(dm, BooleanClause::SHOULD); BooleanQueryPtr b = newLucene(); b->setMinimumNumberShouldMatch(2); b->add(snear(L"w1", L"w2", 1, true), BooleanClause::SHOULD); b->add(snear(L"w2", L"w3", 1, true), BooleanClause::SHOULD); b->add(snear(L"w1", L"w3", 3, true), BooleanClause::SHOULD); q->add(b, BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(test2) { BooleanQueryPtr q = newLucene(); q->add(qp->parse(L"\"w1 w2\"~1"), BooleanClause::MUST); q->add(snear(st(L"w2"), sor(L"w5", L"zz"), 4, true), BooleanClause::SHOULD); q->add(snear(sf(L"w3", 2), st(L"w2"), st(L"w3"), 5, true), BooleanClause::SHOULD); QueryPtr t = newLucene(qp->parse(L"xx"), newLucene(newCollection(1, 3))); t->setBoost(1000); q->add(t, BooleanClause::SHOULD); t = 
newLucene(newLucene(newCollection(0, 2))); t->setBoost(-20); q->add(t, BooleanClause::SHOULD); DisjunctionMaxQueryPtr dm = newLucene(0.2); dm->add(snear(st(L"w2"), sor(L"w5", L"zz"), 4, true)); dm->add(qp->parse(L"QQ")); dm->add(qp->parse(L"xx yy -zz")); dm->add(qp->parse(L"-xx -w1")); DisjunctionMaxQueryPtr dm2 = newLucene(0.5); dm2->add(qp->parse(L"w1")); dm2->add(qp->parse(L"w2")); dm2->add(qp->parse(L"w3")); dm->add(dm2); q->add(dm, BooleanClause::SHOULD); BooleanQueryPtr b = newLucene(); b->setMinimumNumberShouldMatch(2); b->add(snear(L"w1", L"w2", 1, true), BooleanClause::SHOULD); b->add(snear(L"w2", L"w3", 1, true), BooleanClause::SHOULD); b->add(snear(L"w1", L"w3", 3, true), BooleanClause::SHOULD); b->setBoost(0.0); q->add(b, BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testT3) { bqtest(L"w1^0.0", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMA3) { QueryPtr q = newLucene(); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testFQ5) { bqtest(newLucene(qp->parse(L"xx^0"), newLucene(newCollection(1, 3))), newCollection(3)); } BOOST_AUTO_TEST_CASE(testCSQ4) { QueryPtr q = newLucene(newLucene(newCollection(3))); q->setBoost(0); bqtest(q, newCollection(3)); } BOOST_AUTO_TEST_CASE(testDMQ10) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"yy w5^100")); q->add(qp->parse(L"xx^0")); q->setBoost(0.0); bqtest(q, newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testMPQ7) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2"))); q->setSlop(1); q->setBoost(0.0); bqtest(q, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testBQ12) { qtest(L"w1 w2^0.0", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ13) { qtest(L"w1 -w5^0.0", newCollection(1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ18) { qtest(L"+w1^0.0 w2", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ21) { bqtest(L"(+w1 w2)^0.0", newCollection(0, 1, 2, 3)); } 
BOOST_AUTO_TEST_CASE(testBQ22) { bqtest(L"(+w1^0.0 w2)^0.0", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testST3) { SpanQueryPtr q = st(L"w1"); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testST6) { SpanQueryPtr q = st(L"xx"); q->setBoost(0); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSF3) { SpanQueryPtr q = sf(L"w1", 1); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSF7) { SpanQueryPtr q = sf(L"xx", 3); q->setBoost(0); bqtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSNot3) { SpanQueryPtr q = snot(sf(L"w1", 10), st(L"QQ")); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNot6) { SpanQueryPtr q = snot(sf(L"w1", 10), st(L"xx")); q->setBoost(0); bqtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNot8) { SpanQueryPtr f = snear(L"w1", L"w3", 10, true); f->setBoost(0); SpanQueryPtr q = snot(f, st(L"xx")); qtest(q, newCollection(0, 1, 3)); } BOOST_AUTO_TEST_CASE(testSNot9) { SpanQueryPtr t = st(L"xx"); t->setBoost(0); SpanQueryPtr q = snot(snear(L"w1", L"w3", 10, true), t); qtest(q, newCollection(0, 1, 3)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/CustomSearcherSortTest.cpp000066400000000000000000000160401217574114600254720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TermQuery.h" #include "Term.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "Sort.h" #include "SortField.h" #include "IndexSearcher.h" #include "IndexReader.h" #include "BooleanQuery.h" #include "DateTools.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "TopFieldDocs.h" #include "MultiSearcher.h" #include "Random.h" #include "MiscUtils.h" using namespace Lucene; class CustomSearcher : public IndexSearcher { public: CustomSearcher(DirectoryPtr directory, int32_t switcher) : IndexSearcher(directory, true) { this->switcher = switcher; } CustomSearcher(IndexReaderPtr r, int32_t switcher) : IndexSearcher(r) { this->switcher = switcher; } virtual ~CustomSearcher() { } protected: int32_t switcher; public: virtual TopFieldDocsPtr search(QueryPtr query, FilterPtr filter, int32_t n, SortPtr sort) { BooleanQueryPtr bq = newLucene(); bq->add(query, BooleanClause::MUST); bq->add(newLucene(newLucene(L"mandant", StringUtils::toString(switcher))), BooleanClause::MUST); return IndexSearcher::search(bq, filter, n, sort); } virtual TopDocsPtr search(QueryPtr query, FilterPtr filter, int32_t n) { BooleanQueryPtr bq = newLucene(); bq->add(query, BooleanClause::MUST); bq->add(newLucene(newLucene(L"mandant", StringUtils::toString(switcher))), BooleanClause::MUST); return IndexSearcher::search(bq, filter, n); } }; class CustomSearcherSortFixture : public LuceneTestFixture { public: CustomSearcherSortFixture() { random = newLucene(); index = getIndex(); query = newLucene(newLucene(L"content", L"test")); } virtual ~CustomSearcherSortFixture() { } protected: DirectoryPtr index; QueryPtr query; RandomPtr random; static const int32_t INDEX_SIZE; public: DirectoryPtr getIndex() { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, 
newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < INDEX_SIZE; ++i) { DocumentPtr doc = newLucene(); if ((i % 5) != 0) doc->add(newLucene(L"publicationDate_", getLuceneDate(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); if ((i % 7) == 0) doc->add(newLucene(L"content", L"test", Field::STORE_YES, Field::INDEX_ANALYZED)); // every document has a defined 'mandant' field doc->add(newLucene(L"mandant", StringUtils::toString(i % 3), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->optimize(); writer->close(); return indexStore; } String getLuceneDate() { DateTools::setDateOrder(DateTools::DATEORDER_DMY); boost::posix_time::ptime base = DateTools::parseDate(L"01/01/1980"); return DateTools::timeToString(MiscUtils::getTimeMillis(base) + random->nextInt() - INT_MIN, DateTools::RESOLUTION_DAY); } /// make sure the documents returned by the search match the expected list void matchHits(SearcherPtr searcher, SortPtr sort) { // make a query without sorting first Collection hitsByRank = searcher->search(query, FilterPtr(), 1000)->scoreDocs; checkHits(hitsByRank); // check for duplicates Map resultMap = Map::newInstance(); // store hits in Map - Map does not allow duplicates; existing entries are silently overwritten for (int32_t hitid = 0; hitid < hitsByRank.size(); ++hitid) resultMap.put(hitsByRank[hitid]->doc, hitid); // now make a query using the sort criteria Collection resultSort = searcher->search (query, FilterPtr(), 1000, sort)->scoreDocs; checkHits(resultSort); // check for duplicates // besides the sorting both sets of hits must be identical for (int32_t hitid = 0; hitid < resultSort.size(); ++hitid) { int32_t idHitDate = resultSort[hitid]->doc; // document ID from sorted search BOOST_CHECK(resultMap.contains(idHitDate)); // same ID must be in the Map from the rank-sorted search // every hit must appear once in both result sets --> remove it from the Map. 
// At the end the Map must be empty! resultMap.remove(idHitDate); } BOOST_CHECK(resultMap.empty()); } void checkHits(Collection hits) { if (hits) { Map idMap = Map::newInstance(); for (int32_t docnum = 0; docnum < hits.size(); ++docnum) { int32_t luceneId = hits[docnum]->doc; BOOST_CHECK(!idMap.contains(luceneId)); idMap.put(luceneId, docnum); } } } }; const int32_t CustomSearcherSortFixture::INDEX_SIZE = 2000; BOOST_FIXTURE_TEST_SUITE(CustomSearcherSortTest, CustomSearcherSortFixture) /// Run the test using two CustomSearcher instances. BOOST_AUTO_TEST_CASE(testFieldSortCustomSearcher) { SortPtr custSort = newLucene(newCollection(newLucene(L"publicationDate_", SortField::STRING), SortField::FIELD_SCORE())); SearcherPtr searcher = newLucene(index, 2); // search and check hits matchHits(searcher, custSort); } /// Run the test using one CustomSearcher wrapped by a MultiSearcher. BOOST_AUTO_TEST_CASE(testFieldSortSingleSearcher) { SortPtr custSort = newLucene(newCollection(newLucene(L"publicationDate_", SortField::STRING), SortField::FIELD_SCORE())); SearcherPtr searcher = newLucene(newCollection(newLucene(index, 2))); // search and check hits matchHits(searcher, custSort); } /// Run the test using two CustomSearcher instances. BOOST_AUTO_TEST_CASE(testFieldSortMultiCustomSearcher) { SortPtr custSort = newLucene(newCollection(newLucene(L"publicationDate_", SortField::STRING), SortField::FIELD_SCORE())); SearcherPtr searcher = newLucene(newCollection(newLucene(index, 0), newLucene(index, 2))); // search and check hits matchHits(searcher, custSort); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/DateFilterTest.cpp000066400000000000000000000133111217574114600237140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "TermRangeFilter.h" #include "TermQuery.h" #include "Term.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "DateTools.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(DateFilterTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testBefore) { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); int64_t now = MiscUtils::currentTimeMillis(); DocumentPtr doc = newLucene(); // add time that is in the past doc->add(newLucene(L"datefield", DateTools::timeToString(now - 1000, DateTools::RESOLUTION_MILLISECOND), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"body", L"Today is a very sunny day in New York City", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(indexStore, true); // filter that should preserve matches TermRangeFilterPtr df1 = newLucene(L"datefield", DateTools::timeToString(now - 2000, DateTools::RESOLUTION_MILLISECOND), DateTools::timeToString(now, DateTools::RESOLUTION_MILLISECOND), false, true); // filter that should discard matches TermRangeFilterPtr df2 = newLucene(L"datefield", DateTools::timeToString(0, DateTools::RESOLUTION_MILLISECOND), DateTools::timeToString(now - 2000, DateTools::RESOLUTION_MILLISECOND), true, false); // search something that doesn't exist with DateFilter QueryPtr query1 = newLucene(newLucene(L"body", L"NoMatchForThis")); // search for something that does exists QueryPtr query2 = newLucene(newLucene(L"body", L"sunny")); // ensure that queries return expected results without DateFilter first Collection result = 
searcher->search(query1, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = searcher->search(query2, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); // run queries with DateFilter result = searcher->search(query1, df1, 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = searcher->search(query1, df2, 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = searcher->search(query2, df1, 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = searcher->search(query2, df2, 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); } BOOST_AUTO_TEST_CASE(testAfter) { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); int64_t now = MiscUtils::currentTimeMillis(); DocumentPtr doc = newLucene(); // add time that is in the future doc->add(newLucene(L"datefield", DateTools::timeToString(now + 888888, DateTools::RESOLUTION_MILLISECOND), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"body", L"Today is a very sunny day in New York City", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(indexStore, true); // filter that should preserve matches TermRangeFilterPtr df1 = newLucene(L"datefield", DateTools::timeToString(now, DateTools::RESOLUTION_MILLISECOND), DateTools::timeToString(now + 999999, DateTools::RESOLUTION_MILLISECOND), true, false); // filter that should discard matches TermRangeFilterPtr df2 = newLucene(L"datefield", DateTools::timeToString(now + 999999, DateTools::RESOLUTION_MILLISECOND), DateTools::timeToString(now + 999999999, DateTools::RESOLUTION_MILLISECOND), false, true); // search something that doesn't exist with DateFilter QueryPtr query1 = newLucene(newLucene(L"body", L"NoMatchForThis")); // search for something that does exists QueryPtr query2 = newLucene(newLucene(L"body", 
L"sunny")); // ensure that queries return expected results without DateFilter first Collection result = searcher->search(query1, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = searcher->search(query2, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); // run queries with DateFilter result = searcher->search(query1, df1, 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = searcher->search(query1, df2, 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = searcher->search(query2, df1, 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = searcher->search(query2, df2, 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/DateSortTest.cpp000066400000000000000000000102331217574114600234160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "DateTools.h" #include "IndexSearcher.h" #include "Sort.h" #include "SortField.h" #include "QueryParser.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "TopFieldDocs.h" using namespace Lucene; class DateSortFixture : public LuceneTestFixture { public: DateSortFixture() { // Create an index writer. directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); // oldest doc: // Add the first document. 
text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007 writer->addDocument(createDocument(L"Document 1", 1192001122000LL)); // Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007 writer->addDocument(createDocument(L"Document 2", 1192001126000LL)); // Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007 writer->addDocument(createDocument(L"Document 3", 1192101133000LL)); // Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007 writer->addDocument(createDocument(L"Document 4", 1192104129000LL)); // latest doc: // Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007 writer->addDocument(createDocument(L"Document 5", 1192209943000LL)); writer->optimize(); writer->close(); } virtual ~DateSortFixture() { } protected: static const String TEXT_FIELD; static const String DATE_TIME_FIELD; DirectoryPtr directory; public: DocumentPtr createDocument(const String& text, int64_t time) { DocumentPtr document = newLucene(); // Add the text field. FieldPtr textField = newLucene(TEXT_FIELD, text, Field::STORE_YES, Field::INDEX_ANALYZED); document->add(textField); // Add the date/time field. String dateTimeString = DateTools::timeToString(time, DateTools::RESOLUTION_SECOND); FieldPtr dateTimeField = newLucene(DATE_TIME_FIELD, dateTimeString, Field::STORE_YES, Field::INDEX_NOT_ANALYZED); document->add(dateTimeField); return document; } }; const String DateSortFixture::TEXT_FIELD = L"text"; const String DateSortFixture::DATE_TIME_FIELD = L"dateTime"; BOOST_FIXTURE_TEST_SUITE(DateSortTest, DateSortFixture) BOOST_AUTO_TEST_CASE(testReverseDateSort) { IndexSearcherPtr searcher = newLucene(directory, true); SortPtr sort = newLucene(newLucene(DATE_TIME_FIELD, SortField::STRING, true)); QueryParserPtr queryParser = newLucene(LuceneVersion::LUCENE_CURRENT, TEXT_FIELD, newLucene()); QueryPtr query = queryParser->parse(L"Document"); // Execute the search and process the search results. 
Collection actualOrder = Collection::newInstance(5); Collectionhits = searcher->search(query, FilterPtr(), 1000, sort)->scoreDocs; for (int32_t i = 0; i < hits.size(); ++i) { DocumentPtr document = searcher->doc(hits[i]->doc); String text = document->get(TEXT_FIELD); actualOrder[i] = text; } searcher->close(); // Set up the expected order (ie. Document 5, 4, 3, 2, 1). Collection expectedOrder = Collection::newInstance(5); expectedOrder[0] = L"Document 5"; expectedOrder[1] = L"Document 4"; expectedOrder[2] = L"Document 3"; expectedOrder[3] = L"Document 2"; expectedOrder[4] = L"Document 1"; BOOST_CHECK(expectedOrder.equals(actualOrder)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/DisjunctionMaxQueryTest.cpp000066400000000000000000000274431217574114600256710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "DefaultSimilarity.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "DisjunctionMaxQuery.h" #include "QueryUtils.h" #include "Weight.h" #include "Scorer.h" #include "DocIdSetIterator.h" #include "TermQuery.h" #include "Term.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "BooleanQuery.h" using namespace Lucene; /// Similarity to eliminate tf, idf and lengthNorm effects to isolate test case. 
class DisjunctionMaxSimilarity : public DefaultSimilarity { public: virtual ~DisjunctionMaxSimilarity() { } public: virtual double tf(double freq) { if (freq > 0.0) return 1.0; else return 0.0; } virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } virtual double idf(int32_t docFreq, int32_t numDocs) { return 1.0; } }; class DisjunctionMaxQueryFixture : public LuceneTestFixture { public: DisjunctionMaxQueryFixture() { sim = newLucene(); index = newLucene(); IndexWriterPtr writer = newLucene(index, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setSimilarity(sim); // hed is the most important field, dek is secondary // d1 is an "ok" match for: albino elephant { DocumentPtr d1 = newLucene(); d1->add(newLucene(L"id", L"d1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d1->add(newLucene(L"hed", L"elephant", Field::STORE_YES, Field::INDEX_ANALYZED)); d1->add(newLucene(L"dek", L"elephant", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d1); } // d2 is a "good" match for: albino elephant { DocumentPtr d2 = newLucene(); d2->add(newLucene(L"id", L"d2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d2->add(newLucene(L"hed", L"elephant", Field::STORE_YES, Field::INDEX_ANALYZED)); d2->add(newLucene(L"dek", L"albino", Field::STORE_YES, Field::INDEX_ANALYZED)); d2->add(newLucene(L"dek", L"elephant", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d2); } // d3 is a "better" match for: albino elephant { DocumentPtr d3 = newLucene(); d3->add(newLucene(L"id", L"d3", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d3->add(newLucene(L"hed", L"albino", Field::STORE_YES, Field::INDEX_ANALYZED)); d3->add(newLucene(L"hed", L"elephant", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d3); } // d4 is the "best" match for: albino elephant { DocumentPtr d4 = newLucene(); d4->add(newLucene(L"id", L"d4", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d4->add(newLucene(L"hed", L"albino", 
Field::STORE_YES, Field::INDEX_ANALYZED)); d4->add(newLucene(L"hed", L"elephant", Field::STORE_YES, Field::INDEX_ANALYZED)); d4->add(newLucene(L"dek", L"albino", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d4); } writer->close(); r = IndexReader::open(index, true); s = newLucene(r); s->setSimilarity(sim); } virtual ~DisjunctionMaxQueryFixture() { } public: SimilarityPtr sim; DirectoryPtr index; IndexReaderPtr r; IndexSearcherPtr s; static const double SCORE_COMP_THRESH; protected: QueryPtr tq(const String& f, const String& t) { return newLucene(newLucene(f, t)); } QueryPtr tq(const String& f, const String& t, double b) { QueryPtr q = tq(f, t); q->setBoost(b); return q; } }; const double DisjunctionMaxQueryFixture::SCORE_COMP_THRESH = 0.00001; BOOST_FIXTURE_TEST_SUITE(DisjunctionMaxQueryTest, DisjunctionMaxQueryFixture) BOOST_AUTO_TEST_CASE(testSkipToFirsttimeMiss) { DisjunctionMaxQueryPtr dq = newLucene(0.0); dq->add(tq(L"id", L"d1")); dq->add(tq(L"dek", L"DOES_NOT_EXIST")); QueryUtils::check(dq, s); WeightPtr dw = dq->weight(s); ScorerPtr ds = dw->scorer(r, true, false); BOOST_CHECK_EQUAL(ds->advance(3), DocIdSetIterator::NO_MORE_DOCS); } BOOST_AUTO_TEST_CASE(testSkipToFirsttimeHit) { DisjunctionMaxQueryPtr dq = newLucene(0.0); dq->add(tq(L"dek", L"albino")); dq->add(tq(L"dek", L"DOES_NOT_EXIST")); QueryUtils::check(dq, s); WeightPtr dw = dq->weight(s); ScorerPtr ds = dw->scorer(r, true, false); BOOST_CHECK_NE(ds->advance(3), DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK_EQUAL(L"d4", r->document(ds->docID())->get(L"id")); } BOOST_AUTO_TEST_CASE(testSimpleEqualScores1) { DisjunctionMaxQueryPtr q = newLucene(0.0); q->add(tq(L"hed", L"albino")); q->add(tq(L"hed", L"elephant")); QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, h.size()); double score = h[0]->score; for (int32_t i = 1; i < h.size(); ++i) BOOST_CHECK_CLOSE_FRACTION(score, h[i]->score, SCORE_COMP_THRESH); } 
BOOST_AUTO_TEST_CASE(testSimpleEqualScores2) { DisjunctionMaxQueryPtr q = newLucene(0.0); q->add(tq(L"dek", L"albino")); q->add(tq(L"dek", L"elephant")); QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, h.size()); double score = h[0]->score; for (int32_t i = 1; i < h.size(); ++i) BOOST_CHECK_CLOSE_FRACTION(score, h[i]->score, SCORE_COMP_THRESH); } BOOST_AUTO_TEST_CASE(testSimpleEqualScores3) { DisjunctionMaxQueryPtr q = newLucene(0.0); q->add(tq(L"hed", L"albino")); q->add(tq(L"hed", L"elephant")); q->add(tq(L"dek", L"albino")); q->add(tq(L"dek", L"elephant")); QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, h.size()); double score = h[0]->score; for (int32_t i = 1; i < h.size(); ++i) BOOST_CHECK_CLOSE_FRACTION(score, h[i]->score, SCORE_COMP_THRESH); } BOOST_AUTO_TEST_CASE(testSimpleTiebreaker) { DisjunctionMaxQueryPtr q = newLucene(0.01); q->add(tq(L"dek", L"albino")); q->add(tq(L"dek", L"elephant")); QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, h.size()); BOOST_CHECK_EQUAL(L"d2", s->doc(h[0]->doc)->get(L"id")); double score0 = h[0]->score; double score1 = h[1]->score; double score2 = h[2]->score; BOOST_CHECK(score0 > score1); BOOST_CHECK_CLOSE_FRACTION(score1, score2, SCORE_COMP_THRESH); } BOOST_AUTO_TEST_CASE(testBooleanRequiredEqualScores) { BooleanQueryPtr q = newLucene(); { DisjunctionMaxQueryPtr q1 = newLucene(0.0); q1->add(tq(L"hed", L"albino")); q1->add(tq(L"dek", L"albino")); q->add(q1, BooleanClause::MUST); QueryUtils::check(q1, s); } { DisjunctionMaxQueryPtr q2 = newLucene(0.0); q2->add(tq(L"hed", L"elephant")); q2->add(tq(L"dek", L"elephant")); q->add(q2, BooleanClause::MUST); QueryUtils::check(q2, s); } QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, h.size()); double score = h[0]->score; for (int32_t i = 1; i < h.size(); 
++i) BOOST_CHECK_CLOSE_FRACTION(score, h[i]->score, SCORE_COMP_THRESH); } BOOST_AUTO_TEST_CASE(testBooleanOptionalNoTiebreaker) { BooleanQueryPtr q = newLucene(); { DisjunctionMaxQueryPtr q1 = newLucene(0.0); q1->add(tq(L"hed", L"albino")); q1->add(tq(L"dek", L"albino")); q->add(q1, BooleanClause::SHOULD); } { DisjunctionMaxQueryPtr q2 = newLucene(0.0); q2->add(tq(L"hed", L"elephant")); q2->add(tq(L"dek", L"elephant")); q->add(q2, BooleanClause::SHOULD); } QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, h.size()); double score = h[0]->score; for (int32_t i = 1; i < h.size() - 1; ++i) BOOST_CHECK_CLOSE_FRACTION(score, h[i]->score, SCORE_COMP_THRESH); BOOST_CHECK_EQUAL(L"d1", s->doc(h[h.size() - 1]->doc)->get(L"id")); double score1 = h[h.size() - 1]->score; BOOST_CHECK(score > score1); } BOOST_AUTO_TEST_CASE(testBooleanOptionalWithTiebreaker) { BooleanQueryPtr q = newLucene(); { DisjunctionMaxQueryPtr q1 = newLucene(0.01); q1->add(tq(L"hed", L"albino")); q1->add(tq(L"dek", L"albino")); q->add(q1, BooleanClause::SHOULD); } { DisjunctionMaxQueryPtr q2 = newLucene(0.01); q2->add(tq(L"hed", L"elephant")); q2->add(tq(L"dek", L"elephant")); q->add(q2, BooleanClause::SHOULD); } QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, h.size()); double score0 = h[0]->score; double score1 = h[1]->score; double score2 = h[2]->score; double score3 = h[3]->score; String doc0 = s->doc(h[0]->doc)->get(L"id"); String doc1 = s->doc(h[1]->doc)->get(L"id"); String doc2 = s->doc(h[2]->doc)->get(L"id"); String doc3 = s->doc(h[3]->doc)->get(L"id"); BOOST_CHECK(doc0 == L"d2" || doc0 == L"d4"); BOOST_CHECK(doc1 == L"d2" || doc1 == L"d4"); BOOST_CHECK_CLOSE_FRACTION(score0, score1, SCORE_COMP_THRESH); BOOST_CHECK_EQUAL(L"d3", doc2); BOOST_CHECK(score1 > score2); BOOST_CHECK_EQUAL(L"d1", doc3); BOOST_CHECK(score2 > score3); } BOOST_AUTO_TEST_CASE(testBooleanOptionalWithTiebreakerAndBoost) 
{ BooleanQueryPtr q = newLucene(); { DisjunctionMaxQueryPtr q1 = newLucene(0.01); q1->add(tq(L"hed", L"albino", 1.5)); q1->add(tq(L"dek", L"albino")); q->add(q1, BooleanClause::SHOULD); } { DisjunctionMaxQueryPtr q2 = newLucene(0.01); q2->add(tq(L"hed", L"elephant", 1.5)); q2->add(tq(L"dek", L"elephant")); q->add(q2, BooleanClause::SHOULD); } QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, h.size()); double score0 = h[0]->score; double score1 = h[1]->score; double score2 = h[2]->score; double score3 = h[3]->score; String doc0 = s->doc(h[0]->doc)->get(L"id"); String doc1 = s->doc(h[1]->doc)->get(L"id"); String doc2 = s->doc(h[2]->doc)->get(L"id"); String doc3 = s->doc(h[3]->doc)->get(L"id"); BOOST_CHECK(score0 > score1); BOOST_CHECK(score1 > score2); BOOST_CHECK(score2 > score3); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/DocBoostTest.cpp000066400000000000000000000056451217574114600234200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "Collector.h" #include "Scorer.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(DocBoostTest, LuceneTestFixture) namespace TestDocBoost { class BoostCollector : public Collector { public: BoostCollector(Collection scores) { this->scores = scores; this->base = 0; } virtual ~BoostCollector() { } public: Collection scores; int32_t base; ScorerPtr scorer; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { scores[doc + base] = scorer->score(); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { base = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; } BOOST_AUTO_TEST_CASE(testDocBoost) { RAMDirectoryPtr store = newLucene(); IndexWriterPtr writer = newLucene(store, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); FieldablePtr f1 = newLucene(L"field", L"word", Field::STORE_YES, Field::INDEX_ANALYZED); FieldablePtr f2 = newLucene(L"field", L"word", Field::STORE_YES, Field::INDEX_ANALYZED); f2->setBoost(2.0); DocumentPtr d1 = newLucene(); DocumentPtr d2 = newLucene(); DocumentPtr d3 = newLucene(); DocumentPtr d4 = newLucene(); d3->setBoost(3.0); d4->setBoost(2.0); d1->add(f1); // boost = 1 d2->add(f2); // boost = 2 d3->add(f1); // boost = 3 d4->add(f2); // boost = 4 writer->addDocument(d1); writer->addDocument(d2); writer->addDocument(d3); writer->addDocument(d4); writer->optimize(); writer->close(); Collection scores = Collection::newInstance(4); IndexSearcherPtr searcher = newLucene(store, true); searcher->search(newLucene(newLucene(L"field", L"word")), newLucene(scores)); double lastScore = 0.0; for (int32_t i = 0; i < 4; ++i) { 
BOOST_CHECK(scores[i] > lastScore); lastScore = scores[i]; } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/DocIdSetTest.cpp000066400000000000000000000100501217574114600233240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "DocIdSet.h" #include "DocIdSetIterator.h" #include "FilteredDocIdSet.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "MatchAllDocsQuery.h" #include "Filter.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(DocIdSetTest, LuceneTestFixture) static const int32_t maxdoc = 10; namespace TestFilteredDocIdSet { class TestDocIdSetIterator : public DocIdSetIterator { public: TestDocIdSetIterator() { docid = -1; } virtual ~TestDocIdSetIterator() { } public: int32_t docid; public: virtual int32_t docID() { return docid; } virtual int32_t nextDoc() { return ++docid < maxdoc ? 
docid : (docid = NO_MORE_DOCS); } virtual int32_t advance(int32_t target) { while (nextDoc() < target) { } return docid; } }; class TestDocIdSet : public DocIdSet { public: virtual ~TestDocIdSet() { } public: virtual DocIdSetIteratorPtr iterator() { return newLucene(); } }; class TestFilteredDocIdSet : public FilteredDocIdSet { public: TestFilteredDocIdSet(DocIdSetPtr innerSet) : FilteredDocIdSet(innerSet) { } virtual ~TestFilteredDocIdSet() { } protected: virtual bool match(int32_t docid) { return (docid % 2 == 0); // validate only even docids } }; } BOOST_AUTO_TEST_CASE(testFilteredDocIdSet) { DocIdSetPtr innerSet = newLucene(); DocIdSetPtr filteredSet = newLucene(innerSet); DocIdSetIteratorPtr iter = filteredSet->iterator(); Collection docs = Collection::newInstance(); int32_t doc = iter->advance(3); if (doc != DocIdSetIterator::NO_MORE_DOCS) { docs.add(doc); while((doc = iter->nextDoc()) != DocIdSetIterator::NO_MORE_DOCS) docs.add(doc); } Collection answer = newCollection(4, 6, 8); BOOST_CHECK(docs.equals(answer)); } namespace TestNullDocIdSet { class TestFilter : public Filter { public: virtual ~TestFilter() { } public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { return DocIdSetPtr(); } }; } /// Tests that if a Filter produces a null DocIdSet, which is given to IndexSearcher, everything works fine BOOST_AUTO_TEST_CASE(testNullDocIdSet) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"c", L"val", Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS)); writer->addDocument(doc); writer->close(); // First verify the document is searchable. 
IndexSearcherPtr searcher = newLucene(dir, true); BOOST_CHECK_EQUAL(1, searcher->search(newLucene(), 10)->totalHits); // Now search with a Filter which returns a null DocIdSet FilterPtr f = newLucene(); BOOST_CHECK_EQUAL(0, searcher->search(newLucene(), f, 10)->totalHits); searcher->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/ElevationComparatorTest.cpp000066400000000000000000000147441217574114600256620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "BooleanQuery.h" #include "TermQuery.h" #include "Term.h" #include "Sort.h" #include "SortField.h" #include "TopFieldCollector.h" #include "TopDocsCollector.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "FieldComparatorSource.h" #include "FieldComparator.h" #include "FieldCache.h" using namespace Lucene; class ElevationFieldComparator : public FieldComparator { public: ElevationFieldComparator(MapStringInt priority, const String& fieldname, int32_t numHits) { this->priority = priority; this->fieldname = fieldname; this->values = Collection::newInstance(numHits); this->bottomVal = 0; } virtual ~ElevationFieldComparator() { } public: MapStringInt priority; String fieldname; StringIndexPtr idIndex; Collection values; int32_t bottomVal; public: virtual int32_t compare(int32_t slot1, int32_t slot2) { return values[slot2] - values[slot1]; // values will be small enough that there is no overflow concern } virtual void 
setBottom(int32_t slot) { bottomVal = values[slot]; } virtual int32_t compareBottom(int32_t doc) { return docVal(doc) - bottomVal; } virtual void copy(int32_t slot, int32_t doc) { values[slot] = docVal(doc); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { idIndex = FieldCache::DEFAULT()->getStringIndex(reader, fieldname); } virtual ComparableValue value(int32_t slot) { return values[slot]; } protected: int32_t docVal(int32_t doc) { String id = idIndex->lookup[idIndex->order[doc]]; return priority.contains(id) ? priority.get(id) : 0; } }; class ElevationComparatorSource : public FieldComparatorSource { public: ElevationComparatorSource(MapStringInt priority) { this->priority = priority; } virtual ~ElevationComparatorSource() { } protected: MapStringInt priority; public: virtual FieldComparatorPtr newComparator(const String& fieldname, int32_t numHits, int32_t sortPos, bool reversed) { return newLucene(priority, fieldname, numHits); } }; class ElevationComparatorFixture : public LuceneTestFixture { public: ElevationComparatorFixture() { priority = MapStringInt::newInstance(); } virtual ~ElevationComparatorFixture() { } public: MapStringInt priority; public: DocumentPtr adoc(Collection vals) { DocumentPtr doc = newLucene(); for (int32_t i = 0; i < vals.size() - 2; i += 2) doc->add(newLucene(vals[i], vals[i + 1], Field::STORE_YES, Field::INDEX_ANALYZED)); return doc; } void runTest(IndexSearcherPtr searcher, bool reversed) { BooleanQueryPtr newq = newLucene(false); TermQueryPtr query = newLucene(newLucene(L"title", L"ipod")); newq->add(query, BooleanClause::SHOULD); newq->add(getElevatedQuery(newCollection(L"id", L"a", L"id", L"x")), BooleanClause::SHOULD); SortPtr sort = newLucene(newCollection( newLucene(L"id", newLucene(priority), false), newLucene(L"", SortField::SCORE, reversed) )); TopDocsCollectorPtr topCollector = TopFieldCollector::create(sort, 50, false, true, true, true); searcher->search(newq, FilterPtr(), topCollector); TopDocsPtr 
topDocs = topCollector->topDocs(0, 10); int32_t numDocsReturned = topDocs->scoreDocs.size(); BOOST_CHECK_EQUAL(4, numDocsReturned); // 0 and 3 were elevated BOOST_CHECK_EQUAL(0, topDocs->scoreDocs[0]->doc); BOOST_CHECK_EQUAL(3, topDocs->scoreDocs[1]->doc); if (reversed) { BOOST_CHECK_EQUAL(2, topDocs->scoreDocs[2]->doc); BOOST_CHECK_EQUAL(1, topDocs->scoreDocs[3]->doc); } else { BOOST_CHECK_EQUAL(1, topDocs->scoreDocs[2]->doc); BOOST_CHECK_EQUAL(2, topDocs->scoreDocs[3]->doc); } } QueryPtr getElevatedQuery(Collection vals) { BooleanQueryPtr q = newLucene(false); q->setBoost(0); int32_t max = (vals.size() / 2) + 5; for (int32_t i = 0; i < vals.size() - 1; i += 2) { q->add(newLucene(newLucene(vals[i], vals[i + 1])), BooleanClause::SHOULD); priority.put(vals[i + 1], max--); } return q; } }; BOOST_FIXTURE_TEST_SUITE(ElevationComparatorTest, ElevationComparatorFixture) BOOST_AUTO_TEST_CASE(testSorting) { DirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(1000); writer->addDocument(adoc(newCollection(L"id", L"a", L"title", L"ipod", L"str_s", L"a"))); writer->addDocument(adoc(newCollection(L"id", L"b", L"title", L"ipod ipod", L"str_s", L"b"))); writer->addDocument(adoc(newCollection(L"id", L"c", L"title", L"ipod ipod ipod", L"str_s", L"c"))); writer->addDocument(adoc(newCollection(L"id", L"x", L"title", L"boosted", L"str_s", L"x"))); writer->addDocument(adoc(newCollection(L"id", L"y", L"title", L"boosted boosted", L"str_s", L"y"))); writer->addDocument(adoc(newCollection(L"id", L"z", L"title", L"boosted boosted boosted", L"str_s", L"z"))); IndexReaderPtr r = writer->getReader(); writer->close(); IndexSearcherPtr searcher = newLucene(r); runTest(searcher, true); runTest(searcher, false); searcher->close(); r->close(); directory->close(); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/search/ExplanationsFixture.cpp000066400000000000000000000131471217574114600250540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "ExplanationsFixture.h" #include "SpanTermQuery.h" #include "Term.h" #include "CheckHits.h" #include "RAMDirectory.h" #include "WhitespaceAnalyzer.h" #include "QueryParser.h" #include "Document.h" #include "Field.h" #include "IndexWriter.h" #include "IndexSearcher.h" #include "SpanFirstQuery.h" #include "SpanOrQuery.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" #include "BooleanQuery.h" #include "TermQuery.h" namespace Lucene { const String ExplanationsFixture::KEY = L"KEY"; const String ExplanationsFixture::FIELD = L"field"; ExplanationsFixture::ExplanationsFixture() { qp = newLucene(LuceneVersion::LUCENE_CURRENT, FIELD, newLucene()); docFields = newCollection(L"w1 w2 w3 w4 w5", L"w1 w3 w2 w3 zz", L"w1 xx w2 yy w3", L"w1 w3 xx w2 yy w3 zz"); RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer= newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < docFields.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(KEY, StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(FIELD, docFields[i], Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); searcher = newLucene(directory, true); } ExplanationsFixture::~ExplanationsFixture() { searcher->close(); } SpanTermQueryPtr ExplanationsFixture::st(const String& s) { return newLucene(newLucene(FIELD, s)); } SpanFirstQueryPtr ExplanationsFixture::sf(const String& s, int32_t b) { 
return newLucene(st(s), b); } SpanNotQueryPtr ExplanationsFixture::snot(SpanQueryPtr i, SpanQueryPtr e) { return newLucene(i, e); } SpanOrQueryPtr ExplanationsFixture::sor(const String& s, const String& e) { return sor(st(s), st(e)); } SpanOrQueryPtr ExplanationsFixture::sor(SpanQueryPtr s, SpanQueryPtr e) { return newLucene(newCollection(s, e)); } SpanOrQueryPtr ExplanationsFixture::sor(const String& s, const String& m, const String& e) { return sor(st(s), st(m), st(e)); } SpanOrQueryPtr ExplanationsFixture::sor(SpanQueryPtr s, SpanQueryPtr m, SpanQueryPtr e) { return newLucene(newCollection(s, m, e)); } SpanNearQueryPtr ExplanationsFixture::snear(const String& s, const String& e, int32_t slop, bool inOrder) { return snear(st(s), st(e), slop, inOrder); } SpanNearQueryPtr ExplanationsFixture::snear(SpanQueryPtr s, SpanQueryPtr e, int32_t slop, bool inOrder) { return newLucene(newCollection(s, e), slop, inOrder); } SpanNearQueryPtr ExplanationsFixture::snear(const String& s, const String& m, const String& e, int32_t slop, bool inOrder) { return snear(st(s), st(m), st(e), slop, inOrder); } SpanNearQueryPtr ExplanationsFixture::snear(SpanQueryPtr s, SpanQueryPtr m, SpanQueryPtr e, int32_t slop, bool inOrder) { return newLucene(newCollection(s, m, e), slop, inOrder); } QueryPtr ExplanationsFixture::optB(const String& q) { return optB(makeQuery(q)); } QueryPtr ExplanationsFixture::optB(QueryPtr q) { BooleanQueryPtr bq = newLucene(true); bq->add(q, BooleanClause::SHOULD); bq->add(newLucene(newLucene(L"NEVER", L"MATCH")), BooleanClause::MUST_NOT); return bq; } QueryPtr ExplanationsFixture::reqB(const String& q) { return reqB(makeQuery(q)); } QueryPtr ExplanationsFixture::reqB(QueryPtr q) { BooleanQueryPtr bq = newLucene(true); bq->add(q, BooleanClause::MUST); bq->add(newLucene(newLucene(FIELD, L"w1")), BooleanClause::SHOULD); return bq; } Collection ExplanationsFixture::ta(Collection s) { Collection t = Collection::newInstance(s.size()); for (int32_t i = 0; i < s.size(); 
++i) t[i] = newLucene(FIELD, s[i]); return t; } void ExplanationsFixture::qtest(const String& queryText, Collection expDocNrs) { qtest(makeQuery(queryText), expDocNrs); } void ExplanationsFixture::qtest(QueryPtr q, Collection expDocNrs) { CheckHits::checkHitCollector(q, FIELD, searcher, expDocNrs); } void ExplanationsFixture::bqtest(QueryPtr q, Collection expDocNrs) { qtest(reqB(q), expDocNrs); qtest(optB(q), expDocNrs); } void ExplanationsFixture::bqtest(const String& queryText, Collection expDocNrs) { bqtest(makeQuery(queryText), expDocNrs); } QueryPtr ExplanationsFixture::makeQuery(const String& queryText) { return qp->parse(queryText); } } LucenePlusPlus-rel_3.0.4/src/test/search/FieldCacheRangeFilterTest.cpp000066400000000000000000000507331217574114600257740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "BaseTestRangeFilterFixture.h" #include "RAMDirectory.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" #include "FieldCacheRangeFilter.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "DocIdSet.h" using namespace Lucene; /// A basic 'positive' Unit test class for the FieldCacheRangeFilter class. 
BOOST_FIXTURE_TEST_SUITE(FieldCacheRangeFilterTest, BaseTestRangeFilterFixture) BOOST_AUTO_TEST_CASE(testRangeFilterId) { IndexReaderPtr reader = IndexReader::open((DirectoryPtr)signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); int32_t medId = ((maxId - minId) / 2); String minIP = pad(minId); String maxIP = pad(maxId); String medIP = pad(medId); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); QueryPtr q = newLucene(newLucene(L"body", L"body")); // test id, bounded on both ends FieldCacheRangeFilterPtr fcrf = FieldCacheRangeFilter::newStringRange(L"id", minIP, maxIP, true, true); Collection result = search->search(q, fcrf, numDocs)->scoreDocs; BOOST_CHECK(fcrf->getDocIdSet(reader->getSequentialSubReaders()[0])->isCacheable()); BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, maxIP, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, maxIP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, maxIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", medIP, maxIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + maxId - medId, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, medIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + medId - minId, result.size()); // unbounded id result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", L"", L"", true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, L"", true, false), numDocs)->scoreDocs; 
BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", L"", maxIP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, L"", false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", L"", maxIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", medIP, maxIP, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(maxId - medId, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, medIP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(medId - minId, result.size()); // very small sets result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, minIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", medIP, medIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", maxIP, maxIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", minIP, minIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", L"", minIP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", maxIP, maxIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", maxIP, L"", true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, 
result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"id", medIP, medIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testFieldCacheRangeFilterRand) { IndexReaderPtr reader = IndexReader::open((DirectoryPtr)signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); int32_t medId = ((maxId - minId) / 2); String minRP = pad(signedIndex->minR); String maxRP = pad(signedIndex->maxR); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); QueryPtr q = newLucene(newLucene(L"body", L"body")); // test extremes, bounded on both ends Collection result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", minRP, maxRP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", minRP, maxRP, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", minRP, maxRP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", minRP, maxRP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); // unbounded result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", minRP, L"", true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", L"", maxRP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", minRP, L"", false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", L"", maxRP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, 
result.size()); // very small sets result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", minRP, minRP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", maxRP, maxRP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", minRP, minRP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", L"", minRP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", maxRP, maxRP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newStringRange(L"rand", maxRP, L"", true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testFieldCacheRangeFilterInts) { IndexReaderPtr reader = IndexReader::open((DirectoryPtr)signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); int32_t numDocs = reader->numDocs(); int32_t medId = ((maxId - minId) / 2); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); QueryPtr q = newLucene(newLucene(L"body", L"body")); // test id, bounded on both ends FieldCacheRangeFilterPtr fcrf = FieldCacheRangeFilter::newIntRange(L"id", minId, maxId, true, true); Collection result = search->search(q, fcrf, numDocs)->scoreDocs; BOOST_CHECK(fcrf->getDocIdSet(reader->getSequentialSubReaders()[0])->isCacheable()); BOOST_CHECK_EQUAL(numDocs, result.size()); // test extremes, bounded on both ends result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, maxId, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, maxId, false, true), numDocs)->scoreDocs; 
BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, maxId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", medId, maxId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + maxId - medId, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, medId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + medId - minId, result.size()); // unbounded id result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", INT_MIN, INT_MAX, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, INT_MAX, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", INT_MIN, maxId, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, INT_MAX, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", INT_MIN, maxId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", medId, maxId, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(maxId - medId, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, medId, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(medId - minId, result.size()); // very small sets result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, minId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", medId, medId, false, 
false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", maxId, maxId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", minId, minId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", INT_MIN, minId, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", maxId, maxId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", maxId, INT_MAX, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newIntRange(L"id", medId, medId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testFieldCacheRangeFilterLongs) { IndexReaderPtr reader = IndexReader::open((DirectoryPtr)signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); int32_t numDocs = reader->numDocs(); int64_t medId = ((maxId - minId) / 2); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); QueryPtr q = newLucene(newLucene(L"body", L"body")); // test id, bounded on both ends FieldCacheRangeFilterPtr fcrf = FieldCacheRangeFilter::newLongRange(L"id", minId, maxId, true, true); Collection result = search->search(q, fcrf, numDocs)->scoreDocs; BOOST_CHECK(fcrf->getDocIdSet(reader->getSequentialSubReaders()[0])->isCacheable()); BOOST_CHECK_EQUAL(numDocs, result.size()); // test extremes, bounded on both ends result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, (int64_t)maxId, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, 
(int64_t)maxId, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, (int64_t)maxId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", medId, (int64_t)maxId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + maxId - medId, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, medId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + medId - minId, result.size()); // unbounded id result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", LLONG_MIN, LLONG_MAX, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, LLONG_MAX, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", LLONG_MIN, (int64_t)maxId, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, LLONG_MAX, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", LLONG_MIN, (int64_t)maxId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", medId, (int64_t)maxId, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(maxId - medId, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, medId, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(medId - minId, result.size()); // very small sets result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, 
(int64_t)minId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", medId, medId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)maxId, (int64_t)maxId, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)minId, (int64_t)minId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", LLONG_MIN, (int64_t)minId, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)maxId, (int64_t)maxId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", (int64_t)maxId, LLONG_MAX, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, FieldCacheRangeFilter::newLongRange(L"id", medId, medId, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testFieldCacheRangeFilterDoubles) { IndexReaderPtr reader = IndexReader::open((DirectoryPtr)signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); int32_t numDocs = reader->numDocs(); double minIdO = (double)minId + 0.5; double medIdO = minIdO + (double)(maxId - minId) / 2.0; QueryPtr q = newLucene(newLucene(L"body", L"body")); Collection result = search->search(q, FieldCacheRangeFilter::newDoubleRange(L"id", minIdO, medIdO, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs / 2, result.size()); int32_t count = 0; result = search->search(q, FieldCacheRangeFilter::newDoubleRange(L"id", -DBL_MAX, medIdO, false, true), numDocs)->scoreDocs; count += result.size(); result = 
search->search(q, FieldCacheRangeFilter::newDoubleRange(L"id", medIdO, DBL_MAX, false, false), numDocs)->scoreDocs; count += result.size(); BOOST_CHECK_EQUAL(numDocs, count); result = search->search(q, FieldCacheRangeFilter::newDoubleRange(L"id", std::numeric_limits::infinity(), DBL_MAX, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, FieldCacheRangeFilter::newDoubleRange(L"id", -DBL_MAX, -std::numeric_limits::infinity(), false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); } BOOST_AUTO_TEST_CASE(testSparseIndex) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t d = -20; d <= 20; ++d) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(d), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"body", L"body", Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->optimize(); writer->deleteDocuments(newLucene(L"id", L"0")); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); IndexSearcherPtr search = newLucene(reader); BOOST_CHECK(reader->hasDeletions()); QueryPtr q = newLucene(newLucene(L"body", L"body")); FieldCacheRangeFilterPtr fcrf = FieldCacheRangeFilter::newIntRange(L"id", -20, 20, true, true); Collection result = search->search(q, fcrf, 100)->scoreDocs; BOOST_CHECK(!fcrf->getDocIdSet(reader->getSequentialSubReaders()[0])->isCacheable()); BOOST_CHECK_EQUAL(40, result.size()); fcrf = FieldCacheRangeFilter::newIntRange(L"id", 0, 20, true, true); result = search->search(q, fcrf, 100)->scoreDocs; BOOST_CHECK(!fcrf->getDocIdSet(reader->getSequentialSubReaders()[0])->isCacheable()); BOOST_CHECK_EQUAL(20, result.size()); fcrf = FieldCacheRangeFilter::newIntRange(L"id", -20, 0, true, true); result = search->search(q, fcrf, 100)->scoreDocs; 
BOOST_CHECK(!fcrf->getDocIdSet(reader->getSequentialSubReaders()[0])->isCacheable()); BOOST_CHECK_EQUAL(20, result.size()); fcrf = FieldCacheRangeFilter::newIntRange(L"id", 10, 20, true, true); result = search->search(q, fcrf, 100)->scoreDocs; BOOST_CHECK(fcrf->getDocIdSet(reader->getSequentialSubReaders()[0])->isCacheable()); BOOST_CHECK_EQUAL(11, result.size()); fcrf = FieldCacheRangeFilter::newIntRange(L"id", -20, -10, true, true); result = search->search(q, fcrf, 100)->scoreDocs; BOOST_CHECK(fcrf->getDocIdSet(reader->getSequentialSubReaders()[0])->isCacheable()); BOOST_CHECK_EQUAL(11, result.size()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/FieldCacheTermsFilterTest.cpp000066400000000000000000000042231217574114600260230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "KeywordAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "MatchAllDocsQuery.h" #include "FieldCacheTermsFilter.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(FieldCacheTermsFilterTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testMissingTerms) { String fieldName = L"field1"; MockRAMDirectoryPtr rd = newLucene(); IndexWriterPtr w = newLucene(rd, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < 100; ++i) { DocumentPtr doc = newLucene(); int32_t term = i * 10; // terms are units of 10 doc->add(newLucene(fieldName, StringUtils::toString(term), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); w->addDocument(doc); } w->close(); IndexReaderPtr reader = IndexReader::open(rd, true); IndexSearcherPtr searcher = newLucene(reader); int32_t numDocs = reader->numDocs(); MatchAllDocsQueryPtr q = newLucene(); Collection results = searcher->search(q, newLucene(fieldName, newCollection(L"5")), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, results.size()); results = searcher->search(q, newLucene(fieldName, newCollection(L"10")), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, results.size()); results = searcher->search(q, newLucene(fieldName, newCollection(L"10", L"20")), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(2, results.size()); reader->close(); rd->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/FieldCacheTest.cpp000066400000000000000000000075621217574114600236530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "FieldCache.h" using namespace Lucene; class FieldCacheFixture : public LuceneTestFixture { public: FieldCacheFixture() { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer= newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); int64_t theLong = LLONG_MAX; double theDouble = DBL_MAX; uint8_t theByte = UCHAR_MAX; int32_t theInt = INT_MAX; for (int32_t i = 0; i < NUM_DOCS; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"theLong", StringUtils::toString(theLong--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"theDouble", StringUtils::toString(theDouble--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"theByte", StringUtils::toString(theByte--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"theInt", StringUtils::toString(theInt--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->close(); reader = IndexReader::open(directory, true); } virtual ~FieldCacheFixture() { } protected: IndexReaderPtr reader; public: static const int32_t NUM_DOCS; }; const int32_t FieldCacheFixture::NUM_DOCS = 1000; BOOST_FIXTURE_TEST_SUITE(FieldCacheTest, FieldCacheFixture) BOOST_AUTO_TEST_CASE(testFieldCache) { FieldCachePtr cache = FieldCache::DEFAULT(); Collection doubles = cache->getDoubles(reader, L"theDouble"); BOOST_CHECK_EQUAL(doubles.hashCode(), cache->getDoubles(reader, L"theDouble").hashCode()); BOOST_CHECK_EQUAL(doubles.hashCode(), cache->getDoubles(reader, L"theDouble", FieldCache::DEFAULT_DOUBLE_PARSER()).hashCode()); 
BOOST_CHECK_EQUAL(doubles.size(), NUM_DOCS); for (int32_t i = 0; i < doubles.size(); ++i) BOOST_CHECK_CLOSE_FRACTION(doubles[i], (DBL_MAX - i), 0.00001); Collection longs = cache->getLongs(reader, L"theLong"); BOOST_CHECK_EQUAL(longs.hashCode(), cache->getLongs(reader, L"theLong").hashCode()); BOOST_CHECK_EQUAL(longs.hashCode(), cache->getLongs(reader, L"theLong", FieldCache::DEFAULT_LONG_PARSER()).hashCode()); BOOST_CHECK_EQUAL(longs.size(), NUM_DOCS); for (int32_t i = 0; i < longs.size(); ++i) BOOST_CHECK_EQUAL(longs[i], (LLONG_MAX - i)); Collection bytes = cache->getBytes(reader, L"theByte"); BOOST_CHECK_EQUAL(bytes.hashCode(), cache->getBytes(reader, L"theByte").hashCode()); BOOST_CHECK_EQUAL(bytes.hashCode(), cache->getBytes(reader, L"theByte", FieldCache::DEFAULT_BYTE_PARSER()).hashCode()); BOOST_CHECK_EQUAL(bytes.size(), NUM_DOCS); for (int32_t i = 0; i < bytes.size(); ++i) BOOST_CHECK_EQUAL(bytes[i], (uint8_t)(UCHAR_MAX - i)); Collection ints = cache->getInts(reader, L"theInt"); BOOST_CHECK_EQUAL(ints.hashCode(), cache->getInts(reader, L"theInt").hashCode()); BOOST_CHECK_EQUAL(ints.hashCode(), cache->getInts(reader, L"theInt", FieldCache::DEFAULT_INT_PARSER()).hashCode()); BOOST_CHECK_EQUAL(ints.size(), NUM_DOCS); for (int32_t i = 0; i < ints.size(); ++i) BOOST_CHECK_EQUAL(ints[i], (INT_MAX - i)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/FilteredQueryTest.cpp000066400000000000000000000204701217574114600244610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "IndexReader.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" #include "Filter.h" #include "BitSet.h" #include "DocIdBitSet.h" #include "FilteredQuery.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "QueryUtils.h" #include "Sort.h" #include "SortField.h" #include "BooleanQuery.h" #include "TermRangeQuery.h" #include "TopFieldDocs.h" #include "MatchAllDocsQuery.h" using namespace Lucene; class StaticFilterA : public Filter { public: virtual ~StaticFilterA() { } public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { BitSetPtr bitset = newLucene(5); bitset->set((uint32_t)0, (uint32_t)5); return newLucene(bitset); } }; class StaticFilterB : public Filter { public: virtual ~StaticFilterB() { } public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { BitSetPtr bitset = newLucene(5); bitset->set(1); bitset->set(3); return newLucene(bitset); } }; class SingleDocTestFilter : public Filter { public: SingleDocTestFilter(int32_t doc) { this->doc = doc; } virtual ~SingleDocTestFilter() { } protected: int32_t doc; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { BitSetPtr bits = newLucene(reader->maxDoc()); bits->set(doc); return newLucene(bits); } }; class FilteredQueryFixture : public LuceneTestFixture { public: FilteredQueryFixture() { directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"one two three four five", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"sorter", L"b", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"field", L"one two three 
four", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"sorter", L"d", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"field", L"one two three y", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"sorter", L"a", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"field", L"one two x", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"sorter", L"c", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->optimize(); writer->close(); searcher = newLucene(directory, true); query = newLucene(newLucene(L"field", L"three")); filter = newStaticFilterB(); } virtual ~FilteredQueryFixture() { searcher->close(); directory->close(); } protected: IndexSearcherPtr searcher; RAMDirectoryPtr directory; QueryPtr query; FilterPtr filter; public: FilterPtr newStaticFilterA() { return newLucene(); } FilterPtr newStaticFilterB() { return newLucene(); } void checkScoreEquals(QueryPtr q1, QueryPtr q2) { Collection hits1 = searcher->search(q1, FilterPtr(), 1000)->scoreDocs; Collection hits2 = searcher->search (q2, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(hits1.size(), hits2.size()); for (int32_t i = 0; i < hits1.size(); ++i) BOOST_CHECK_CLOSE_FRACTION(hits1[i]->score, hits2[i]->score, 0.0000001); } }; BOOST_FIXTURE_TEST_SUITE(FilteredQueryTest, FilteredQueryFixture) BOOST_AUTO_TEST_CASE(testFilteredQuery) { QueryPtr filteredquery = newLucene(query, filter); Collection hits = searcher->search(filteredquery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(1, hits[0]->doc); QueryUtils::check(filteredquery, searcher); hits = searcher->search(filteredquery, FilterPtr(), 1000, newLucene(newLucene(L"sorter", SortField::STRING)))->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(1, hits[0]->doc); filteredquery = newLucene(newLucene(newLucene(L"field", 
L"one")), filter); hits = searcher->search(filteredquery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); QueryUtils::check(filteredquery, searcher); filteredquery = newLucene(newLucene(newLucene(L"field", L"x")), filter); hits = searcher->search(filteredquery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(3, hits[0]->doc); QueryUtils::check(filteredquery, searcher); filteredquery = newLucene(newLucene(newLucene(L"field", L"y")), filter); hits = searcher->search(filteredquery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(filteredquery, searcher); // test boost FilterPtr f = newStaticFilterA(); double boost = 2.5; BooleanQueryPtr bq1 = newLucene(); TermQueryPtr tq = newLucene(newLucene(L"field", L"one")); tq->setBoost(boost); bq1->add(tq, BooleanClause::MUST); bq1->add(newLucene(newLucene(L"field", L"five")), BooleanClause::MUST); BooleanQueryPtr bq2 = newLucene(); tq = newLucene(newLucene(L"field", L"one")); filteredquery = newLucene(tq, f); filteredquery->setBoost(boost); bq2->add(filteredquery, BooleanClause::MUST); bq2->add(newLucene(newLucene(L"field", L"five")), BooleanClause::MUST); checkScoreEquals(bq1, bq2); BOOST_CHECK_EQUAL(boost, filteredquery->getBoost()); BOOST_CHECK_EQUAL(1.0, tq->getBoost()); // the boost value of the underlying query shouldn't have changed } BOOST_AUTO_TEST_CASE(testRangeQuery) { TermRangeQueryPtr rq = newLucene(L"sorter", L"b", L"d", true, true); QueryPtr filteredquery = newLucene(rq, filter); Collection hits = searcher->search(filteredquery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); QueryUtils::check(filteredquery, searcher); } BOOST_AUTO_TEST_CASE(testBoolean) { BooleanQueryPtr bq = newLucene(); QueryPtr query = newLucene(newLucene(), newLucene(0)); bq->add(query, BooleanClause::MUST); query = newLucene(newLucene(), newLucene(1)); bq->add(query, BooleanClause::MUST); Collection hits = searcher->search(bq, 
FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); } /// Make sure BooleanQuery, which does out-of-order scoring, inside FilteredQuery, works BOOST_AUTO_TEST_CASE(testBoolean2) { BooleanQueryPtr bq = newLucene(); QueryPtr query = newLucene(bq, newLucene(0)); bq->add(newLucene(newLucene(L"field", L"one")), BooleanClause::SHOULD); bq->add(newLucene(newLucene(L"field", L"two")), BooleanClause::SHOULD); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/FilteredSearchTest.cpp000066400000000000000000000067151217574114600245670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "Filter.h" #include "OpenBitSet.h" #include "IndexWriter.h" #include "IndexReader.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "BooleanQuery.h" #include "TermQuery.h" #include "Term.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; DECLARE_SHARED_PTR(SimpleDocIdSetFilter) class SimpleDocIdSetFilter : public Filter { public: SimpleDocIdSetFilter(Collection docs) { this->docs = docs; this->docBase = 0; this->index = 0; } virtual ~SimpleDocIdSetFilter() { } protected: int32_t docBase; Collection docs; int32_t index; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { OpenBitSetPtr set = newLucene(); int32_t limit = docBase + reader->maxDoc(); for (; index < docs.size(); ++index) { int32_t docId = docs[index]; if (docId > limit) break; set->set(docId - docBase); } docBase = limit; return set->isEmpty() ? 
DocIdSetPtr() : set; } void reset() { index = 0; docBase = 0; } }; static const String FIELD = L"category"; static void searchFiltered(IndexWriterPtr writer, DirectoryPtr directory, FilterPtr filter, bool optimize) { for (int32_t i = 0; i < 60; ++i) { // Simple docs DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } if (optimize) writer->optimize(); writer->close(); BooleanQueryPtr booleanQuery = newLucene(); booleanQuery->add(newLucene(newLucene(FIELD, L"36")), BooleanClause::SHOULD); IndexSearcherPtr indexSearcher = newLucene(directory, true); Collection hits = indexSearcher->search(booleanQuery, filter, 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); } BOOST_FIXTURE_TEST_SUITE(FilteredSearchTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testFilteredSearch) { bool enforceSingleSegment = true; RAMDirectoryPtr directory = newLucene(); Collection filterBits = newCollection(1, 36); SimpleDocIdSetFilterPtr filter = newLucene(filterBits); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); searchFiltered(writer, directory, filter, enforceSingleSegment); // run the test on more than one segment enforceSingleSegment = false; // reset - it is stateful filter->reset(); writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); // we index 60 docs - this will create 6 segments writer->setMaxBufferedDocs(10); searchFiltered(writer, directory, filter, enforceSingleSegment); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/FuzzyQueryTest.cpp000066400000000000000000000415441217574114600240570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "FuzzyQuery.h" #include "Term.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "BooleanQuery.h" #include "StandardAnalyzer.h" #include "QueryParser.h" #include "IndexReader.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(FuzzyQueryTest, LuceneTestFixture) static void addDoc(const String& text, IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", text, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } BOOST_AUTO_TEST_CASE(testFuzziness) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); addDoc(L"aaaaa", writer); addDoc(L"aaaab", writer); addDoc(L"aaabb", writer); addDoc(L"aabbb", writer); addDoc(L"abbbb", writer); addDoc(L"bbbbb", writer); addDoc(L"ddddd", writer); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); FuzzyQueryPtr query = newLucene(newLucene(L"field", L"aaaaa"), FuzzyQuery::defaultMinSimilarity(), 0); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); // same with prefix query = newLucene(newLucene(L"field", L"aaaaa"), FuzzyQuery::defaultMinSimilarity(), 1); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); query = newLucene(newLucene(L"field", L"aaaaa"), FuzzyQuery::defaultMinSimilarity(), 2); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); query = newLucene(newLucene(L"field", L"aaaaa"), 
FuzzyQuery::defaultMinSimilarity(), 3); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); query = newLucene(newLucene(L"field", L"aaaaa"), FuzzyQuery::defaultMinSimilarity(), 4); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); query = newLucene(newLucene(L"field", L"aaaaa"), FuzzyQuery::defaultMinSimilarity(), 5); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); query = newLucene(newLucene(L"field", L"aaaaa"), FuzzyQuery::defaultMinSimilarity(), 6); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); // test scoring query = newLucene(newLucene(L"field", L"bbbbb"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); Collection order = newCollection(L"bbbbb", L"abbbb", L"aabbb"); for (int32_t i = 0; i < hits.size(); ++i) { String term = searcher->doc(hits[i]->doc)->get(L"field"); BOOST_CHECK_EQUAL(order[i], term); } // test BooleanQuery.maxClauseCount int32_t savedClauseCount = BooleanQuery::getMaxClauseCount(); BooleanQuery::setMaxClauseCount(2); // This query would normally return 3 documents, because 3 terms match (see above) query = newLucene(newLucene(L"field", L"bbbbb"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); order = newCollection(L"bbbbb", L"abbbb"); for (int32_t i = 0; i < hits.size(); ++i) { String term = searcher->doc(hits[i]->doc)->get(L"field"); BOOST_CHECK_EQUAL(order[i], term); } BooleanQuery::setMaxClauseCount(savedClauseCount); // not similar enough query = newLucene(newLucene(L"field", L"xxxxx"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); query = newLucene(newLucene(L"field", L"aaccc"), 
FuzzyQuery::defaultMinSimilarity(), 0); // edit distance to "aaaaa" = 3 hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // query identical to a word in the index query = newLucene(newLucene(L"field", L"aaaaa"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaa"); // default allows for up to two edits BOOST_CHECK_EQUAL(searcher->doc(hits[1]->doc)->get(L"field"), L"aaaab"); BOOST_CHECK_EQUAL(searcher->doc(hits[2]->doc)->get(L"field"), L"aaabb"); // query similar to a word in the index query = newLucene(newLucene(L"field", L"aaaac"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaa"); BOOST_CHECK_EQUAL(searcher->doc(hits[1]->doc)->get(L"field"), L"aaaab"); BOOST_CHECK_EQUAL(searcher->doc(hits[2]->doc)->get(L"field"), L"aaabb"); // now with prefix query = newLucene(newLucene(L"field", L"aaaac"), FuzzyQuery::defaultMinSimilarity(), 1); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaa"); BOOST_CHECK_EQUAL(searcher->doc(hits[1]->doc)->get(L"field"), L"aaaab"); BOOST_CHECK_EQUAL(searcher->doc(hits[2]->doc)->get(L"field"), L"aaabb"); query = newLucene(newLucene(L"field", L"aaaac"), FuzzyQuery::defaultMinSimilarity(), 2); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaa"); BOOST_CHECK_EQUAL(searcher->doc(hits[1]->doc)->get(L"field"), L"aaaab"); BOOST_CHECK_EQUAL(searcher->doc(hits[2]->doc)->get(L"field"), L"aaabb"); query = newLucene(newLucene(L"field", L"aaaac"), 
FuzzyQuery::defaultMinSimilarity(), 3); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaa"); BOOST_CHECK_EQUAL(searcher->doc(hits[1]->doc)->get(L"field"), L"aaaab"); BOOST_CHECK_EQUAL(searcher->doc(hits[2]->doc)->get(L"field"), L"aaabb"); query = newLucene(newLucene(L"field", L"aaaac"), FuzzyQuery::defaultMinSimilarity(), 4); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaa"); BOOST_CHECK_EQUAL(searcher->doc(hits[1]->doc)->get(L"field"), L"aaaab"); query = newLucene(newLucene(L"field", L"aaaac"), FuzzyQuery::defaultMinSimilarity(), 5); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); query = newLucene(newLucene(L"field", L"ddddX"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"ddddd"); // now with prefix query = newLucene(newLucene(L"field", L"ddddX"), FuzzyQuery::defaultMinSimilarity(), 1); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"ddddd"); query = newLucene(newLucene(L"field", L"ddddX"), FuzzyQuery::defaultMinSimilarity(), 2); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"ddddd"); query = newLucene(newLucene(L"field", L"ddddX"), FuzzyQuery::defaultMinSimilarity(), 3); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"ddddd"); query = newLucene(newLucene(L"field", L"ddddX"), 
FuzzyQuery::defaultMinSimilarity(), 4); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"ddddd"); query = newLucene(newLucene(L"field", L"ddddX"), FuzzyQuery::defaultMinSimilarity(), 5); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // different field = no match query = newLucene(newLucene(L"anotherfield", L"ddddX"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); searcher->close(); directory->close(); } BOOST_AUTO_TEST_CASE(testFuzzinessLong) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); addDoc(L"aaaaaaa", writer); addDoc(L"segment", writer); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); // not similar enough FuzzyQueryPtr query = newLucene(newLucene(L"field", L"xxxxx"), FuzzyQuery::defaultMinSimilarity(), 0); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // edit distance to "aaaaaaa" = 3, this matches because the string is longer than // in testDefaultFuzziness so a bigger difference is allowed query = newLucene(newLucene(L"field", L"aaaaccc"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaaaa"); // now with prefix query = newLucene(newLucene(L"field", L"aaaaccc"), FuzzyQuery::defaultMinSimilarity(), 1); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaaaa"); query = newLucene(newLucene(L"field", L"aaaaccc"), 
FuzzyQuery::defaultMinSimilarity(), 4); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(searcher->doc(hits[0]->doc)->get(L"field"), L"aaaaaaa"); query = newLucene(newLucene(L"field", L"aaaaccc"), FuzzyQuery::defaultMinSimilarity(), 5); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // no match, more than half of the characters is wrong query = newLucene(newLucene(L"field", L"aaacccc"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // now with prefix query = newLucene(newLucene(L"field", L"aaacccc"), FuzzyQuery::defaultMinSimilarity(), 2); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // "student" and "stellent" are indeed similar to "segment" by default query = newLucene(newLucene(L"field", L"student"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); query = newLucene(newLucene(L"field", L"stellent"), FuzzyQuery::defaultMinSimilarity(), 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); // now with prefix query = newLucene(newLucene(L"field", L"student"), FuzzyQuery::defaultMinSimilarity(), 1); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); query = newLucene(newLucene(L"field", L"stellent"), FuzzyQuery::defaultMinSimilarity(), 1); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); query = newLucene(newLucene(L"field", L"student"), FuzzyQuery::defaultMinSimilarity(), 2); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); query = newLucene(newLucene(L"field", L"stellent"), FuzzyQuery::defaultMinSimilarity(), 2); hits = 
searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // "student" doesn't match anymore thanks to increased minimum similarity query = newLucene(newLucene(L"field", L"student"), 0.6, 0); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); BOOST_CHECK_EXCEPTION(query = newLucene(newLucene(L"field", L"student"), 1.1), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); BOOST_CHECK_EXCEPTION(query = newLucene(newLucene(L"field", L"student"), -0.1), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); searcher->close(); directory->close(); } BOOST_AUTO_TEST_CASE(testTokenLengthOpt) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); addDoc(L"12345678911", writer); addDoc(L"segment", writer); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); // term not over 10 chars, so optimization shortcuts FuzzyQueryPtr query = newLucene(newLucene(L"field", L"1234569"), 0.9); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // 10 chars, so no optimization query = newLucene(newLucene(L"field", L"1234567891"), 0.9); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // over 10 chars, so no optimization query = newLucene(newLucene(L"field", L"12345678911"), 0.9); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); // over 10 chars, no match query = newLucene(newLucene(L"field", L"sdfsdfsdfsdf"), 0.9); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); } BOOST_AUTO_TEST_CASE(testGiga) { StandardAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); DirectoryPtr index = newLucene(); IndexWriterPtr w = newLucene(index, 
analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); addDoc(L"Lucene in Action", w); addDoc(L"Lucene for Dummies", w); addDoc(L"Giga byte", w); addDoc(L"ManagingGigabytesManagingGigabyte", w); addDoc(L"ManagingGigabytesManagingGigabytes", w); addDoc(L"The Art of Computer Science", w); addDoc(L"J. K. Rowling", w); addDoc(L"JK Rowling", w); addDoc(L"Joanne K Roling", w); addDoc(L"Bruce Willis", w); addDoc(L"Willis bruce", w); addDoc(L"Brute willis", w); addDoc(L"B. willis", w); IndexReaderPtr r = w->getReader(); w->close(); QueryPtr q = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", analyzer)->parse(L"giga~0.9"); IndexSearcherPtr searcher = newLucene(r); Collection hits = searcher->search(q, 10)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(L"Giga byte", searcher->doc(hits[0]->doc)->get(L"field")); r->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/MatchAllDocsQueryTest.cpp000066400000000000000000000110721217574114600252170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "StandardAnalyzer.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "MatchAllDocsQuery.h" #include "BooleanQuery.h" #include "TermQuery.h" #include "Term.h" #include "QueryParser.h" #include "Document.h" #include "Field.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(MatchAllDocsQueryTest, LuceneTestFixture) static void addDoc(const String& text, IndexWriterPtr iw, double boost) { DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"key", text, Field::STORE_YES, Field::INDEX_ANALYZED); f->setBoost(boost); doc->add(f); iw->addDocument(doc); } BOOST_AUTO_TEST_CASE(testQuery) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); RAMDirectoryPtr dir = newLucene(); IndexWriterPtr iw = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(2); // force multi-segment addDoc(L"one", iw, 1); addDoc(L"two", iw, 20); addDoc(L"three four", iw, 300); iw->close(); IndexReaderPtr ir = IndexReader::open(dir, false); IndexSearcherPtr is = newLucene(ir); // assert with norms scoring turned off Collection hits = is->search(newLucene(), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); BOOST_CHECK_EQUAL(L"one", ir->document(hits[0]->doc)->get(L"key")); BOOST_CHECK_EQUAL(L"two", ir->document(hits[1]->doc)->get(L"key")); BOOST_CHECK_EQUAL(L"three four", ir->document(hits[2]->doc)->get(L"key")); // assert with norms scoring turned on MatchAllDocsQueryPtr normsQuery = newLucene(L"key"); hits = is->search(normsQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); BOOST_CHECK_EQUAL(L"three four", ir->document(hits[0]->doc)->get(L"key")); BOOST_CHECK_EQUAL(L"two", ir->document(hits[1]->doc)->get(L"key")); BOOST_CHECK_EQUAL(L"one", 
ir->document(hits[2]->doc)->get(L"key")); // change norm & retest ir->setNorm(0, L"key", 400.0); normsQuery = newLucene(L"key"); hits = is->search(normsQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); BOOST_CHECK_EQUAL(L"one", ir->document(hits[0]->doc)->get(L"key")); BOOST_CHECK_EQUAL(L"three four", ir->document(hits[1]->doc)->get(L"key")); BOOST_CHECK_EQUAL(L"two", ir->document(hits[2]->doc)->get(L"key")); // some artificial queries to trigger the use of skipTo() BooleanQueryPtr bq = newLucene(); bq->add(newLucene(), BooleanClause::MUST); bq->add(newLucene(), BooleanClause::MUST); hits = is->search(bq, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); bq = newLucene(); bq->add(newLucene(), BooleanClause::MUST); bq->add(newLucene(newLucene(L"key", L"three")), BooleanClause::MUST); hits = is->search(bq, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); // delete a document is->getIndexReader()->deleteDocument(0); hits = is->search(newLucene(), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); // test parsable toString() QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"key", analyzer); hits = is->search(qp->parse(newLucene()->toString()), FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); // test parsable toString() with non default boost QueryPtr maq = newLucene(); maq->setBoost(2.3); QueryPtr pq = qp->parse(maq->toString()); hits = is->search(pq, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); is->close(); ir->close(); dir->close(); } BOOST_AUTO_TEST_CASE(testEquals) { QueryPtr q1 = newLucene(); QueryPtr q2 = newLucene(); BOOST_CHECK(q1->equals(q2)); q1->setBoost(1.5); BOOST_CHECK(!q1->equals(q2)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/MockFilter.cpp000066400000000000000000000015651217574114600231000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // 
Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "MockFilter.h" #include "DocIdBitSet.h" #include "BitSet.h" namespace Lucene { MockFilter::MockFilter() { _wasCalled = false; } MockFilter::~MockFilter() { } DocIdSetPtr MockFilter::getDocIdSet(IndexReaderPtr reader) { _wasCalled = true; return newLucene(newLucene()); } void MockFilter::clear() { _wasCalled = false; } bool MockFilter::wasCalled() { return _wasCalled; } } LucenePlusPlus-rel_3.0.4/src/test/search/MultiPhraseQueryTest.cpp000066400000000000000000000171071217574114600251630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "IndexSearcher.h" #include "MultiPhraseQuery.h" #include "Term.h" #include "IndexReader.h" #include "TermEnum.h" #include "Document.h" #include "Field.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "TermQuery.h" #include "BooleanQuery.h" #include "StandardAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(MultiPhraseQueryTest, LuceneTestFixture) static void add(const String& s, IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } static void add(const String& s, const String& type, IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"type", type, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } BOOST_AUTO_TEST_CASE(testPhrasePrefix) { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); add(L"blueberry pie", writer); add(L"blueberry strudel", writer); add(L"blueberry pizza", writer); add(L"blueberry chewing gum", writer); add(L"bluebird pizza", writer); add(L"bluebird foobar pizza", writer); add(L"piccadilly circus", writer); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(indexStore, true); // search for "blueberry pi*" MultiPhraseQueryPtr query1 = newLucene(); // search for "strawberry pi*" MultiPhraseQueryPtr query2 = newLucene(); query1->add(newLucene(L"body", L"blueberry")); query2->add(newLucene(L"body", L"strawberry")); Collection termsWithPrefix = Collection::newInstance(); IndexReaderPtr ir = IndexReader::open(indexStore, true); // this TermEnum gives "piccadilly", "pie" and 
"pizza". String prefix = L"pi"; TermEnumPtr te = ir->terms(newLucene(L"body", prefix)); do { if (boost::starts_with(te->term()->text(), prefix)) termsWithPrefix.add(te->term()); } while (te->next()); query1->add(termsWithPrefix); BOOST_CHECK_EQUAL(L"body:\"blueberry (piccadilly pie pizza)\"", query1->toString()); query2->add(termsWithPrefix); BOOST_CHECK_EQUAL(L"body:\"strawberry (piccadilly pie pizza)\"", query2->toString()); Collection result = searcher->search(query1, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, result.size()); result = searcher->search(query2, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); // search for "blue* pizza" MultiPhraseQueryPtr query3 = newLucene(); termsWithPrefix.clear(); prefix = L"blue"; te = ir->terms(newLucene(L"body", prefix)); do { if (boost::starts_with(te->term()->text(), prefix)) termsWithPrefix.add(te->term()); } while (te->next()); query3->add(termsWithPrefix); query3->add(newLucene(L"body", L"pizza")); result = searcher->search(query3, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, result.size()); // blueberry pizza, bluebird pizza BOOST_CHECK_EQUAL(L"body:\"(blueberry bluebird) pizza\"", query3->toString()); // test slop query3->setSlop(1); result = searcher->search(query3, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, result.size()); // blueberry pizza, bluebird pizza, bluebird foobar pizza MultiPhraseQueryPtr query4 = newLucene(); query4->add(newLucene(L"field1", L"foo")); BOOST_CHECK_EXCEPTION(query4->add(newLucene(L"field2", L"foobar")), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); searcher->close(); indexStore->close(); } BOOST_AUTO_TEST_CASE(testBooleanQueryContainingSingleTermPrefixQuery) { // In order to cause the bug, the outer query must have more than one term and all terms required. // The contained PhraseMultiQuery must contain exactly one term array. 
RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); add(L"blueberry pie", writer); add(L"blueberry chewing gum", writer); add(L"blue raspberry pie", writer); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(indexStore, true); // This query will be equivalent to +body:pie +body:"blue*" BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"body", L"pie")), BooleanClause::MUST); MultiPhraseQueryPtr mpq = newLucene(); mpq->add(newCollection(newLucene(L"body", L"blueberry"), newLucene(L"body", L"blue"))); q->add(mpq, BooleanClause::MUST); Collection hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); searcher->close(); } BOOST_AUTO_TEST_CASE(testPhrasePrefixWithBooleanQuery) { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(LuceneVersion::LUCENE_CURRENT, HashSet::newInstance()), true, IndexWriter::MaxFieldLengthLIMITED); add(L"This is a test", L"object", writer); add(L"a note", L"note", writer); writer->close(); IndexSearcherPtr searcher = newLucene(indexStore, true); // This query will be equivalent to +type:note +body:"a t*" BooleanQueryPtr q = newLucene(); q->add(newLucene(newLucene(L"type", L"note")), BooleanClause::MUST); MultiPhraseQueryPtr mpq = newLucene(); mpq->add(newLucene(L"body", L"a")); mpq->add(newCollection(newLucene(L"body", L"test"), newLucene(L"body", L"this"))); q->add(mpq, BooleanClause::MUST); Collection hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); searcher->close(); } BOOST_AUTO_TEST_CASE(testHashCodeAndEquals) { MultiPhraseQueryPtr query1 = newLucene(); MultiPhraseQueryPtr query2 = newLucene(); BOOST_CHECK_EQUAL(query1->hashCode(), query2->hashCode()); BOOST_CHECK(query1->equals(query2)); TermPtr term1 = newLucene(L"someField", L"someText"); query1->add(term1); query2->add(term1); 
BOOST_CHECK_EQUAL(query1->hashCode(), query2->hashCode()); BOOST_CHECK(query1->equals(query2)); TermPtr term2 = newLucene(L"someField", L"someMoreText"); query1->add(term2); BOOST_CHECK_NE(query1->hashCode(), query2->hashCode()); BOOST_CHECK(!query1->equals(query2)); query2->add(term2); BOOST_CHECK_EQUAL(query1->hashCode(), query2->hashCode()); BOOST_CHECK(query1->equals(query2)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/MultiSearcherRankingTest.cpp000066400000000000000000000116121217574114600257540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "IndexSearcher.h" #include "MultiSearcher.h" #include "Document.h" #include "Field.h" #include "QueryParser.h" #include "Query.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; class MultiSearcherRankingFixture : public LuceneTestFixture { public: MultiSearcherRankingFixture() { // create MultiSearcher from two separate searchers DirectoryPtr d1 = newLucene(); IndexWriterPtr iw1 = newLucene(d1, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); addCollection1(iw1); iw1->close(); DirectoryPtr d2 = newLucene(); IndexWriterPtr iw2 = newLucene(d2, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); addCollection2(iw2); iw2->close(); Collection s = newCollection(newLucene(d1, true), newLucene(d2, true)); multiSearcher = newLucene(s); // create IndexSearcher which contains all documents DirectoryPtr d = newLucene(); IndexWriterPtr iw = newLucene(d, 
newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); addCollection1(iw); addCollection2(iw); iw->close(); singleSearcher = newLucene(d, true); } virtual ~MultiSearcherRankingFixture() { } protected: static const String FIELD_NAME; SearcherPtr multiSearcher; SearcherPtr singleSearcher; public: void addCollection1(IndexWriterPtr iw) { add(L"one blah three", iw); add(L"one foo three multiOne", iw); add(L"one foobar three multiThree", iw); add(L"blueberry pie", iw); add(L"blueberry strudel", iw); add(L"blueberry pizza", iw); } void addCollection2(IndexWriterPtr iw) { add(L"two blah three", iw); add(L"two foo xxx multiTwo", iw); add(L"two foobar xxx multiThreee", iw); add(L"blueberry chewing gum", iw); add(L"bluebird pizza", iw); add(L"bluebird foobar pizza", iw); add(L"piccadilly circus", iw); } void add(const String& value, IndexWriterPtr iw) { DocumentPtr d = newLucene(); d->add(newLucene(FIELD_NAME, value, Field::STORE_YES, Field::INDEX_ANALYZED)); iw->addDocument(d); } /// checks if a query yields the same result when executed on a single IndexSearcher containing all /// documents and on MultiSearcher aggregating sub-searchers /// @param queryStr the query to check. 
void checkQuery(const String& queryStr) { QueryParserPtr queryParser = newLucene(LuceneVersion::LUCENE_CURRENT, FIELD_NAME, newLucene(LuceneVersion::LUCENE_CURRENT)); QueryPtr query = queryParser->parse(queryStr); Collection multiSearcherHits = multiSearcher->search(query, FilterPtr(), 1000)->scoreDocs; Collection singleSearcherHits = singleSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(multiSearcherHits.size(), singleSearcherHits.size()); for (int32_t i = 0; i < multiSearcherHits.size(); ++i) { DocumentPtr docMulti = multiSearcher->doc(multiSearcherHits[i]->doc); DocumentPtr docSingle = singleSearcher->doc(singleSearcherHits[i]->doc); BOOST_CHECK_CLOSE_FRACTION(multiSearcherHits[i]->score, singleSearcherHits[i]->score, 0.001); BOOST_CHECK_EQUAL(docMulti->get(FIELD_NAME), docSingle->get(FIELD_NAME)); } } }; const String MultiSearcherRankingFixture::FIELD_NAME = L"body"; BOOST_FIXTURE_TEST_SUITE(MultiSearcherRankingTest, MultiSearcherRankingFixture) BOOST_AUTO_TEST_CASE(testOneTermQuery) { checkQuery(L"three"); } BOOST_AUTO_TEST_CASE(testTwoTermQuery) { checkQuery(L"three foo"); } BOOST_AUTO_TEST_CASE(testPrefixQuery) { checkQuery(L"multi*"); } BOOST_AUTO_TEST_CASE(testFuzzyQuery) { checkQuery(L"multiThree~"); } BOOST_AUTO_TEST_CASE(testRangeQuery) { checkQuery(L"{multiA TO multiP}"); } BOOST_AUTO_TEST_CASE(testMultiPhraseQuery) { checkQuery(L"\"blueberry pi*\""); } BOOST_AUTO_TEST_CASE(testNoMatchQuery) { checkQuery(L"+three +nomatch"); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/MultiSearcherTest.cpp000066400000000000000000000363211217574114600244460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "MockRAMDirectory.h" #include "Document.h" #include "Field.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "QueryParser.h" #include "Query.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "MultiSearcher.h" #include "Term.h" #include "IndexReader.h" #include "TermQuery.h" #include "SetBasedFieldSelector.h" #include "KeywordAnalyzer.h" #include "Sort.h" #include "DefaultSimilarity.h" #include "TopFieldDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(MultiSearcherTest, LuceneTestFixture) static MultiSearcherPtr getMultiSearcherInstance(Collection searchers) { return newLucene(searchers); } static DocumentPtr createDocument(const String& contents1, const String& contents2) { DocumentPtr document = newLucene(); document->add(newLucene(L"contents", contents1, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); document->add(newLucene(L"other", L"other contents", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); if (!contents2.empty()) document->add(newLucene(L"contents", contents2, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); return document; } static void initIndex(DirectoryPtr directory, int32_t numDocs, bool create, const String& contents2) { IndexWriterPtr indexWriter = newLucene(directory, newLucene(), create, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < numDocs; ++i) indexWriter->addDocument(createDocument(L"doc" + StringUtils::toString(i), contents2)); indexWriter->close(); } BOOST_AUTO_TEST_CASE(testEmptyIndex) { // creating two directories for indices DirectoryPtr indexStoreA = newLucene(); DirectoryPtr indexStoreB = newLucene(); // creating a document to store DocumentPtr lDoc = newLucene(); lDoc->add(newLucene(L"fulltext", L"Once upon a time.....", Field::STORE_YES, Field::INDEX_ANALYZED)); lDoc->add(newLucene(L"id", L"doc1", Field::STORE_YES, 
Field::INDEX_NOT_ANALYZED)); lDoc->add(newLucene(L"handle", L"1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // creating a document to store DocumentPtr lDoc2 = newLucene(); lDoc2->add(newLucene(L"fulltext", L"in a galaxy far far away.....", Field::STORE_YES, Field::INDEX_ANALYZED)); lDoc2->add(newLucene(L"id", L"doc2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); lDoc2->add(newLucene(L"handle", L"1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // creating a document to store DocumentPtr lDoc3 = newLucene(); lDoc3->add(newLucene(L"fulltext", L"a bizarre bug manifested itself....", Field::STORE_YES, Field::INDEX_ANALYZED)); lDoc3->add(newLucene(L"id", L"doc3", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); lDoc3->add(newLucene(L"handle", L"1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // creating an index writer for the first index IndexWriterPtr writerA = newLucene(indexStoreA, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); // creating an index writer for the second index, but writing nothing IndexWriterPtr writerB = newLucene(indexStoreB, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); //-------------------------------------------------------------------- // scenario 1 //-------------------------------------------------------------------- // writing the documents to the first index writerA->addDocument(lDoc); writerA->addDocument(lDoc2); writerA->addDocument(lDoc3); writerA->optimize(); writerA->close(); // closing the second index writerB->close(); // creating the query QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, L"fulltext", newLucene(LuceneVersion::LUCENE_CURRENT)); QueryPtr query = parser->parse(L"handle:1"); // building the searchables Collection searchers = newCollection(newLucene(indexStoreB, true), newLucene(indexStoreA, true)); // creating the multiSearcher SearcherPtr mSearcher = getMultiSearcherInstance(searchers); // performing the search 
Collection hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); // iterating over the hit documents for (int32_t i = 0; i < hits.size(); ++i) mSearcher->doc(hits[i]->doc); mSearcher->close(); //-------------------------------------------------------------------- // scenario 2 //-------------------------------------------------------------------- // adding one document to the empty index writerB = newLucene(indexStoreB, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); writerB->addDocument(lDoc); writerB->optimize(); writerB->close(); // building the searchables Collection searchers2 = newCollection(newLucene(indexStoreB, true), newLucene(indexStoreA, true)); // creating the mulitSearcher MultiSearcherPtr mSearcher2 = getMultiSearcherInstance(searchers2); // performing the same search Collection hits2 = mSearcher2->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, hits2.size()); // iterating over the hit documents for (int32_t i = 0; i < hits2.size(); ++i) mSearcher2->doc(hits2[i]->doc); // test the subSearcher() method QueryPtr subSearcherQuery = parser->parse(L"id:doc1"); hits2 = mSearcher2->search(subSearcherQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits2.size()); BOOST_CHECK_EQUAL(0, mSearcher2->subSearcher(hits2[0]->doc)); // hit from searchers2[0] BOOST_CHECK_EQUAL(1, mSearcher2->subSearcher(hits2[1]->doc)); // hit from searchers2[1] subSearcherQuery = parser->parse(L"id:doc2"); hits2 = mSearcher2->search(subSearcherQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits2.size()); BOOST_CHECK_EQUAL(1, mSearcher2->subSearcher(hits2[0]->doc)); // hit from searchers2[1] mSearcher2->close(); //-------------------------------------------------------------------- // scenario 3 //-------------------------------------------------------------------- // deleting the document just added, this will cause a different exception to take place TermPtr term 
= newLucene(L"id", L"doc1"); IndexReaderPtr readerB = IndexReader::open(indexStoreB, false); readerB->deleteDocuments(term); readerB->close(); // optimizing the index with the writer writerB = newLucene(indexStoreB, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); writerB->optimize(); writerB->close(); // building the searchables Collection searchers3 = newCollection(newLucene(indexStoreB, true), newLucene(indexStoreA, true)); // creating the mulitSearcher SearcherPtr mSearcher3 = getMultiSearcherInstance(searchers3); // performing the same search Collection hits3 = mSearcher3->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits3.size()); // iterating over the hit documents for (int32_t i = 0; i < hits3.size(); ++i) mSearcher3->doc(hits3[i]->doc); mSearcher3->close(); indexStoreA->close(); indexStoreB->close(); } BOOST_AUTO_TEST_CASE(testFieldSelector) { RAMDirectoryPtr ramDirectory1 = newLucene(); RAMDirectoryPtr ramDirectory2 = newLucene(); QueryPtr query = newLucene(newLucene(L"contents", L"doc0")); // Now put the documents in a different index initIndex(ramDirectory1, 10, true, L""); // documents with a single token "doc0", "doc1", etc... initIndex(ramDirectory2, 10, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... 
IndexSearcherPtr indexSearcher1 = newLucene(ramDirectory1, true); IndexSearcherPtr indexSearcher2 = newLucene(ramDirectory2, true); MultiSearcherPtr searcher = getMultiSearcherInstance(newCollection(indexSearcher1, indexSearcher2)); BOOST_CHECK(searcher); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK(hits); BOOST_CHECK_EQUAL(hits.size(), 2); DocumentPtr document = searcher->doc(hits[0]->doc); BOOST_CHECK(document); BOOST_CHECK_EQUAL(document->getFields().size(), 2); // Should be one document from each directory they both have two fields, contents and other HashSet ftl = HashSet::newInstance(); ftl.add(L"other"); SetBasedFieldSelectorPtr fs = newLucene(ftl, HashSet::newInstance()); document = searcher->doc(hits[0]->doc, fs); BOOST_CHECK(document); BOOST_CHECK_EQUAL(document->getFields().size(), 1); String value = document->get(L"contents"); BOOST_CHECK(value.empty()); value = document->get(L"other"); BOOST_CHECK(!value.empty()); ftl.clear(); ftl.add(L"contents"); fs = newLucene(ftl, HashSet::newInstance()); document = searcher->doc(hits[1]->doc, fs); value = document->get(L"contents"); BOOST_CHECK(!value.empty()); value = document->get(L"other"); BOOST_CHECK(value.empty()); } BOOST_AUTO_TEST_CASE(testNormalization) { int32_t numDocs = 10; QueryPtr query = newLucene(newLucene(L"contents", L"doc0")); RAMDirectoryPtr ramDirectory1 = newLucene(); // First put the documents in the same index initIndex(ramDirectory1, numDocs, true, L""); // documents with a single token "doc0", "doc1", etc... initIndex(ramDirectory1, numDocs, false, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... 
IndexSearcherPtr indexSearcher1 = newLucene(ramDirectory1, true); indexSearcher1->setDefaultFieldSortScoring(true, true); Collection hits = indexSearcher1->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); // Store the scores for use later Collection scores = newCollection(hits[0]->score, hits[1]->score); BOOST_CHECK(scores[0] > scores[1]); indexSearcher1->close(); ramDirectory1->close(); hits.clear(); ramDirectory1 = newLucene(); RAMDirectoryPtr ramDirectory2 = newLucene(); // Now put the documents in a different index initIndex(ramDirectory1, numDocs, true, L""); // documents with a single token "doc0", "doc1", etc... initIndex(ramDirectory2, numDocs, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... indexSearcher1 = newLucene(ramDirectory1, true); indexSearcher1->setDefaultFieldSortScoring(true, true); IndexSearcherPtr indexSearcher2 = newLucene(ramDirectory2, true); indexSearcher2->setDefaultFieldSortScoring(true, true); SearcherPtr searcher = getMultiSearcherInstance(newCollection(indexSearcher1, indexSearcher2)); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); // The scores should be the same (within reason) BOOST_CHECK_CLOSE_FRACTION(scores[0], hits[0]->score, 1e-6); // This will a document from ramDirectory1 BOOST_CHECK_CLOSE_FRACTION(scores[1], hits[1]->score, 1e-6); // This will a document from ramDirectory2 // Adding a Sort.RELEVANCE object should not change anything hits = searcher->search(query, FilterPtr(), 1000, Sort::RELEVANCE())->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); BOOST_CHECK_CLOSE_FRACTION(scores[0], hits[0]->score, 1e-6); // This will a document from ramDirectory1 BOOST_CHECK_CLOSE_FRACTION(scores[1], hits[1]->score, 1e-6); // This will a document from ramDirectory2 searcher->close(); ramDirectory1->close(); ramDirectory2->close(); } namespace TestCustomSimilarity { class CustomSimilarity : public DefaultSimilarity { public: 
virtual ~CustomSimilarity() { } public: virtual double idf(int32_t docFreq, int32_t numDocs) { return 100.0; } virtual double coord(int32_t overlap, int32_t maxOverlap) { return 1.0; } virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } virtual double queryNorm(double sumOfSquaredWeights) { return 1.0; } virtual double sloppyFreq(int32_t distance) { return 1.0; } virtual double tf(double freq) { return 1.0; } }; } BOOST_AUTO_TEST_CASE(testCustomSimilarity) { RAMDirectoryPtr dir = newLucene(); initIndex(dir, 10, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... IndexSearcherPtr searcher = newLucene(dir, true); MultiSearcherPtr msearcher = getMultiSearcherInstance(newCollection(searcher)); SimilarityPtr customSimilarity = newLucene(); searcher->setSimilarity(customSimilarity); msearcher->setSimilarity(customSimilarity); QueryPtr query = newLucene(newLucene(L"contents", L"doc0")); // Get a score from IndexSearcher TopDocsPtr topDocs = searcher->search(query, FilterPtr(), 1); double score1 = topDocs->maxScore; // Get the score from MultiSearcher topDocs = msearcher->search(query, FilterPtr(), 1); double scoreN = topDocs->maxScore; // The scores from the IndexSearcher and Multisearcher should be the same if the same similarity is used. BOOST_CHECK_CLOSE_FRACTION(score1, scoreN, 1e-6); } BOOST_AUTO_TEST_CASE(testDocFreq) { RAMDirectoryPtr dir1 = newLucene(); RAMDirectoryPtr dir2 = newLucene(); initIndex(dir1, 10, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... initIndex(dir2, 5, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... 
IndexSearcherPtr searcher1 = newLucene(dir1, true); IndexSearcherPtr searcher2 = newLucene(dir2, true); MultiSearcherPtr multiSearcher = getMultiSearcherInstance(newCollection(searcher1, searcher2)); BOOST_CHECK_EQUAL(15, multiSearcher->docFreq(newLucene(L"contents", L"x"))); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/MultiTermConstantScoreTest.cpp000066400000000000000000000614741217574114600263360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTestRangeFilterFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "QueryUtils.h" #include "Term.h" #include "TermRangeQuery.h" #include "MultiTermQuery.h" #include "PrefixQuery.h" #include "WildcardQuery.h" #include "Collator.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "Scorer.h" #include "Collector.h" #include "BooleanQuery.h" using namespace Lucene; class MultiTermConstantScoreFixture : public BaseTestRangeFilterFixture { public: MultiTermConstantScoreFixture() { Collection data = newCollection( L"A 1 2 3 4 5 6", L"Z 4 5 6", L"", L"B 2 4 5 6", L"Y 3 5 6", L"", L"C 3 6", L"X 4 5 6" ); small = newLucene(); IndexWriterPtr writer = newLucene(small, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < data.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"all", L"all", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); if (!data[i].empty()) 
doc->add(newLucene(L"data", data[i], Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->optimize(); writer->close(); } virtual ~MultiTermConstantScoreFixture() { } public: static const double SCORE_COMP_THRESH; DirectoryPtr small; public: QueryPtr csrq(const String& f, const String& l, const String& h, bool il, bool ih) { TermRangeQueryPtr query = newLucene(f, l, h, il, ih); query->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); return query; } QueryPtr csrq(const String& f, const String& l, const String& h, bool il, bool ih, RewriteMethodPtr method) { TermRangeQueryPtr query = newLucene(f, l, h, il, ih); query->setRewriteMethod(method); return query; } QueryPtr csrq(const String& f, const String& l, const String& h, bool il, bool ih, CollatorPtr c) { TermRangeQueryPtr query = newLucene(f, l, h, il, ih, c); query->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); return query; } QueryPtr cspq(TermPtr prefix) { PrefixQueryPtr query = newLucene(prefix); query->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); return query; } QueryPtr cswcq(TermPtr wild) { WildcardQueryPtr query = newLucene(wild); query->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); return query; } }; /// threshold for comparing doubles const double MultiTermConstantScoreFixture::SCORE_COMP_THRESH = 1e-6f; BOOST_FIXTURE_TEST_SUITE(MultiTermConstantScoreTest, MultiTermConstantScoreFixture) BOOST_AUTO_TEST_CASE(testBasics) { QueryUtils::check(csrq(L"data", L"1", L"6", true, true)); QueryUtils::check(csrq(L"data", L"A", L"Z", true, true)); QueryUtils::checkUnequal(csrq(L"data", L"1", L"6", true, true), csrq(L"data", L"A", L"Z", true, true)); QueryUtils::check(cspq(newLucene(L"data", L"p*u?"))); QueryUtils::checkUnequal(cspq(newLucene(L"data", L"pre*")), cspq(newLucene(L"data", L"pres*"))); QueryUtils::check(cswcq(newLucene(L"data", L"p"))); QueryUtils::checkUnequal(cswcq(newLucene(L"data", L"pre*n?t")), 
cswcq(newLucene(L"data", L"pr*t?j"))); } BOOST_AUTO_TEST_CASE(testBasicsRngCollating) { CollatorPtr c = newLucene(std::locale()); QueryUtils::check(csrq(L"data", L"1", L"6", true, true, c)); QueryUtils::check(csrq(L"data", L"A", L"Z", true, true, c)); QueryUtils::checkUnequal(csrq(L"data", L"1", L"6", true, true, c), csrq(L"data", L"A", L"Z", true, true, c)); } BOOST_AUTO_TEST_CASE(testEqualScores) { IndexReaderPtr reader = IndexReader::open(small, true); IndexSearcherPtr search = newLucene(reader); // some hits match more terms then others, score should be the same Collection result = search->search(csrq(L"data", L"1", L"6", true, true), FilterPtr(), 1000)->scoreDocs; int32_t numHits = result.size(); BOOST_CHECK_EQUAL(6, numHits); double score = result[0]->score; for (int32_t i = 1; i < numHits; ++i) BOOST_CHECK_EQUAL(score, result[i]->score); result = search->search(csrq(L"data", L"1", L"6", true, true, MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()), FilterPtr(), 1000)->scoreDocs; numHits = result.size(); BOOST_CHECK_EQUAL(6, numHits); for (int32_t i = 0; i < numHits; ++i) BOOST_CHECK_EQUAL(score, result[i]->score); } namespace TestBoost { class TestCollector : public Collector { public: TestCollector() { base = 0; } virtual ~TestCollector() { } protected: int32_t base; ScorerPtr scorer; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { BOOST_CHECK_EQUAL(1.0, scorer->score()); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { base = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; } BOOST_AUTO_TEST_CASE(testBoost) { IndexReaderPtr reader = IndexReader::open(small, true); IndexSearcherPtr search = newLucene(reader); // test for correct application of query normalization // must use a non score normalizing method for this. 
QueryPtr q = csrq(L"data", L"1", L"6", true, true); q->setBoost(100); search->search(q, FilterPtr(), newLucene()); // Ensure that boosting works to score one clause of a query higher than another. QueryPtr q1 = csrq(L"data", L"A", L"A", true, true); // matches document #0 q1->setBoost(0.1); QueryPtr q2 = csrq(L"data", L"Z", L"Z", true, true); // matches document #1 BooleanQueryPtr bq = newLucene(true); bq->add(q1, BooleanClause::SHOULD); bq->add(q2, BooleanClause::SHOULD); Collection hits = search->search(bq, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits[0]->doc); BOOST_CHECK_EQUAL(0, hits[1]->doc); BOOST_CHECK(hits[0]->score > hits[1]->score); q1 = csrq(L"data", L"A", L"A", true, true, MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); // matches document #0 q1->setBoost(0.1); q2 = csrq(L"data", L"Z", L"Z", true, true, MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); // matches document #1 bq = newLucene(true); bq->add(q1, BooleanClause::SHOULD); bq->add(q2, BooleanClause::SHOULD); hits = search->search(bq, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits[0]->doc); BOOST_CHECK_EQUAL(0, hits[1]->doc); BOOST_CHECK(hits[0]->score > hits[1]->score); q1 = csrq(L"data", L"A", L"A", true, true); // matches document #0 q1->setBoost(10.0); q2 = csrq(L"data", L"Z", L"Z", true, true); // matches document #1 bq = newLucene(true); bq->add(q1, BooleanClause::SHOULD); bq->add(q2, BooleanClause::SHOULD); hits = search->search(bq, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits[0]->doc); BOOST_CHECK_EQUAL(1, hits[1]->doc); BOOST_CHECK(hits[0]->score > hits[1]->score); } BOOST_AUTO_TEST_CASE(testBooleanOrderUnAffected) { IndexReaderPtr reader = IndexReader::open(small, true); IndexSearcherPtr search = newLucene(reader); // first do a regular TermRangeQuery which uses term expansion so docs with more terms in range get higher scores QueryPtr rq = newLucene(L"data", L"1", L"4", true, true); Collection expected = search->search(rq, 
FilterPtr(), 1000)->scoreDocs; int32_t numHits = expected.size(); // now do a boolean where which also contains a ConstantScoreRangeQuery and make sure hte order is the same BooleanQueryPtr q = newLucene(); q->add(rq, BooleanClause::MUST); q->add(csrq(L"data", L"1", L"6", true, true), BooleanClause::MUST); Collection actual = search->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(numHits, actual.size()); for (int32_t i = 0; i < numHits; ++i) BOOST_CHECK_EQUAL(expected[i]->doc, actual[i]->doc); } BOOST_AUTO_TEST_CASE(testRangeQueryId) { IndexReaderPtr reader = IndexReader::open(signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); int32_t medId = ((maxId - minId) / 2); String nullMin = StringUtils::toString(INT_MIN); String nullMax = StringUtils::toString(INT_MAX); String minIP = pad(minId); String maxIP = pad(maxId); String medIP = pad(medId); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); // test id, bounded on both ends Collection result = search->search(csrq(L"id", minIP, maxIP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"id", minIP, maxIP, true, true, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"id", minIP, maxIP, true, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", minIP, maxIP, true, false, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", minIP, maxIP, false, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", minIP, maxIP, false, true, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; 
BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", minIP, maxIP, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); result = search->search(csrq(L"id", minIP, maxIP, false, false, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); result = search->search(csrq(L"id", medIP, maxIP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + maxId - medId, result.size()); result = search->search(csrq(L"id", medIP, maxIP, true, true, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + maxId - medId, result.size()); result = search->search(csrq(L"id", minIP, medIP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + medId - minId, result.size()); result = search->search(csrq(L"id", minIP, medIP, true, true, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + medId - minId, result.size()); // unbounded id result = search->search(csrq(L"id", minIP, nullMax, true, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"id", nullMin, maxIP, false, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"id", minIP, nullMax, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", nullMin, maxIP, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", medIP, maxIP, true, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(maxId - medId, result.size()); result = search->search(csrq(L"id", minIP, medIP, false, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(medId - minId, result.size()); // 
very small sets result = search->search(csrq(L"id", minIP, minIP, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", minIP, minIP, false, false, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", medIP, medIP, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", medIP, medIP, false, false, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", maxIP, maxIP, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", maxIP, maxIP, false, false, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", minIP, minIP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", minIP, minIP, true, true, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", nullMin, minIP, false, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", nullMin, minIP, false, true, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", maxIP, maxIP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", maxIP, maxIP, true, true, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = 
search->search(csrq(L"id", maxIP, nullMax, true, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", maxIP, nullMax, true, false, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", medIP, medIP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", medIP, medIP, true, true, MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testRangeQueryIdCollating) { IndexReaderPtr reader = IndexReader::open(signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); int32_t medId = ((maxId - minId) / 2); String nullMin = StringUtils::toString(INT_MIN); String nullMax = StringUtils::toString(INT_MAX); String minIP = pad(minId); String maxIP = pad(maxId); String medIP = pad(medId); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); CollatorPtr c = newLucene(std::locale()); // test id, bounded on both ends Collection result = search->search(csrq(L"id", minIP, maxIP, true, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"id", minIP, maxIP, true, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", minIP, maxIP, false, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", minIP, maxIP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); result = search->search(csrq(L"id", medIP, maxIP, true, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + maxId - medId, result.size()); result = search->search(csrq(L"id", minIP, medIP, true, true, 
c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + medId - minId, result.size()); // unbounded id result = search->search(csrq(L"id", minIP, nullMax, true, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"id", nullMin, maxIP, false, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"id", minIP, nullMax, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", nullMin, maxIP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"id", medIP, maxIP, true, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(maxId - medId, result.size()); result = search->search(csrq(L"id", minIP, medIP, false, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(medId - minId, result.size()); // very small sets result = search->search(csrq(L"id", minIP, minIP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", medIP, medIP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", maxIP, maxIP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"id", minIP, minIP, true, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", nullMin, minIP, false, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", maxIP, maxIP, true, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"id", maxIP, nullMax, true, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = 
search->search(csrq(L"id", medIP, medIP, true, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testRangeQueryRand) { IndexReaderPtr reader = IndexReader::open(signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); String nullMin = StringUtils::toString(INT_MIN); String nullMax = StringUtils::toString(INT_MAX); String minRP = pad(signedIndex->minR); String maxRP = pad(signedIndex->maxR); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); // test extremes, bounded on both ends Collection result = search->search(csrq(L"rand", minRP, maxRP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"rand", minRP, maxRP, true, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"rand", minRP, maxRP, false, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"rand", minRP, maxRP, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); // unbounded result = search->search(csrq(L"rand", minRP, nullMax, true, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"rand", nullMin, maxRP, false, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"rand", minRP, nullMax, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"rand", nullMin, maxRP, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); // very small sets result = search->search(csrq(L"rand", minRP, minRP, false, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"rand", maxRP, maxRP, false, 
false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"rand", minRP, minRP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"rand", nullMin, minRP, false, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"rand", maxRP, maxRP, true, true), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"rand", maxRP, nullMax, true, false), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testRangeQueryRandCollating) { // using the unsigned index because collation seems to ignore hyphens IndexReaderPtr reader = IndexReader::open(unsignedIndex->index, true); IndexSearcherPtr search = newLucene(reader); String nullMin = StringUtils::toString(INT_MIN); String nullMax = StringUtils::toString(INT_MAX); String minRP = pad(unsignedIndex->minR); String maxRP = pad(unsignedIndex->maxR); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); CollatorPtr c = newLucene(std::locale()); // test extremes, bounded on both ends Collection result = search->search(csrq(L"rand", minRP, maxRP, true, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"rand", minRP, maxRP, true, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"rand", minRP, maxRP, false, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"rand", minRP, maxRP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); // unbounded result = search->search(csrq(L"rand", minRP, nullMax, true, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = 
search->search(csrq(L"rand", nullMin, maxRP, false, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(csrq(L"rand", minRP, nullMax, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(csrq(L"rand", nullMin, maxRP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); // very small sets result = search->search(csrq(L"rand", minRP, minRP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"rand", maxRP, maxRP, false, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(csrq(L"rand", minRP, minRP, true, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"rand", nullMin, minRP, false, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"rand", maxRP, maxRP, true, true, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(csrq(L"rand", maxRP, nullMax, true, false, c), FilterPtr(), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/MultiThreadTermVectorsTest.cpp000066400000000000000000000107131217574114600263140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "LuceneThread.h" #include "TermFreqVector.h" #include "MiscUtils.h" using namespace Lucene; DECLARE_SHARED_PTR(MultiThreadTermVectorsReader) class MultiThreadTermVectorsReader : public LuceneThread { public: MultiThreadTermVectorsReader(IndexReaderPtr reader) { this->reader = reader; timeElapsed = 0; } virtual ~MultiThreadTermVectorsReader() { } LUCENE_CLASS(MultiThreadTermVectorsReader); protected: IndexReaderPtr reader; int64_t timeElapsed; static const int32_t runsToDo; public: virtual void run() { try { for (int32_t i = 0; i < runsToDo; ++i) testTermVectors(); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } protected: void testTermVectors() { int32_t numDocs = reader->numDocs(); int64_t start = 0; for (int32_t docId = 0; docId < numDocs; ++docId) { start = MiscUtils::currentTimeMillis(); Collection vectors = reader->getTermFreqVectors(docId); timeElapsed += MiscUtils::currentTimeMillis() - start; // verify vectors result verifyVectors(vectors, docId); start = MiscUtils::currentTimeMillis(); TermFreqVectorPtr vector = reader->getTermFreqVector(docId, L"field"); timeElapsed += MiscUtils::currentTimeMillis() - start; vectors = newCollection(vector); verifyVectors(vectors, docId); } } void verifyVectors(Collection vectors, int32_t num) { StringStream temp; Collection terms; for (int32_t i = 0; i < vectors.size(); ++i) { terms = vectors[i]->getTerms(); for (int32_t z = 0; z < terms.size(); ++z) temp << terms[z]; } if (intToEnglish(num) != temp.str()) BOOST_FAIL(intToEnglish(num) << "!=" << temp.str()); } }; const int32_t MultiThreadTermVectorsReader::runsToDo = 100; class MultiThreadTermVectorsFixture : public 
LuceneTestFixture { public: MultiThreadTermVectorsFixture() { directory = newLucene(); numDocs = 100; numThreads = 3; IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < numDocs; ++i) { DocumentPtr doc = newLucene(); FieldablePtr fld = newLucene(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_YES); doc->add(fld); writer->addDocument(doc); } writer->close(); } virtual ~MultiThreadTermVectorsFixture() { } protected: RAMDirectoryPtr directory; int32_t numDocs; int32_t numThreads; public: void testTermPositionVectors(IndexReaderPtr reader, int32_t threadCount) { Collection mtr = Collection::newInstance(threadCount); for (int32_t i = 0; i < threadCount; ++i) { mtr[i] = newLucene(reader); mtr[i]->start(); } for (int32_t i = 0; i < threadCount; ++i) mtr[i]->join(); } }; BOOST_FIXTURE_TEST_SUITE(MultiThreadTermVectorsTest, MultiThreadTermVectorsFixture) BOOST_AUTO_TEST_CASE(testMultiThreadTermVectors) { IndexReaderPtr reader; try { reader = IndexReader::open(directory, true); for (int32_t i = 1; i <= numThreads; ++i) testTermPositionVectors(reader, i); } catch (LuceneException& e) { BOOST_FAIL(e.getError()); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/MultiValuedNumericRangeQueryTest.cpp000066400000000000000000000051341217574114600274560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Random.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "NumericField.h" #include "IndexSearcher.h" #include "TermRangeQuery.h" #include "NumericRangeQuery.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(MultiValuedNumericRangeQueryTest, LuceneTestFixture) static String pad(int32_t n) { int32_t intLength = String(L"00000000000").length(); StringStream buf; String p = L"0"; if (n < 0) { p = L"-"; n = INT_MAX + n + 1; } buf << p; String s = StringUtils::toString(n); for (int32_t i = s.length(); i <= intLength; ++i) buf << L"0"; buf << s; return buf.str(); } BOOST_AUTO_TEST_CASE(testMultiValuedNRQ) { RandomPtr rnd = newLucene(); RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t l = 0; l < 5000; ++l) { DocumentPtr doc = newLucene(); for (int32_t m = 0, c = rnd->nextInt(10); m <= c; ++m) { int32_t value = rnd->nextInt(INT_MAX); doc->add(newLucene(L"asc", pad(value), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"trie", Field::STORE_NO, true)->setIntValue(value)); } writer->addDocument(doc); } writer->close(); SearcherPtr searcher = newLucene(directory, true); for (int32_t i = 0; i < 50; ++i) { int32_t lower = rnd->nextInt(INT_MAX); int32_t upper = rnd->nextInt(INT_MAX); if (lower > upper) std::swap(lower, upper); TermRangeQueryPtr cq = newLucene(L"asc", pad(lower), pad(upper), true, true); NumericRangeQueryPtr tq = NumericRangeQuery::newIntRange(L"trie", lower, upper, true, true); TopDocsPtr trTopDocs = searcher->search(cq, 1); TopDocsPtr nrTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(trTopDocs->totalHits, nrTopDocs->totalHits); } searcher->close(); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/search/NotTest.cpp000066400000000000000000000027711217574114600224410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "QueryParser.h" #include "Query.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(NotTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testNot) { RAMDirectoryPtr store = newLucene(); IndexWriterPtr writer = newLucene(store, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d1 = newLucene(); d1->add(newLucene(L"field", L"a b", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d1); writer->optimize(); writer->close(); SearcherPtr searcher = newLucene(store, true); QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene()); QueryPtr query = parser->parse(L"a NOT b"); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/NumericRangeQuery32Test.cpp000066400000000000000000000502331217574114600254470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "NumericRangeQuery.h" #include "_NumericRangeQuery.h" #include "NumericRangeFilter.h" #include "TopDocs.h" #include "MultiTermQuery.h" #include "Sort.h" #include "MatchAllDocsQuery.h" #include "Document.h" #include "BooleanQuery.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "NumericField.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopFieldDocs.h" #include "DocIdSet.h" #include "Random.h" #include "NumericUtils.h" #include "TermRangeQuery.h" #include "SortField.h" #include "QueryUtils.h" #include "FilteredTermEnum.h" #include "Term.h" using namespace Lucene; class NumericRangeQuery32Fixture : public LuceneTestFixture { public: NumericRangeQuery32Fixture() { static bool setupRequired = true; if (setupRequired) { setup(); setupRequired = false; } } virtual ~NumericRangeQuery32Fixture() { } protected: // distance of entries static const int32_t distance; // shift the starting of the values to the left, to also have negative values static const int32_t startOffset; // number of docs to generate for testing static const int32_t noDocs; static RAMDirectoryPtr directory; static IndexSearcherPtr searcher; protected: /// One-time setup to initialise static members void setup() { // set the theoretical maximum term count for 8bit (see docs for the number) BooleanQuery::setMaxClauseCount(3 * 255 * 2 + 255); directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); NumericFieldPtr field8 = newLucene(L"field8", 8, Field::STORE_YES, true); NumericFieldPtr field4 = newLucene(L"field4", 4, Field::STORE_YES, true); NumericFieldPtr field2 = newLucene(L"field2", 2, Field::STORE_YES, true); NumericFieldPtr fieldNoTrie = newLucene(L"field" + StringUtils::toString(INT_MAX), INT_MAX, Field::STORE_YES, true); 
NumericFieldPtr ascfield8 = newLucene(L"ascfield8", 8, Field::STORE_NO, true); NumericFieldPtr ascfield4 = newLucene(L"ascfield4", 4, Field::STORE_NO, true); NumericFieldPtr ascfield2 = newLucene(L"ascfield2", 2, Field::STORE_NO, true); DocumentPtr doc = newLucene(); // add fields, that have a distance to test general functionality doc->add(field8); doc->add(field4); doc->add(field2); doc->add(fieldNoTrie); // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive doc->add(ascfield8); doc->add(ascfield4); doc->add(ascfield2); // Add a series of noDocs docs with increasing int values for (int32_t l = 0; l < noDocs; ++l) { int32_t val = distance * l + startOffset; field8->setIntValue(val); field4->setIntValue(val); field2->setIntValue(val); fieldNoTrie->setIntValue(val); val = l - (noDocs / 2); ascfield8->setIntValue(val); ascfield4->setIntValue(val); ascfield2->setIntValue(val); writer->addDocument(doc); } writer->optimize(); writer->close(); searcher = newLucene(directory, true); } public: /// test for both constant score and boolean query, the other tests only use the constant score mode void testRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int32_t lower = (distance * 3 / 2) + startOffset; int32_t upper = lower + count * distance + (distance / 3); NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); NumericRangeFilterPtr f = NumericRangeFilter::newIntRange(field, precisionStep, lower, upper, true, true); int32_t lastTerms = 0; for (uint8_t i = 0; i < 3; ++i) { TopDocsPtr topDocs; int32_t terms; String type; q->clearTotalNumberOfTerms(); f->clearTotalNumberOfTerms(); switch (i) { case 0: type = L" (constant score filter rewrite)"; q->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); 
terms = q->getTotalNumberOfTerms(); break; case 1: type = L" (constant score boolean rewrite)"; q->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); terms = q->getTotalNumberOfTerms(); break; case 2: type = L" (filter)"; topDocs = searcher->search(newLucene(), f, noDocs, Sort::INDEXORDER()); terms = f->getTotalNumberOfTerms(); break; default: return; } BOOST_TEST_MESSAGE("Found " << terms << " distinct terms in range for field '" << field << "'" << type << "."); Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); BOOST_CHECK_EQUAL(count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); BOOST_CHECK_EQUAL(StringUtils::toString(2 * distance + startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); BOOST_CHECK_EQUAL(StringUtils::toString((1 + count) * distance + startOffset), doc->get(field)); if (i > 0) BOOST_CHECK_EQUAL(lastTerms, terms); lastTerms = terms; } } void testLeftOpenRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int32_t upper = (count - 1) * distance + (distance / 3) + startOffset; NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(field, precisionStep, INT_MIN, upper, true, true); TopDocsPtr topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); BOOST_CHECK_EQUAL(count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); BOOST_CHECK_EQUAL(StringUtils::toString(startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); BOOST_CHECK_EQUAL(StringUtils::toString((count - 1) * distance + startOffset), doc->get(field)); } void testRightOpenRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int32_t lower = (count - 1) * distance + (distance / 3) + startOffset; NumericRangeQueryPtr q = 
NumericRangeQuery::newIntRange(field, precisionStep, lower, INT_MAX, true, true); TopDocsPtr topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); BOOST_CHECK_EQUAL(noDocs - count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); BOOST_CHECK_EQUAL(StringUtils::toString(count * distance + startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); BOOST_CHECK_EQUAL(StringUtils::toString((noDocs - 1) * distance + startOffset), doc->get(field)); } void testRandomTrieAndClassicRangeQuery(int32_t precisionStep) { RandomPtr rnd = newLucene(); String field = L"field" + StringUtils::toString(precisionStep); int32_t termCountT = 0; int32_t termCountC = 0; for (int32_t i = 0; i < 50; ++i) { int32_t lower = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; int32_t upper = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; if (lower > upper) std::swap(lower, upper); // test inclusive range NumericRangeQueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); TermRangeQueryPtr cq = newLucene(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), true, true); TopDocsPtr tTopDocs = searcher->search(tq, 1); TopDocsPtr cTopDocs = searcher->search(cq, 1); BOOST_CHECK_EQUAL(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, false); cq = newLucene(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), false, false); tTopDocs = searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); BOOST_CHECK_EQUAL(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test left exclusive range tq = 
NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, true); cq = newLucene(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), false, true); tTopDocs = searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); BOOST_CHECK_EQUAL(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test right exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, false); cq = newLucene(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), true, false); tTopDocs = searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); BOOST_CHECK_EQUAL(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); } if (precisionStep == INT_MAX) BOOST_CHECK_EQUAL(termCountT, termCountC); } void testRangeSplit(int32_t precisionStep) { RandomPtr rnd = newLucene(); String field = L"ascfield" + StringUtils::toString(precisionStep); // 50 random tests for (int32_t i = 0; i < 50; ++i) { int32_t lower = (int32_t)(rnd->nextDouble() * noDocs - noDocs / 2.0); int32_t upper = (int32_t)(rnd->nextDouble() * noDocs - noDocs / 2.0); if (lower > upper) std::swap(lower, upper); // test inclusive range QueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); TopDocsPtr tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(upper - lower + 1, tTopDocs->totalHits); // test exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, false); tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(std::max(upper - lower - 1, (int32_t)0), tTopDocs->totalHits); // test left exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, true); tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(upper - lower, tTopDocs->totalHits); // test right 
exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, false); tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(upper - lower, tTopDocs->totalHits); } } void testSorting(int32_t precisionStep) { RandomPtr rnd = newLucene(); String field = L"field" + StringUtils::toString(precisionStep); // 10 random tests, the index order is ascending, so using a reverse sort field should return descending documents for (int32_t i = 0; i < 10; ++i) { int32_t lower = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; int32_t upper = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; if (lower > upper) std::swap(lower, upper); QueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); TopDocsPtr topDocs = searcher->search(tq, FilterPtr(), noDocs, newLucene(newLucene(field, SortField::INT, true))); if (topDocs->totalHits == 0) continue; Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); int32_t last = StringUtils::toInt(searcher->doc(sd[0]->doc)->get(field)); for (int32_t j = 1; j < sd.size(); ++j) { int32_t act = StringUtils::toInt(searcher->doc(sd[j]->doc)->get(field)); BOOST_CHECK(last > act); last = act; } } } void testEnumRange(int32_t lower, int32_t upper) { NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(L"field4", 4, lower, upper, true, true); FilteredTermEnumPtr termEnum = newLucene(q, searcher->getIndexReader()); do { TermPtr t = termEnum->term(); if (t) { int32_t val = NumericUtils::prefixCodedToInt(t->text()); BOOST_CHECK(val >= lower && val <= upper); } else break; } while (termEnum->next()); BOOST_CHECK(!termEnum->next()); termEnum->close(); } }; // distance of entries const int32_t NumericRangeQuery32Fixture::distance = 6666; // shift the starting of the values to the left, to also have negative values const int32_t NumericRangeQuery32Fixture::startOffset = -1 << 15; // number of docs to generate for testing const int32_t NumericRangeQuery32Fixture::noDocs 
= 10000; RAMDirectoryPtr NumericRangeQuery32Fixture::directory; IndexSearcherPtr NumericRangeQuery32Fixture::searcher; BOOST_FIXTURE_TEST_SUITE(NumericRangeQuery32Test, NumericRangeQuery32Fixture) BOOST_AUTO_TEST_CASE(testRange_8bit) { testRange(8); } BOOST_AUTO_TEST_CASE(testRange_4bit) { testRange(4); } BOOST_AUTO_TEST_CASE(testRange_2bit) { testRange(2); } BOOST_AUTO_TEST_CASE(testInverseRange) { NumericRangeFilterPtr f = NumericRangeFilter::newIntRange(L"field8", 8, 1000, -1000, true, true); BOOST_CHECK_EQUAL(f->getDocIdSet(searcher->getIndexReader()), DocIdSet::EMPTY_DOCIDSET()); f = NumericRangeFilter::newIntRange(L"field8", 8, INT_MAX, INT_MIN, false, false); BOOST_CHECK_EQUAL(f->getDocIdSet(searcher->getIndexReader()), DocIdSet::EMPTY_DOCIDSET()); f = NumericRangeFilter::newIntRange(L"field8", 8, INT_MIN, INT_MIN, false, false); BOOST_CHECK_EQUAL(f->getDocIdSet(searcher->getIndexReader()), DocIdSet::EMPTY_DOCIDSET()); } BOOST_AUTO_TEST_CASE(testOneMatchQuery) { NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(L"ascfield8", 8, 1000, 1000, true, true); BOOST_CHECK_EQUAL(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE(), q->getRewriteMethod()); TopDocsPtr topDocs = searcher->search(q, noDocs); Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); BOOST_CHECK_EQUAL(1, sd.size()); } BOOST_AUTO_TEST_CASE(testLeftOpenRange_8bit) { testLeftOpenRange(8); } BOOST_AUTO_TEST_CASE(testLeftOpenRange_4bit) { testLeftOpenRange(4); } BOOST_AUTO_TEST_CASE(testLeftOpenRange_2bit) { testLeftOpenRange(2); } BOOST_AUTO_TEST_CASE(testRightOpenRange_8bit) { testRightOpenRange(8); } BOOST_AUTO_TEST_CASE(testRightOpenRange_4bit) { testRightOpenRange(4); } BOOST_AUTO_TEST_CASE(testRightOpenRange_2bit) { testRightOpenRange(2); } BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_8bit) { testRandomTrieAndClassicRangeQuery(8); } BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_4bit) { testRandomTrieAndClassicRangeQuery(4); } 
BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_2bit) { testRandomTrieAndClassicRangeQuery(2); } BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_NoTrie) { testRandomTrieAndClassicRangeQuery(INT_MAX); } BOOST_AUTO_TEST_CASE(testRangeSplit_8bit) { testRangeSplit(8); } BOOST_AUTO_TEST_CASE(testRangeSplit_4bit) { testRangeSplit(4); } BOOST_AUTO_TEST_CASE(testRangeSplit_2bit) { testRangeSplit(2); } BOOST_AUTO_TEST_CASE(testSorting_8bit) { testSorting(8); } BOOST_AUTO_TEST_CASE(testSorting_4bit) { testSorting(4); } BOOST_AUTO_TEST_CASE(testSorting_2bit) { testSorting(2); } BOOST_AUTO_TEST_CASE(testEqualsAndHash) { QueryUtils::checkHashEquals(NumericRangeQuery::newIntRange(L"test1", 4, 10, 20, true, true)); QueryUtils::checkHashEquals(NumericRangeQuery::newIntRange(L"test2", 4, 10, 20, false, true)); QueryUtils::checkHashEquals(NumericRangeQuery::newIntRange(L"test3", 4, 10, 20, true, false)); QueryUtils::checkHashEquals(NumericRangeQuery::newIntRange(L"test4", 4, 10, 20, false, false)); QueryUtils::checkHashEquals(NumericRangeQuery::newIntRange(L"test5", 4, 10, INT_MAX, true, true)); QueryUtils::checkHashEquals(NumericRangeQuery::newIntRange(L"test6", 4, INT_MIN, 20, true, true)); QueryUtils::checkHashEquals(NumericRangeQuery::newIntRange(L"test7", 4, INT_MIN, INT_MAX, true, true)); QueryUtils::checkEqual(NumericRangeQuery::newIntRange(L"test8", 4, 10, 20, true, true), NumericRangeQuery::newIntRange(L"test8", 4, 10, 20, true, true)); QueryUtils::checkUnequal(NumericRangeQuery::newIntRange(L"test9", 4, 10, 20, true, true), NumericRangeQuery::newIntRange(L"test9", 8, 10, 20, true, true)); QueryUtils::checkUnequal(NumericRangeQuery::newIntRange(L"test10a", 4, 10, 20, true, true), NumericRangeQuery::newIntRange(L"test10b", 4, 10, 20, true, true)); QueryUtils::checkUnequal(NumericRangeQuery::newIntRange(L"test11", 4, 10, 20, true, true), NumericRangeQuery::newIntRange(L"test11", 4, 20, 10, true, true)); 
QueryUtils::checkUnequal(NumericRangeQuery::newIntRange(L"test12", 4, 10, 20, true, true), NumericRangeQuery::newIntRange(L"test12", 4, 10, 20, false, true)); QueryUtils::checkUnequal(NumericRangeQuery::newIntRange(L"test13", 4, 10, 20, true, true), NumericRangeQuery::newDoubleRange(L"test13", 4, 10.0, 20.0, true, true)); // the following produces a hash collision, because Long and Integer have the same hashcode, so only test equality QueryPtr q1 = NumericRangeQuery::newIntRange(L"test14", 4, 10, 20, true, true); QueryPtr q2 = NumericRangeQuery::newLongRange(L"test14", 4, 10, 20, true, true); BOOST_CHECK(!q1->equals(q2)); BOOST_CHECK(!q2->equals(q1)); } BOOST_AUTO_TEST_CASE(testEnum) { int32_t count = 3000; int32_t lower= (distance * 3 / 2) + startOffset; int32_t upper = lower + count * distance + (distance / 3); // test enum with values testEnumRange(lower, upper); // test empty enum testEnumRange(upper, lower); // test empty enum outside of bounds lower = distance * noDocs + startOffset; upper = 2 * lower; testEnumRange(lower, upper); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/NumericRangeQuery64Test.cpp000066400000000000000000000512061217574114600254550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "NumericRangeQuery.h" #include "NumericRangeFilter.h" #include "TopDocs.h" #include "MultiTermQuery.h" #include "Sort.h" #include "MatchAllDocsQuery.h" #include "Document.h" #include "BooleanQuery.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "NumericField.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopFieldDocs.h" #include "DocIdSet.h" #include "Random.h" #include "NumericUtils.h" #include "TermRangeQuery.h" #include "SortField.h" #include "QueryUtils.h" #include "FilteredTermEnum.h" #include "Term.h" using namespace Lucene; class NumericRangeQuery64Fixture : public LuceneTestFixture { public: NumericRangeQuery64Fixture() { static bool setupRequired = true; if (setupRequired) { setup(); setupRequired = false; } } virtual ~NumericRangeQuery64Fixture() { } protected: // distance of entries static const int64_t distance; // shift the starting of the values to the left, to also have negative values static const int64_t startOffset; // number of docs to generate for testing static const int32_t noDocs; static RAMDirectoryPtr directory; static IndexSearcherPtr searcher; protected: /// One-time setup to initialise static members void setup() { // set the theoretical maximum term count for 8bit (see docs for the number) BooleanQuery::setMaxClauseCount(7 * 255 * 2 + 255); directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); NumericFieldPtr field8 = newLucene(L"field8", 8, Field::STORE_YES, true); NumericFieldPtr field6 = newLucene(L"field6", 6, Field::STORE_YES, true); NumericFieldPtr field4 = newLucene(L"field4", 4, Field::STORE_YES, true); NumericFieldPtr field2 = newLucene(L"field2", 2, Field::STORE_YES, true); NumericFieldPtr fieldNoTrie = newLucene(L"field" + StringUtils::toString(INT_MAX), 
INT_MAX, Field::STORE_YES, true); NumericFieldPtr ascfield8 = newLucene(L"ascfield8", 8, Field::STORE_NO, true); NumericFieldPtr ascfield6 = newLucene(L"ascfield6", 6, Field::STORE_NO, true); NumericFieldPtr ascfield4 = newLucene(L"ascfield4", 4, Field::STORE_NO, true); NumericFieldPtr ascfield2 = newLucene(L"ascfield2", 2, Field::STORE_NO, true); DocumentPtr doc = newLucene(); // add fields, that have a distance to test general functionality doc->add(field8); doc->add(field6); doc->add(field4); doc->add(field2); doc->add(fieldNoTrie); // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive doc->add(ascfield8); doc->add(ascfield6); doc->add(ascfield4); doc->add(ascfield2); // Add a series of noDocs docs with increasing int values for (int32_t l = 0; l < noDocs; ++l) { int64_t val = distance * l + startOffset; field8->setLongValue(val); field6->setLongValue(val); field4->setLongValue(val); field2->setLongValue(val); fieldNoTrie->setLongValue(val); val = l - (noDocs / 2); ascfield8->setLongValue(val); ascfield6->setLongValue(val); ascfield4->setLongValue(val); ascfield2->setLongValue(val); writer->addDocument(doc); } writer->optimize(); writer->close(); searcher = newLucene(directory, true); } public: /// test for both constant score and boolean query, the other tests only use the constant score mode void testRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int64_t lower = (distance * 3 / 2) + startOffset; int64_t upper = lower + count * distance + (distance / 3); NumericRangeQueryPtr q = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, true, true); NumericRangeFilterPtr f = NumericRangeFilter::newLongRange(field, precisionStep, lower, upper, true, true); int32_t lastTerms = 0; for (uint8_t i = 0; i < 3; ++i) { TopDocsPtr topDocs; int32_t terms; String type; q->clearTotalNumberOfTerms(); 
f->clearTotalNumberOfTerms(); switch (i) { case 0: type = L" (constant score filter rewrite)"; q->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); terms = q->getTotalNumberOfTerms(); break; case 1: type = L" (constant score boolean rewrite)"; q->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); terms = q->getTotalNumberOfTerms(); break; case 2: type = L" (filter)"; topDocs = searcher->search(newLucene(), f, noDocs, Sort::INDEXORDER()); terms = f->getTotalNumberOfTerms(); break; default: return; } BOOST_TEST_MESSAGE("Found " << terms << " distinct terms in range for field '" << field << "'" << type << "."); Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); BOOST_CHECK_EQUAL(count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); BOOST_CHECK_EQUAL(StringUtils::toString(2 * distance + startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); BOOST_CHECK_EQUAL(StringUtils::toString((1 + count) * distance + startOffset), doc->get(field)); if (i > 0) BOOST_CHECK_EQUAL(lastTerms, terms); lastTerms = terms; } } void testLeftOpenRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; // int32_t count = 10; int64_t upper = (count - 1) * distance + (distance / 3) + startOffset; NumericRangeQueryPtr q = NumericRangeQuery::newLongRange(field, precisionStep, LLONG_MIN, upper, true, true); TopDocsPtr topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); BOOST_CHECK_EQUAL(count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); BOOST_CHECK_EQUAL(StringUtils::toString(startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); BOOST_CHECK_EQUAL(StringUtils::toString((count - 1) * distance + startOffset), 
doc->get(field)); } void testRightOpenRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int64_t lower = (count - 1) * distance + (distance / 3) + startOffset; NumericRangeQueryPtr q = NumericRangeQuery::newLongRange(field, precisionStep, lower, LLONG_MAX, true, true); TopDocsPtr topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); BOOST_CHECK_EQUAL(noDocs - count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); BOOST_CHECK_EQUAL(StringUtils::toString(count * distance + startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); BOOST_CHECK_EQUAL(StringUtils::toString((noDocs - 1) * distance + startOffset), doc->get(field)); } void testRandomTrieAndClassicRangeQuery(int32_t precisionStep) { RandomPtr rnd = newLucene(); String field = L"field" + StringUtils::toString(precisionStep); int32_t termCountT = 0; int32_t termCountC = 0; for (int32_t i = 0; i < 50; ++i) { int64_t lower = (int64_t)(rnd->nextDouble() * noDocs * distance) + startOffset; int64_t upper = (int64_t)(rnd->nextDouble() * noDocs * distance) + startOffset; if (lower > upper) std::swap(lower, upper); // test inclusive range NumericRangeQueryPtr tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, true, true); TermRangeQueryPtr cq = newLucene(field, NumericUtils::longToPrefixCoded(lower), NumericUtils::longToPrefixCoded(upper), true, true); TopDocsPtr tTopDocs = searcher->search(tq, 1); TopDocsPtr cTopDocs = searcher->search(cq, 1); BOOST_CHECK_EQUAL(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test exclusive range tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, false, false); cq = newLucene(field, NumericUtils::longToPrefixCoded(lower), NumericUtils::longToPrefixCoded(upper), false, false); tTopDocs = 
searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); BOOST_CHECK_EQUAL(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test left exclusive range tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, false, true); cq = newLucene(field, NumericUtils::longToPrefixCoded(lower), NumericUtils::longToPrefixCoded(upper), false, true); tTopDocs = searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); BOOST_CHECK_EQUAL(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test right exclusive range tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, true, false); cq = newLucene(field, NumericUtils::longToPrefixCoded(lower), NumericUtils::longToPrefixCoded(upper), true, false); tTopDocs = searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); BOOST_CHECK_EQUAL(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); } if (precisionStep == INT_MAX) BOOST_CHECK_EQUAL(termCountT, termCountC); } void testRangeSplit(int32_t precisionStep) { RandomPtr rnd = newLucene(); String field = L"ascfield" + StringUtils::toString(precisionStep); // 50 random tests for (int32_t i = 0; i < 50; ++i) { int64_t lower = (int64_t)(rnd->nextDouble() * noDocs - noDocs / 2.0); int64_t upper = (int64_t)(rnd->nextDouble() * noDocs - noDocs / 2.0); if (lower > upper) std::swap(lower, upper); // test inclusive range QueryPtr tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, true, true); TopDocsPtr tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(upper - lower + 1, tTopDocs->totalHits); // test exclusive range tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, false, false); tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(std::max(upper - lower - 1, (int64_t)0), 
tTopDocs->totalHits); // test left exclusive range tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, false, true); tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(upper - lower, tTopDocs->totalHits); // test right exclusive range tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, true, false); tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(upper - lower, tTopDocs->totalHits); } } void testDoubleRange(int32_t precisionStep) { String field = L"ascfield" + StringUtils::toString(precisionStep); int64_t lower = -1000; int64_t upper = +2000; QueryPtr tq = NumericRangeQuery::newDoubleRange(field, precisionStep, NumericUtils::sortableLongToDouble(lower), NumericUtils::sortableLongToDouble(upper), true, true); TopDocsPtr tTopDocs = searcher->search(tq, 1); BOOST_CHECK_EQUAL(upper - lower + 1, tTopDocs->totalHits); FilterPtr tf = NumericRangeFilter::newDoubleRange(field, precisionStep, NumericUtils::sortableLongToDouble(lower), NumericUtils::sortableLongToDouble(upper), true, true); tTopDocs = searcher->search(newLucene(), tf, 1); BOOST_CHECK_EQUAL(upper - lower + 1, tTopDocs->totalHits); } void testSorting(int32_t precisionStep) { RandomPtr rnd = newLucene(); String field = L"field" + StringUtils::toString(precisionStep); // 10 random tests, the index order is ascending, so using a reverse sort field should return descending documents for (int32_t i = 0; i < 10; ++i) { int64_t lower = (int64_t)(rnd->nextDouble() * noDocs * distance) + startOffset; int64_t upper = (int64_t)(rnd->nextDouble() * noDocs * distance) + startOffset; if (lower > upper) std::swap(lower, upper); QueryPtr tq = NumericRangeQuery::newLongRange(field, precisionStep, lower, upper, true, true); TopDocsPtr topDocs = searcher->search(tq, FilterPtr(), noDocs, newLucene(newLucene(field, SortField::LONG, true))); if (topDocs->totalHits == 0) continue; Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); int64_t last = 
StringUtils::toInt(searcher->doc(sd[0]->doc)->get(field)); for (int32_t j = 1; j < sd.size(); ++j) { int64_t act = StringUtils::toLong(searcher->doc(sd[j]->doc)->get(field)); BOOST_CHECK(last > act); last = act; } } } }; // distance of entries const int64_t NumericRangeQuery64Fixture::distance = 66666; // shift the starting of the values to the left, to also have negative values const int64_t NumericRangeQuery64Fixture::startOffset = (int64_t)-1 << 31; // number of docs to generate for testing const int32_t NumericRangeQuery64Fixture::noDocs = 10000; RAMDirectoryPtr NumericRangeQuery64Fixture::directory; IndexSearcherPtr NumericRangeQuery64Fixture::searcher; BOOST_FIXTURE_TEST_SUITE(NumericRangeQuery64Test, NumericRangeQuery64Fixture) BOOST_AUTO_TEST_CASE(testRange_8bit) { testRange(8); } BOOST_AUTO_TEST_CASE(testRange_6bit) { testRange(6); } BOOST_AUTO_TEST_CASE(testRange_4bit) { testRange(4); } BOOST_AUTO_TEST_CASE(testRange_2bit) { testRange(2); } BOOST_AUTO_TEST_CASE(testInverseRange) { NumericRangeFilterPtr f = NumericRangeFilter::newLongRange(L"field8", 8, 1000, -1000, true, true); BOOST_CHECK_EQUAL(f->getDocIdSet(searcher->getIndexReader()), DocIdSet::EMPTY_DOCIDSET()); f = NumericRangeFilter::newLongRange(L"field8", 8, LLONG_MAX, LLONG_MIN, false, false); BOOST_CHECK_EQUAL(f->getDocIdSet(searcher->getIndexReader()), DocIdSet::EMPTY_DOCIDSET()); f = NumericRangeFilter::newLongRange(L"field8", 8, LLONG_MIN, LLONG_MIN, false, false); BOOST_CHECK_EQUAL(f->getDocIdSet(searcher->getIndexReader()), DocIdSet::EMPTY_DOCIDSET()); } BOOST_AUTO_TEST_CASE(testOneMatchQuery) { NumericRangeQueryPtr q = NumericRangeQuery::newLongRange(L"ascfield8", 8, 1000, 1000, true, true); BOOST_CHECK_EQUAL(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE(), q->getRewriteMethod()); TopDocsPtr topDocs = searcher->search(q, noDocs); Collection sd = topDocs->scoreDocs; BOOST_CHECK(sd); BOOST_CHECK_EQUAL(1, sd.size()); } BOOST_AUTO_TEST_CASE(testLeftOpenRange_8bit) { 
testLeftOpenRange(8); } BOOST_AUTO_TEST_CASE(testLeftOpenRange_6bit) { testLeftOpenRange(6); } BOOST_AUTO_TEST_CASE(testLeftOpenRange_4bit) { testLeftOpenRange(4); } BOOST_AUTO_TEST_CASE(testLeftOpenRange_2bit) { testLeftOpenRange(2); } BOOST_AUTO_TEST_CASE(testRightOpenRange_8bit) { testRightOpenRange(8); } BOOST_AUTO_TEST_CASE(testRightOpenRange_6bit) { testRightOpenRange(6); } BOOST_AUTO_TEST_CASE(testRightOpenRange_4bit) { testRightOpenRange(4); } BOOST_AUTO_TEST_CASE(testRightOpenRange_2bit) { testRightOpenRange(2); } BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_8bit) { testRandomTrieAndClassicRangeQuery(8); } BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_6bit) { testRandomTrieAndClassicRangeQuery(6); } BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_4bit) { testRandomTrieAndClassicRangeQuery(4); } BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_2bit) { testRandomTrieAndClassicRangeQuery(2); } BOOST_AUTO_TEST_CASE(testRandomTrieAndClassicRangeQuery_NoTrie) { testRandomTrieAndClassicRangeQuery(INT_MAX); } BOOST_AUTO_TEST_CASE(testRangeSplit_8bit) { testRangeSplit(8); } BOOST_AUTO_TEST_CASE(testRangeSplit_6bit) { testRangeSplit(6); } BOOST_AUTO_TEST_CASE(testRangeSplit_4bit) { testRangeSplit(4); } BOOST_AUTO_TEST_CASE(testRangeSplit_2bit) { testRangeSplit(2); } BOOST_AUTO_TEST_CASE(testDoubleRange_8bit) { testDoubleRange(8); } BOOST_AUTO_TEST_CASE(testDoubleRange_6bit) { testDoubleRange(6); } BOOST_AUTO_TEST_CASE(testDoubleRange_4bit) { testDoubleRange(4); } BOOST_AUTO_TEST_CASE(testDoubleRange_2bit) { testDoubleRange(2); } BOOST_AUTO_TEST_CASE(testSorting_8bit) { testSorting(8); } BOOST_AUTO_TEST_CASE(testSorting_6bit) { testSorting(6); } BOOST_AUTO_TEST_CASE(testSorting_4bit) { testSorting(4); } BOOST_AUTO_TEST_CASE(testSorting_2bit) { testSorting(2); } BOOST_AUTO_TEST_CASE(testEqualsAndHash) { QueryUtils::checkHashEquals(NumericRangeQuery::newLongRange(L"test1", 4, 10, 20, true, true)); 
QueryUtils::checkHashEquals(NumericRangeQuery::newLongRange(L"test2", 4, 10, 20, false, true)); QueryUtils::checkHashEquals(NumericRangeQuery::newLongRange(L"test3", 4, 10, 20, true, false)); QueryUtils::checkHashEquals(NumericRangeQuery::newLongRange(L"test4", 4, 10, 20, false, false)); QueryUtils::checkHashEquals(NumericRangeQuery::newLongRange(L"test5", 4, 10, LLONG_MAX, true, true)); QueryUtils::checkHashEquals(NumericRangeQuery::newLongRange(L"test6", 4, LLONG_MIN, 20, true, true)); QueryUtils::checkHashEquals(NumericRangeQuery::newLongRange(L"test7", 4, LLONG_MIN, LLONG_MAX, true, true)); QueryUtils::checkEqual(NumericRangeQuery::newLongRange(L"test8", 4, 10, 20, true, true), NumericRangeQuery::newLongRange(L"test8", 4, 10, 20, true, true)); QueryUtils::checkUnequal(NumericRangeQuery::newLongRange(L"test9", 4, 10, 20, true, true), NumericRangeQuery::newLongRange(L"test9", 8, 10, 20, true, true)); QueryUtils::checkUnequal(NumericRangeQuery::newLongRange(L"test10a", 4, 10, 20, true, true), NumericRangeQuery::newLongRange(L"test10b", 4, 10, 20, true, true)); QueryUtils::checkUnequal(NumericRangeQuery::newLongRange(L"test11", 4, 10, 20, true, true), NumericRangeQuery::newLongRange(L"test11", 4, 20, 10, true, true)); QueryUtils::checkUnequal(NumericRangeQuery::newLongRange(L"test12", 4, 10, 20, true, true), NumericRangeQuery::newLongRange(L"test12", 4, 10, 20, false, true)); QueryUtils::checkUnequal(NumericRangeQuery::newLongRange(L"test13", 4, 10, 20, true, true), NumericRangeQuery::newDoubleRange(L"test13", 4, 10.0, 20.0, true, true)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/ParallelMultiSearcherTest.cpp000066400000000000000000000363501217574114600261250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "MockRAMDirectory.h" #include "Document.h" #include "Field.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "QueryParser.h" #include "Query.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "ParallelMultiSearcher.h" #include "Term.h" #include "IndexReader.h" #include "TermQuery.h" #include "SetBasedFieldSelector.h" #include "KeywordAnalyzer.h" #include "Sort.h" #include "DefaultSimilarity.h" #include "TopFieldDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(ParallelMultiSearcherTest, LuceneTestFixture) static MultiSearcherPtr getMultiSearcherInstance(Collection searchers) { return newLucene(searchers); } static DocumentPtr createDocument(const String& contents1, const String& contents2) { DocumentPtr document = newLucene(); document->add(newLucene(L"contents", contents1, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); document->add(newLucene(L"other", L"other contents", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); if (!contents2.empty()) document->add(newLucene(L"contents", contents2, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); return document; } static void initIndex(DirectoryPtr directory, int32_t numDocs, bool create, const String& contents2) { IndexWriterPtr indexWriter = newLucene(directory, newLucene(), create, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < numDocs; ++i) indexWriter->addDocument(createDocument(L"doc" + StringUtils::toString(i), contents2)); indexWriter->close(); } BOOST_AUTO_TEST_CASE(testEmptyIndex) { // creating two directories for indices DirectoryPtr indexStoreA = newLucene(); DirectoryPtr indexStoreB = newLucene(); // creating a document to store DocumentPtr lDoc = newLucene(); 
lDoc->add(newLucene(L"fulltext", L"Once upon a time.....", Field::STORE_YES, Field::INDEX_ANALYZED)); lDoc->add(newLucene(L"id", L"doc1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); lDoc->add(newLucene(L"handle", L"1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // creating a document to store DocumentPtr lDoc2 = newLucene(); lDoc2->add(newLucene(L"fulltext", L"in a galaxy far far away.....", Field::STORE_YES, Field::INDEX_ANALYZED)); lDoc2->add(newLucene(L"id", L"doc2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); lDoc2->add(newLucene(L"handle", L"1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // creating a document to store DocumentPtr lDoc3 = newLucene(); lDoc3->add(newLucene(L"fulltext", L"a bizarre bug manifested itself....", Field::STORE_YES, Field::INDEX_ANALYZED)); lDoc3->add(newLucene(L"id", L"doc3", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); lDoc3->add(newLucene(L"handle", L"1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // creating an index writer for the first index IndexWriterPtr writerA = newLucene(indexStoreA, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); // creating an index writer for the second index, but writing nothing IndexWriterPtr writerB = newLucene(indexStoreB, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); //-------------------------------------------------------------------- // scenario 1 //-------------------------------------------------------------------- // writing the documents to the first index writerA->addDocument(lDoc); writerA->addDocument(lDoc2); writerA->addDocument(lDoc3); writerA->optimize(); writerA->close(); // closing the second index writerB->close(); // creating the query QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, L"fulltext", newLucene(LuceneVersion::LUCENE_CURRENT)); QueryPtr query = parser->parse(L"handle:1"); // building the searchables Collection searchers = newCollection(newLucene(indexStoreB, 
true), newLucene(indexStoreA, true)); // creating the multiSearcher SearcherPtr mSearcher = getMultiSearcherInstance(searchers); // performing the search Collection hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); // iterating over the hit documents for (int32_t i = 0; i < hits.size(); ++i) mSearcher->doc(hits[i]->doc); mSearcher->close(); //-------------------------------------------------------------------- // scenario 2 //-------------------------------------------------------------------- // adding one document to the empty index writerB = newLucene(indexStoreB, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); writerB->addDocument(lDoc); writerB->optimize(); writerB->close(); // building the searchables Collection searchers2 = newCollection(newLucene(indexStoreB, true), newLucene(indexStoreA, true)); // creating the mulitSearcher MultiSearcherPtr mSearcher2 = getMultiSearcherInstance(searchers2); // performing the same search Collection hits2 = mSearcher2->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, hits2.size()); // iterating over the hit documents for (int32_t i = 0; i < hits2.size(); ++i) mSearcher2->doc(hits2[i]->doc); // test the subSearcher() method QueryPtr subSearcherQuery = parser->parse(L"id:doc1"); hits2 = mSearcher2->search(subSearcherQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits2.size()); BOOST_CHECK_EQUAL(0, mSearcher2->subSearcher(hits2[0]->doc)); // hit from searchers2[0] BOOST_CHECK_EQUAL(1, mSearcher2->subSearcher(hits2[1]->doc)); // hit from searchers2[1] subSearcherQuery = parser->parse(L"id:doc2"); hits2 = mSearcher2->search(subSearcherQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits2.size()); BOOST_CHECK_EQUAL(1, mSearcher2->subSearcher(hits2[0]->doc)); // hit from searchers2[1] mSearcher2->close(); //-------------------------------------------------------------------- // scenario 3 
//-------------------------------------------------------------------- // deleting the document just added, this will cause a different exception to take place TermPtr term = newLucene(L"id", L"doc1"); IndexReaderPtr readerB = IndexReader::open(indexStoreB, false); readerB->deleteDocuments(term); readerB->close(); // optimizing the index with the writer writerB = newLucene(indexStoreB, newLucene(LuceneVersion::LUCENE_CURRENT), false, IndexWriter::MaxFieldLengthLIMITED); writerB->optimize(); writerB->close(); // building the searchables Collection searchers3 = newCollection(newLucene(indexStoreB, true), newLucene(indexStoreA, true)); // creating the mulitSearcher SearcherPtr mSearcher3 = getMultiSearcherInstance(searchers3); // performing the same search Collection hits3 = mSearcher3->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits3.size()); // iterating over the hit documents for (int32_t i = 0; i < hits3.size(); ++i) mSearcher3->doc(hits3[i]->doc); mSearcher3->close(); indexStoreA->close(); indexStoreB->close(); } BOOST_AUTO_TEST_CASE(testFieldSelector) { RAMDirectoryPtr ramDirectory1 = newLucene(); RAMDirectoryPtr ramDirectory2 = newLucene(); QueryPtr query = newLucene(newLucene(L"contents", L"doc0")); // Now put the documents in a different index initIndex(ramDirectory1, 10, true, L""); // documents with a single token "doc0", "doc1", etc... initIndex(ramDirectory2, 10, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... 
IndexSearcherPtr indexSearcher1 = newLucene(ramDirectory1, true); IndexSearcherPtr indexSearcher2 = newLucene(ramDirectory2, true); MultiSearcherPtr searcher = getMultiSearcherInstance(newCollection(indexSearcher1, indexSearcher2)); BOOST_CHECK(searcher); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK(hits); BOOST_CHECK_EQUAL(hits.size(), 2); DocumentPtr document = searcher->doc(hits[0]->doc); BOOST_CHECK(document); BOOST_CHECK_EQUAL(document->getFields().size(), 2); // Should be one document from each directory they both have two fields, contents and other HashSet ftl = HashSet::newInstance(); ftl.add(L"other"); SetBasedFieldSelectorPtr fs = newLucene(ftl, HashSet::newInstance()); document = searcher->doc(hits[0]->doc, fs); BOOST_CHECK(document); BOOST_CHECK_EQUAL(document->getFields().size(), 1); String value = document->get(L"contents"); BOOST_CHECK(value.empty()); value = document->get(L"other"); BOOST_CHECK(!value.empty()); ftl.clear(); ftl.add(L"contents"); fs = newLucene(ftl, HashSet::newInstance()); document = searcher->doc(hits[1]->doc, fs); value = document->get(L"contents"); BOOST_CHECK(!value.empty()); value = document->get(L"other"); BOOST_CHECK(value.empty()); } BOOST_AUTO_TEST_CASE(testNormalization) { int32_t numDocs = 10; QueryPtr query = newLucene(newLucene(L"contents", L"doc0")); RAMDirectoryPtr ramDirectory1 = newLucene(); // First put the documents in the same index initIndex(ramDirectory1, numDocs, true, L""); // documents with a single token "doc0", "doc1", etc... initIndex(ramDirectory1, numDocs, false, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... 
IndexSearcherPtr indexSearcher1 = newLucene(ramDirectory1, true); indexSearcher1->setDefaultFieldSortScoring(true, true); Collection hits = indexSearcher1->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); // Store the scores for use later Collection scores = newCollection(hits[0]->score, hits[1]->score); BOOST_CHECK(scores[0] > scores[1]); indexSearcher1->close(); ramDirectory1->close(); hits.clear(); ramDirectory1 = newLucene(); RAMDirectoryPtr ramDirectory2 = newLucene(); // Now put the documents in a different index initIndex(ramDirectory1, numDocs, true, L""); // documents with a single token "doc0", "doc1", etc... initIndex(ramDirectory2, numDocs, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... indexSearcher1 = newLucene(ramDirectory1, true); indexSearcher1->setDefaultFieldSortScoring(true, true); IndexSearcherPtr indexSearcher2 = newLucene(ramDirectory2, true); indexSearcher2->setDefaultFieldSortScoring(true, true); SearcherPtr searcher = getMultiSearcherInstance(newCollection(indexSearcher1, indexSearcher2)); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); // The scores should be the same (within reason) BOOST_CHECK_CLOSE_FRACTION(scores[0], hits[0]->score, 1e-6); // This will a document from ramDirectory1 BOOST_CHECK_CLOSE_FRACTION(scores[1], hits[1]->score, 1e-6); // This will a document from ramDirectory2 // Adding a Sort.RELEVANCE object should not change anything hits = searcher->search(query, FilterPtr(), 1000, Sort::RELEVANCE())->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); BOOST_CHECK_CLOSE_FRACTION(scores[0], hits[0]->score, 1e-6); // This will a document from ramDirectory1 BOOST_CHECK_CLOSE_FRACTION(scores[1], hits[1]->score, 1e-6); // This will a document from ramDirectory2 searcher->close(); ramDirectory1->close(); ramDirectory2->close(); } namespace TestCustomSimilarity { class CustomSimilarity : public DefaultSimilarity { public: 
virtual ~CustomSimilarity() { } public: virtual double idf(int32_t docFreq, int32_t numDocs) { return 100.0; } virtual double coord(int32_t overlap, int32_t maxOverlap) { return 1.0; } virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } virtual double queryNorm(double sumOfSquaredWeights) { return 1.0; } virtual double sloppyFreq(int32_t distance) { return 1.0; } virtual double tf(double freq) { return 1.0; } }; } BOOST_AUTO_TEST_CASE(testCustomSimilarity) { RAMDirectoryPtr dir = newLucene(); initIndex(dir, 10, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... IndexSearcherPtr searcher = newLucene(dir, true); MultiSearcherPtr msearcher = getMultiSearcherInstance(newCollection(searcher)); SimilarityPtr customSimilarity = newLucene(); searcher->setSimilarity(customSimilarity); msearcher->setSimilarity(customSimilarity); QueryPtr query = newLucene(newLucene(L"contents", L"doc0")); // Get a score from IndexSearcher TopDocsPtr topDocs = searcher->search(query, FilterPtr(), 1); double score1 = topDocs->maxScore; // Get the score from MultiSearcher topDocs = msearcher->search(query, FilterPtr(), 1); double scoreN = topDocs->maxScore; // The scores from the IndexSearcher and Multisearcher should be the same if the same similarity is used. BOOST_CHECK_CLOSE_FRACTION(score1, scoreN, 1e-6); } BOOST_AUTO_TEST_CASE(testDocFreq) { RAMDirectoryPtr dir1 = newLucene(); RAMDirectoryPtr dir2 = newLucene(); initIndex(dir1, 10, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... initIndex(dir2, 5, true, L"x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... 
IndexSearcherPtr searcher1 = newLucene(dir1, true); IndexSearcherPtr searcher2 = newLucene(dir2, true); MultiSearcherPtr multiSearcher = getMultiSearcherInstance(newCollection(searcher1, searcher2)); BOOST_CHECK_EQUAL(15, multiSearcher->docFreq(newLucene(L"contents", L"x"))); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/PhrasePrefixQueryTest.cpp000066400000000000000000000060551217574114600253260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "MultiPhraseQuery.h" #include "Term.h" #include "TermEnum.h" #include "IndexReader.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PhrasePrefixQueryTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testPhrasePrefix) { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc1 = newLucene(); DocumentPtr doc2 = newLucene(); DocumentPtr doc3 = newLucene(); DocumentPtr doc4 = newLucene(); DocumentPtr doc5 = newLucene(); doc1->add(newLucene(L"body", L"blueberry pie", Field::STORE_YES, Field::INDEX_ANALYZED)); doc2->add(newLucene(L"body", L"blueberry strudel", Field::STORE_YES, Field::INDEX_ANALYZED)); doc3->add(newLucene(L"body", L"blueberry pizza", Field::STORE_YES, Field::INDEX_ANALYZED)); doc4->add(newLucene(L"body", L"blueberry chewing gum", Field::STORE_YES, Field::INDEX_ANALYZED)); doc5->add(newLucene(L"body", L"piccadilly circus", 
Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc1); writer->addDocument(doc2); writer->addDocument(doc3); writer->addDocument(doc4); writer->addDocument(doc5); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(indexStore, true); MultiPhraseQueryPtr query1 = newLucene(); MultiPhraseQueryPtr query2 = newLucene(); query1->add(newLucene(L"body", L"blueberry")); query2->add(newLucene(L"body", L"strawberry")); Collection termsWithPrefix = Collection::newInstance(); IndexReaderPtr ir = IndexReader::open(indexStore, true); // this TermEnum gives "piccadilly", "pie" and "pizza". String prefix = L"pi"; TermEnumPtr te = ir->terms(newLucene(L"body", prefix + L"*")); do { if (boost::starts_with(te->term()->text(), prefix)) termsWithPrefix.add(te->term()); } while (te->next()); query1->add(termsWithPrefix); query2->add(termsWithPrefix); Collection result = searcher->search(query1, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, result.size()); result = searcher->search(query2, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/PhraseQueryTest.cpp000066400000000000000000000522231217574114600241460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "Analyzer.h" #include "WhitespaceTokenizer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "PhraseQuery.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "QueryUtils.h" #include "Term.h" #include "StopAnalyzer.h" #include "WhitespaceAnalyzer.h" #include "TermQuery.h" #include "BooleanQuery.h" #include "QueryParser.h" using namespace Lucene; class PhraseQueryAnalyzer : public Analyzer { public: virtual ~PhraseQueryAnalyzer() { } public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(reader); } virtual int32_t getPositionIncrementGap(const String& fieldName) { return 100; } }; class PhraseQueryFixture : public LuceneTestFixture { public: PhraseQueryFixture() { directory = newLucene(); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"one two three four five", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"repeated", L"this is a repeated field - first part", Field::STORE_YES, Field::INDEX_ANALYZED)); FieldablePtr repeatedField = newLucene(L"repeated", L"second part of a repeated field", Field::STORE_YES, Field::INDEX_ANALYZED); doc->add(repeatedField); doc->add(newLucene(L"palindrome", L"one two three two one", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"nonexist", L"phrase exist notexist exist found", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"nonexist", L"phrase exist notexist exist found", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->optimize(); writer->close(); searcher = 
newLucene(directory, true); query = newLucene(); } virtual ~PhraseQueryFixture() { searcher->close(); directory->close(); } public: // threshold for comparing floats static const double SCORE_COMP_THRESH; protected: IndexSearcherPtr searcher; PhraseQueryPtr query; RAMDirectoryPtr directory; }; const double PhraseQueryFixture::SCORE_COMP_THRESH = 1e-6f; BOOST_FIXTURE_TEST_SUITE(PhraseQueryTest, PhraseQueryFixture) BOOST_AUTO_TEST_CASE(testNotCloseEnough) { query->setSlop(2); query->add(newLucene(L"field", L"one")); query->add(newLucene(L"field", L"five")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); } BOOST_AUTO_TEST_CASE(testBarelyCloseEnough) { query->setSlop(3); query->add(newLucene(L"field", L"one")); query->add(newLucene(L"field", L"five")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); } /// Ensures slop of 0 works for exact matches, but not reversed BOOST_AUTO_TEST_CASE(testExact) { // slop is zero by default query->add(newLucene(L"field", L"four")); query->add(newLucene(L"field", L"five")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); query = newLucene(); query->add(newLucene(L"field", L"two")); query->add(newLucene(L"field", L"one")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); } BOOST_AUTO_TEST_CASE(testSlop1) { // Ensures slop of 1 works with terms in order. 
query->setSlop(1); query->add(newLucene(L"field", L"one")); query->add(newLucene(L"field", L"two")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); // Ensures slop of 1 does not work for phrases out of order; must be at least 2 query = newLucene(); query->setSlop(1); query->add(newLucene(L"field", L"two")); query->add(newLucene(L"field", L"one")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); } /// As long as slop is at least 2, terms can be reversed BOOST_AUTO_TEST_CASE(testOrderDoesntMatter) { // must be at least two for reverse order match query->setSlop(2); query->add(newLucene(L"field", L"two")); query->add(newLucene(L"field", L"one")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); query = newLucene(); query->setSlop(2); query->add(newLucene(L"field", L"three")); query->add(newLucene(L"field", L"one")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); } /// Slop is the total number of positional moves allowed to line up a phrase BOOST_AUTO_TEST_CASE(testMulipleTerms) { query->setSlop(2); query->add(newLucene(L"field", L"one")); query->add(newLucene(L"field", L"three")); query->add(newLucene(L"field", L"five")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); query = newLucene(); query->setSlop(5); // it takes six moves to match this phrase query->add(newLucene(L"field", L"five")); query->add(newLucene(L"field", L"three")); query->add(newLucene(L"field", L"one")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); 
query->setSlop(6); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); } BOOST_AUTO_TEST_CASE(testPhraseQueryWithStopAnalyzer) { RAMDirectoryPtr directory = newLucene(); StopAnalyzerPtr stopAnalyzer = newLucene(LuceneVersion::LUCENE_24); IndexWriterPtr writer = newLucene(directory, stopAnalyzer, true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"the stop words are here", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); // valid exact phrase query PhraseQueryPtr query = newLucene(); query->add(newLucene(L"field", L"stop")); query->add(newLucene(L"field", L"words")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); // StopAnalyzer as of 2.4 does not leave "holes", so this matches. 
query = newLucene(); query->add(newLucene(L"field", L"words")); query->add(newLucene(L"field", L"here")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); searcher->close(); } BOOST_AUTO_TEST_CASE(testPhraseQueryInConjunctionScorer) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"source", L"marketing info", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"contents", L"foobar", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"source", L"marketing info", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); PhraseQueryPtr phraseQuery = newLucene(); phraseQuery->add(newLucene(L"source", L"marketing")); phraseQuery->add(newLucene(L"source", L"info")); Collection hits = searcher->search(phraseQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); QueryUtils::check(phraseQuery, searcher); TermQueryPtr termQuery = newLucene(newLucene(L"contents", L"foobar")); BooleanQueryPtr booleanQuery = newLucene(); booleanQuery->add(termQuery, BooleanClause::MUST); booleanQuery->add(phraseQuery, BooleanClause::MUST); hits = searcher->search(booleanQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(termQuery, searcher); searcher->close(); writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); doc = newLucene(); doc->add(newLucene(L"contents", L"map entry woo", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"contents", L"woo map entry", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); 
doc->add(newLucene(L"contents", L"map foobarword entry woo", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->optimize(); writer->close(); searcher = newLucene(directory, true); termQuery = newLucene(newLucene(L"contents", L"woo")); phraseQuery = newLucene(); phraseQuery->add(newLucene(L"contents", L"map")); phraseQuery->add(newLucene(L"contents", L"entry")); hits = searcher->search(termQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); hits = searcher->search(phraseQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); booleanQuery = newLucene(); booleanQuery->add(termQuery, BooleanClause::MUST); booleanQuery->add(phraseQuery, BooleanClause::MUST); hits = searcher->search(booleanQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); booleanQuery = newLucene(); booleanQuery->add(phraseQuery, BooleanClause::MUST); booleanQuery->add(termQuery, BooleanClause::MUST); hits = searcher->search(booleanQuery, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); QueryUtils::check(booleanQuery, searcher); searcher->close(); directory->close(); } BOOST_AUTO_TEST_CASE(testSlopScoring) { DirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"foo firstname lastname foo", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); DocumentPtr doc2 = newLucene(); doc2->add(newLucene(L"field", L"foo firstname xxx lastname foo", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc2); DocumentPtr doc3 = newLucene(); doc3->add(newLucene(L"field", L"foo firstname xxx yyy lastname foo", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc3); writer->optimize(); writer->close(); SearcherPtr searcher = newLucene(directory, true); PhraseQueryPtr query = newLucene(); query->add(newLucene(L"field", L"firstname")); 
query->add(newLucene(L"field", L"lastname")); query->setSlop(INT_MAX); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); // Make sure that those matches where the terms appear closer to each other get a higher score BOOST_CHECK_CLOSE_FRACTION(0.71, hits[0]->score, 0.01); BOOST_CHECK_EQUAL(0, hits[0]->doc); BOOST_CHECK_CLOSE_FRACTION(0.44, hits[1]->score, 0.01); BOOST_CHECK_EQUAL(1, hits[1]->doc); BOOST_CHECK_CLOSE_FRACTION(0.31, hits[2]->score, 0.01); BOOST_CHECK_EQUAL(2, hits[2]->doc); QueryUtils::check(query, searcher); } BOOST_AUTO_TEST_CASE(testToString) { StopAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", analyzer); qp->setEnablePositionIncrements(true); PhraseQueryPtr q = boost::dynamic_pointer_cast(qp->parse(L"\"this hi this is a test is\"")); BOOST_CHECK_EQUAL(L"field:\"? hi ? ? ? test\"", q->toString()); q->add(newLucene(L"field", L"hello"), 1); BOOST_CHECK_EQUAL(L"field:\"? hi|hello ? ? ? 
test\"", q->toString()); } BOOST_AUTO_TEST_CASE(testWrappedPhrase) { query->add(newLucene(L"repeated", L"first")); query->add(newLucene(L"repeated", L"part")); query->add(newLucene(L"repeated", L"second")); query->add(newLucene(L"repeated", L"part")); query->setSlop(100); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); query->setSlop(99); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); } /// work on two docs like this: "phrase exist notexist exist found" BOOST_AUTO_TEST_CASE(testNonExistingPhrase) { // phrase without repetitions that exists in 2 docs query->add(newLucene(L"nonexist", L"phrase")); query->add(newLucene(L"nonexist", L"notexist")); query->add(newLucene(L"nonexist", L"found")); query->setSlop(2); // would be found this way Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); QueryUtils::check(query, searcher); // phrase with repetitions that exists in 2 docs query = newLucene(); query->add(newLucene(L"nonexist", L"phrase")); query->add(newLucene(L"nonexist", L"exist")); query->add(newLucene(L"nonexist", L"exist")); query->setSlop(1); // would be found hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); QueryUtils::check(query, searcher); // phrase I with repetitions that does not exist in any doc query = newLucene(); query->add(newLucene(L"nonexist", L"phrase")); query->add(newLucene(L"nonexist", L"notexist")); query->add(newLucene(L"nonexist", L"phrase")); query->setSlop(1000); // would not be found no matter how high the slop is hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); // phrase II with repetitions that does not exist in any doc query = newLucene(); query->add(newLucene(L"nonexist", 
L"phrase")); query->add(newLucene(L"nonexist", L"exist")); query->add(newLucene(L"nonexist", L"exist")); query->add(newLucene(L"nonexist", L"exist")); query->setSlop(1000); // would not be found no matter how high the slop is hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); QueryUtils::check(query, searcher); } /// Working on a 2 fields like this: /// Field(L"field", L"one two three four five") /// Field(L"palindrome", L"one two three two one") /// Phrase of size 2 occurring twice, once in order and once in reverse, because doc is a palindrome, is counted twice. /// Also, in this case order in query does not matter. Also, when an exact match is found, both sloppy scorer and /// exact scorer scores the same. BOOST_AUTO_TEST_CASE(testPalindrome2) { // search on non palindrome, find phrase with no slop, using exact phrase scorer query->setSlop(0); // to use exact phrase scorer query->add(newLucene(L"field", L"two")); query->add(newLucene(L"field", L"three")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); double score0 = hits[0]->score; QueryUtils::check(query, searcher); // search on non palindrome, find phrase with slop 2, though no slop required here. 
query->setSlop(2); // to use sloppy scorer hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); double score1 = hits[0]->score; BOOST_CHECK_CLOSE_FRACTION(score0, score1, SCORE_COMP_THRESH); QueryUtils::check(query, searcher); // search ordered in palindrome, find it twice query = newLucene(); query->setSlop(2); // must be at least two for both ordered and reversed to match query->add(newLucene(L"palindrome", L"two")); query->add(newLucene(L"palindrome", L"three")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); // search reversed in palindrome, find it twice query = newLucene(); query->setSlop(2); // must be at least two for both ordered and reversed to match query->add(newLucene(L"palindrome", L"three")); query->add(newLucene(L"palindrome", L"two")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); } /// Working on a 2 fields like this: /// Field(L"field", L"one two three four five") /// Field(L"palindrome", L"one two three two one") /// Phrase of size 3 occurring twice, once in order and once in reverse, because doc is a palindrome, is counted twice. /// Also, in this case order in query does not matter. Also, when an exact match is found, both sloppy scorer and exact /// scorer scores the same. BOOST_AUTO_TEST_CASE(testPalindrome3) { // search on non palindrome, find phrase with no slop, using exact phrase scorer query->setSlop(0); // to use exact phrase scorer query->add(newLucene(L"field", L"one")); query->add(newLucene(L"field", L"two")); query->add(newLucene(L"field", L"three")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); double score0 = hits[0]->score; QueryUtils::check(query, searcher); // search on non palindrome, find phrase with slop 3, though no slop required here. 
query->setSlop(4); // to use sloppy scorer hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); double score1 = hits[0]->score; BOOST_CHECK_CLOSE_FRACTION(score0, score1, SCORE_COMP_THRESH); QueryUtils::check(query, searcher); // search ordered in palindrome, find it twice query = newLucene(); query->setSlop(4); // must be at least four for both ordered and reversed to match query->add(newLucene(L"palindrome", L"one")); query->add(newLucene(L"palindrome", L"two")); query->add(newLucene(L"palindrome", L"three")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); // search reversed in palindrome, find it twice query = newLucene(); query->setSlop(4); // must be at least four for both ordered and reversed to match query->add(newLucene(L"palindrome", L"three")); query->add(newLucene(L"palindrome", L"two")); query->add(newLucene(L"palindrome", L"one")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); QueryUtils::check(query, searcher); } BOOST_AUTO_TEST_CASE(testEmptyPhraseQuery) { BooleanQueryPtr q2 = newLucene(); q2->add(newLucene(), BooleanClause::MUST); BOOST_CHECK_EQUAL(q2->toString(), L"+\"?\""); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/PositionIncrementTest.cpp000066400000000000000000000333031217574114600253450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Analyzer.h" #include "TokenStream.h" #include "PositionIncrementAttribute.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "IndexReader.h" #include "TermPositions.h" #include "PhraseQuery.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "MultiPhraseQuery.h" #include "QueryParser.h" #include "WhitespaceAnalyzer.h" #include "StopFilter.h" #include "CharArraySet.h" #include "LowerCaseTokenizer.h" #include "PayloadAttribute.h" #include "Payload.h" #include "StringReader.h" #include "SpanTermQuery.h" #include "SpanQuery.h" #include "SpanNearQuery.h" #include "Spans.h" #include "PayloadSpanUtil.h" #include "Term.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PositionIncrementTest, LuceneTestFixture) namespace TestSetPosition { class SetPositionTokenStream : public TokenStream { public: SetPositionTokenStream() { TOKENS = newCollection(L"1", L"2", L"3", L"4", L"5"); INCREMENTS = newCollection(0, 2, 1, 0, 1); i = 0; posIncrAtt = addAttribute(); termAtt = addAttribute(); offsetAtt = addAttribute(); } virtual ~SetPositionTokenStream() { } protected: Collection TOKENS; Collection INCREMENTS; int32_t i; PositionIncrementAttributePtr posIncrAtt; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (i == TOKENS.size()) return false; clearAttributes(); termAtt->setTermBuffer(TOKENS[i]); offsetAtt->setOffset(i, i); posIncrAtt->setPositionIncrement(INCREMENTS[i]); ++i; return true; } }; class SetPositionAnalyzer : public Analyzer { public: virtual ~SetPositionAnalyzer() { } public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(); } }; class StopWhitespaceAnalyzer : public Analyzer { public: 
StopWhitespaceAnalyzer(bool enablePositionIncrements) { this->enablePositionIncrements = enablePositionIncrements; this->a = newLucene(); } virtual ~StopWhitespaceAnalyzer() { } public: bool enablePositionIncrements; WhitespaceAnalyzerPtr a; public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr ts = a->tokenStream(fieldName, reader); return newLucene(enablePositionIncrements, ts, newLucene(newCollection(L"stop"), true)); } }; } BOOST_AUTO_TEST_CASE(testSetPosition) { AnalyzerPtr analyzer = newLucene(); DirectoryPtr store = newLucene(); IndexWriterPtr writer = newLucene(store, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr d = newLucene(); d->add(newLucene(L"field", L"bogus", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d); writer->optimize(); writer->close(); IndexSearcherPtr searcher = newLucene(store, true); TermPositionsPtr pos = searcher->getIndexReader()->termPositions(newLucene(L"field", L"1")); pos->next(); // first token should be at position 0 BOOST_CHECK_EQUAL(0, pos->nextPosition()); pos = searcher->getIndexReader()->termPositions(newLucene(L"field", L"2")); pos->next(); // second token should be at position 2 BOOST_CHECK_EQUAL(2, pos->nextPosition()); PhraseQueryPtr q = newLucene(); q->add(newLucene(L"field", L"1")); q->add(newLucene(L"field", L"2")); Collection hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // same as previous, just specify positions explicitely. q = newLucene(); q->add(newLucene(L"field", L"1"), 0); q->add(newLucene(L"field", L"2"), 1); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // specifying correct positions should find the phrase. 
q = newLucene(); q->add(newLucene(L"field", L"1"), 0); q->add(newLucene(L"field", L"2"), 2); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); q = newLucene(); q->add(newLucene(L"field", L"2")); q->add(newLucene(L"field", L"3")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); q = newLucene(); q->add(newLucene(L"field", L"3")); q->add(newLucene(L"field", L"4")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // phrase query would find it when correct positions are specified. q = newLucene(); q->add(newLucene(L"field", L"3"), 0); q->add(newLucene(L"field", L"4"), 0); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); // phrase query should fail for non existing searched term // even if there exist another searched terms in the same searched position. q = newLucene(); q->add(newLucene(L"field", L"3"), 0); q->add(newLucene(L"field", L"9"), 0); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // multi-phrase query should succeed for non existing searched term // because there exist another searched terms in the same searched position. 
MultiPhraseQueryPtr mq = newLucene(); mq->add(newCollection(newLucene(L"field", L"3"), newLucene(L"field", L"9")), 0); hits = searcher->search(mq, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); q = newLucene(); q->add(newLucene(L"field", L"2")); q->add(newLucene(L"field", L"4")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); q = newLucene(); q->add(newLucene(L"field", L"3")); q->add(newLucene(L"field", L"5")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); q = newLucene(); q->add(newLucene(L"field", L"4")); q->add(newLucene(L"field", L"5")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); q = newLucene(); q->add(newLucene(L"field", L"2")); q->add(newLucene(L"field", L"5")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // should not find "1 2" because there is a gap of 1 in the index QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene(false)); q = boost::dynamic_pointer_cast(qp->parse(L"\"1 2\"")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // omitted stop word cannot help because stop filter swallows the increments. q = boost::dynamic_pointer_cast(qp->parse(L"\"1 stop 2\"")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // query parser alone won't help, because stop filter swallows the increments. qp->setEnablePositionIncrements(true); q = boost::dynamic_pointer_cast(qp->parse(L"\"1 stop 2\"")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // stop filter alone won't help, because query parser swallows the increments. 
qp->setEnablePositionIncrements(false); q = boost::dynamic_pointer_cast(qp->parse(L"\"1 stop 2\"")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // when both qp and stopFilter propagate increments, we should find the doc. qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene(true)); qp->setEnablePositionIncrements(true); q = boost::dynamic_pointer_cast(qp->parse(L"\"1 stop 2\"")); hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); } namespace TestPayloadsPos0 { class TestPayloadFilter : public TokenFilter { public: TestPayloadFilter(TokenStreamPtr input, const String& fieldName) : TokenFilter(input) { this->fieldName = fieldName; this->pos = 0; this->i = 0; this->posIncrAttr = input->addAttribute(); this->payloadAttr = input->addAttribute(); this->termAttr = input->addAttribute(); } virtual ~TestPayloadFilter() { } public: String fieldName; int32_t pos; int32_t i; PositionIncrementAttributePtr posIncrAttr; PayloadAttributePtr payloadAttr; TermAttributePtr termAttr; public: virtual bool incrementToken() { if (input->incrementToken()) { String payloadData = L"pos: " + StringUtils::toString(pos); ByteArray data = ByteArray::newInstance(payloadData.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)data.get(), payloadData.c_str(), payloadData.length()); payloadAttr->setPayload(newLucene(data)); int32_t posIncr = i % 2 == 1 ? 
1 : 0; posIncrAttr->setPositionIncrement(posIncr); pos += posIncr; ++i; return true; } else return false; } }; class TestPayloadAnalyzer : public Analyzer { public: virtual ~TestPayloadAnalyzer() { } public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); return newLucene(result, fieldName); } }; } BOOST_AUTO_TEST_CASE(testPayloadsPos0) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", newLucene(L"a a b c d e a f g h i j a b k k"))); writer->addDocument(doc); IndexReaderPtr r = writer->getReader(); TermPositionsPtr tp = r->termPositions(newLucene(L"content", L"a")); int32_t count = 0; BOOST_CHECK(tp->next()); // "a" occurs 4 times BOOST_CHECK_EQUAL(4, tp->freq()); int32_t expected = 0; BOOST_CHECK_EQUAL(expected, tp->nextPosition()); BOOST_CHECK_EQUAL(1, tp->nextPosition()); BOOST_CHECK_EQUAL(3, tp->nextPosition()); BOOST_CHECK_EQUAL(6, tp->nextPosition()); // only one doc has "a" BOOST_CHECK(!tp->next()); IndexSearcherPtr is = newLucene(r); SpanTermQueryPtr stq1 = newLucene(newLucene(L"content", L"a")); SpanTermQueryPtr stq2 = newLucene(newLucene(L"content", L"k")); Collection sqs = newCollection(stq1, stq2); SpanNearQueryPtr snq = newLucene(sqs, 30, false); count = 0; bool sawZero = false; SpansPtr pspans = snq->getSpans(is->getIndexReader()); while (pspans->next()) { Collection payloads = pspans->getPayload(); if (pspans->start() == 0) sawZero = true; count += payloads.size(); } BOOST_CHECK_EQUAL(5, count); BOOST_CHECK(sawZero); SpansPtr spans = snq->getSpans(is->getIndexReader()); count = 0; sawZero = false; while (spans->next()) { ++count; if (spans->start() == 0) sawZero = true; } BOOST_CHECK_EQUAL(4, count); BOOST_CHECK(sawZero); sawZero = false; PayloadSpanUtilPtr psu = newLucene(is->getIndexReader()); Collection pls = 
psu->getPayloadsForQuery(snq); count = pls.size(); for (Collection::iterator it = pls.begin(); it != pls.end(); ++it) { String s = String((wchar_t*)it->get(), it->size() / sizeof(wchar_t)); if (s == L"pos: 0") sawZero = true; } BOOST_CHECK_EQUAL(5, count); BOOST_CHECK(sawZero); writer->close(); is->getIndexReader()->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/PositiveScoresOnlyCollectorTest.cpp000066400000000000000000000060521217574114600273670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Scorer.h" #include "TopScoreDocCollector.h" #include "PositiveScoresOnlyCollector.h" #include "TopDocs.h" #include "ScoreDoc.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PositiveScoresOnlyCollectorTest, LuceneTestFixture) namespace TestNegativeScores { class SimpleScorer : public Scorer { public: SimpleScorer(Collection scores) : Scorer(SimilarityPtr()) { this->scores = scores; idx = -1; } virtual ~SimpleScorer() { } public: int32_t idx; Collection scores; public: virtual double score() { return idx == scores.size() ? std::numeric_limits::quiet_NaN() : scores[idx]; } virtual int32_t docID() { return idx; } virtual int32_t nextDoc() { return ++idx != scores.size() ? idx : DocIdSetIterator::NO_MORE_DOCS; } virtual int32_t advance(int32_t target) { idx = target; return idx < scores.size() ? 
idx : DocIdSetIterator::NO_MORE_DOCS; } }; } BOOST_AUTO_TEST_CASE(testNegativeScores) { // The scores must have positive as well as negative values Collection scores = Collection::newInstance(); scores.add(0.7767749); scores.add(-1.7839992); scores.add(8.9925785); scores.add(7.9608946); scores.add(-0.07948637); scores.add(2.6356435); scores.add(7.4950366); scores.add(7.1490803); scores.add(-8.108544); scores.add(4.961808f); scores.add(2.2423935); scores.add(-7.285586); scores.add(4.6699767); // The Top*Collectors previously filtered out documents with <= scores. This behaviour has changed. // This test checks that if PositiveOnlyScoresFilter wraps one of these collectors, documents with // <= 0 scores are indeed filtered. int32_t numPositiveScores = 0; for (int32_t i = 0; i < scores.size(); ++i) { if (scores[i] > 0) ++numPositiveScores; } ScorerPtr s = newLucene(scores); TopDocsCollectorPtr tdc = TopScoreDocCollector::create(scores.size(), true); CollectorPtr c = newLucene(tdc); c->setScorer(s); while (s->nextDoc() != DocIdSetIterator::NO_MORE_DOCS) c->collect(0); TopDocsPtr td = tdc->topDocs(); Collection sd = td->scoreDocs; BOOST_CHECK_EQUAL(numPositiveScores, td->totalHits); for (int32_t i = 0; i < sd.size(); ++i) BOOST_CHECK(sd[i]->score > 0); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/PrefixFilterTest.cpp000066400000000000000000000100161217574114600242730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "PrefixFilter.h" #include "Term.h" #include "ConstantScoreQuery.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PrefixFilterTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testPrefixFilter) { RAMDirectoryPtr directory = newLucene(); Collection categories = newCollection( L"/Computers/Linux", L"/Computers/Mac/One", L"/Computers/Mac/Two", L"/Computers/Windows" ); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < categories.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"category", categories[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->close(); // PrefixFilter combined with ConstantScoreQuery PrefixFilterPtr filter = newLucene(newLucene(L"category", L"/Computers")); QueryPtr query = newLucene(filter); IndexSearcherPtr searcher = newLucene(directory, true); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, hits.size()); // test middle of values filter = newLucene(newLucene(L"category", L"/Computers/Mac")); query = newLucene(filter); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); // test start of values filter = newLucene(newLucene(L"category", L"/Computers/Linux")); query = newLucene(filter); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); // test end of values filter = newLucene(newLucene(L"category", L"/Computers/Windows")); query = newLucene(filter); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); // test 
non-existent filter = newLucene(newLucene(L"category", L"/Computers/ObsoleteOS")); query = newLucene(filter); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // test non-existent, before values filter = newLucene(newLucene(L"category", L"/Computers/AAA")); query = newLucene(filter); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // test non-existent, after values filter = newLucene(newLucene(L"category", L"/Computers/ZZZ")); query = newLucene(filter); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); // test zero length prefix filter = newLucene(newLucene(L"category", L"")); query = newLucene(filter); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(4, hits.size()); // test non existent field filter = newLucene(newLucene(L"nonexistentfield", L"/Computers")); query = newLucene(filter); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/PrefixInBooleanQueryTest.cpp000066400000000000000000000074031217574114600257500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "PrefixQuery.h" #include "Term.h" #include "TopDocs.h" #include "TermQuery.h" #include "BooleanQuery.h" using namespace Lucene; class PrefixInBooleanQueryFixture : public LuceneTestFixture { public: PrefixInBooleanQueryFixture() { directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 5137; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } for (int32_t i = 5138; i < 11377; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->close(); } virtual ~PrefixInBooleanQueryFixture() { } protected: RAMDirectoryPtr directory; public: static const String FIELD; }; const String PrefixInBooleanQueryFixture::FIELD = L"name"; BOOST_FIXTURE_TEST_SUITE(PrefixInBooleanQueryTest, PrefixInBooleanQueryFixture) BOOST_AUTO_TEST_CASE(testPrefixQuery) { IndexSearcherPtr indexSearcher = newLucene(directory, true); QueryPtr query = newLucene(newLucene(FIELD, L"tang")); BOOST_CHECK_EQUAL(2, indexSearcher->search(query, FilterPtr(), 1000)->totalHits); } BOOST_AUTO_TEST_CASE(testTermQuery) { IndexSearcherPtr indexSearcher = newLucene(directory, true); QueryPtr query = newLucene(newLucene(FIELD, 
L"tangfulin")); BOOST_CHECK_EQUAL(2, indexSearcher->search(query, FilterPtr(), 1000)->totalHits); } BOOST_AUTO_TEST_CASE(testTermBooleanQuery) { IndexSearcherPtr indexSearcher = newLucene(directory, true); BooleanQueryPtr query = newLucene(); query->add(newLucene(newLucene(FIELD, L"tangfulin")), BooleanClause::SHOULD); query->add(newLucene(newLucene(FIELD, L"notexistnames")), BooleanClause::SHOULD); BOOST_CHECK_EQUAL(2, indexSearcher->search(query, FilterPtr(), 1000)->totalHits); } BOOST_AUTO_TEST_CASE(testPrefixBooleanQuery) { IndexSearcherPtr indexSearcher = newLucene(directory, true); BooleanQueryPtr query = newLucene(); query->add(newLucene(newLucene(FIELD, L"tang")), BooleanClause::SHOULD); query->add(newLucene(newLucene(FIELD, L"notexistnames")), BooleanClause::SHOULD); BOOST_CHECK_EQUAL(2, indexSearcher->search(query, FilterPtr(), 1000)->totalHits); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/PrefixQueryTest.cpp000066400000000000000000000035731217574114600241650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "PrefixQuery.h" #include "Term.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PrefixQueryTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testPrefixQuery) { RAMDirectoryPtr directory = newLucene(); Collection categories = newCollection( L"/Computers/Linux", L"/Computers/Mac", L"/Computers/Windows" ); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < categories.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"category", categories[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } writer->close(); PrefixQueryPtr query = newLucene(newLucene(L"category", L"/Computers")); IndexSearcherPtr searcher = newLucene(directory, true); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); query = newLucene(newLucene(L"category", L"/Computers/Mac")); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/QueryTermVectorTest.cpp000066400000000000000000000037471217574114600250250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "QueryTermVector.h" #include "WhitespaceAnalyzer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(QueryTermVectorTest, LuceneTestFixture) static void checkGold(Collection terms, Collection gold, Collection freq, Collection goldFreqs) { for (int32_t i = 0; i < terms.size(); ++i) { BOOST_CHECK_EQUAL(terms[i], gold[i]); BOOST_CHECK_EQUAL(freq[i], goldFreqs[i]); } } BOOST_AUTO_TEST_CASE(testConstructor) { Collection queryTerm = newCollection(L"foo", L"bar", L"foo", L"again", L"foo", L"bar", L"go", L"go", L"go"); // Items are sorted lexicographically Collection gold = newCollection(L"again", L"bar", L"foo", L"go"); Collection goldFreqs = newCollection(1, 2, 3, 3); QueryTermVectorPtr result = newLucene(queryTerm); BOOST_CHECK(result); Collection terms = result->getTerms(); BOOST_CHECK_EQUAL(terms.size(), 4); Collection freq = result->getTermFrequencies(); BOOST_CHECK_EQUAL(freq.size(), 4); checkGold(terms, gold, freq, goldFreqs); result = newLucene(Collection()); BOOST_CHECK_EQUAL(result->getTerms().size(), 0); result = newLucene(L"foo bar foo again foo bar go go go", newLucene()); BOOST_CHECK(result); terms = result->getTerms(); BOOST_CHECK_EQUAL(terms.size(), 4); freq = result->getTermFrequencies(); BOOST_CHECK_EQUAL(freq.size(), 4); checkGold(terms, gold, freq, goldFreqs); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/QueryUtils.cpp000066400000000000000000000416271217574114600231720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "QueryUtils.h" #include "Query.h" #include "CheckHits.h" #include "IndexSearcher.h" #include "IndexReader.h" #include "MultiReader.h" #include "MultiSearcher.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "MatchAllDocsQuery.h" #include "Scorer.h" #include "Weight.h" #include "DocIdSetIterator.h" #include "ReaderUtil.h" #include "MiscUtils.h" namespace Lucene { QueryUtils::~QueryUtils() { } void QueryUtils::check(QueryPtr q) { checkHashEquals(q); } class WhackyQuery : public Query { public: virtual ~WhackyQuery() { } public: virtual String toString(const String& field) { return L"My Whacky Query"; } virtual bool equals(LuceneObjectPtr other) { if (!MiscUtils::typeOf(other)) return false; return Query::equals(other); } }; void QueryUtils::checkHashEquals(QueryPtr q) { QueryPtr q2 = boost::dynamic_pointer_cast(q->clone()); checkEqual(q, q2); QueryPtr q3 = boost::dynamic_pointer_cast(q->clone()); q3->setBoost(7.21792348); checkUnequal(q, q3); // test that a class check is done so that no exception is thrown in the implementation of equals() QueryPtr whacky = newLucene(); whacky->setBoost(q->getBoost()); checkUnequal(q, whacky); } void QueryUtils::checkEqual(QueryPtr q1, QueryPtr q2) { BOOST_CHECK(q1->equals(q2)); BOOST_CHECK_EQUAL(q1->hashCode(), q2->hashCode()); } void QueryUtils::checkUnequal(QueryPtr q1, QueryPtr q2) { BOOST_CHECK(!q1->equals(q2)); BOOST_CHECK(!q2->equals(q1)); // possible this test can fail on a hash collision... if that happens, please change // test to use a different example. 
BOOST_CHECK_NE(q1->hashCode(), q2->hashCode()); } void QueryUtils::checkExplanations(QueryPtr q, SearcherPtr s) { CheckHits::checkExplanations(q, L"", s, true); } void QueryUtils::check(QueryPtr q1, SearcherPtr s) { check(q1, s, true); } void QueryUtils::check(QueryPtr q1, SearcherPtr s, bool wrap) { check(q1); if (s) { IndexSearcherPtr is = boost::dynamic_pointer_cast(s); if (is) { checkFirstSkipTo(q1, is); checkSkipTo(q1, is); if (wrap) { check(q1, wrapUnderlyingReader(is, -1), false); check(q1, wrapUnderlyingReader(is, 0), false); check(q1, wrapUnderlyingReader(is, +1), false); } } if (wrap) { check(q1, wrapSearcher(s, -1), false); check(q1, wrapSearcher(s, 0), false); check(q1, wrapSearcher(s, +1), false); } checkExplanations(q1, s); QueryPtr q2 = boost::dynamic_pointer_cast(q1->clone()); checkEqual(s->rewrite(q1), s->rewrite(q2)); } } IndexSearcherPtr QueryUtils::wrapUnderlyingReader(IndexSearcherPtr s, int32_t edge) { IndexReaderPtr r = s->getIndexReader(); // we can't put deleted docs before the nested reader, because it will throw off the docIds Collection readers = newCollection( edge < 0 ? r : IndexReader::open(makeEmptyIndex(0), true), IndexReader::open(makeEmptyIndex(0), true), newLucene(newCollection( IndexReader::open(makeEmptyIndex(edge < 0 ? 4 : 0), true), IndexReader::open(makeEmptyIndex(0), true), 0 == edge ? r : IndexReader::open(makeEmptyIndex(0), true) )), IndexReader::open(makeEmptyIndex(0 < edge ? 0 : 7), true), IndexReader::open(makeEmptyIndex(0), true), newLucene(newCollection( IndexReader::open(makeEmptyIndex(0 < edge ? 0 : 5), true), IndexReader::open(makeEmptyIndex(0), true), 0 < edge ? 
r : IndexReader::open(makeEmptyIndex(0), true) )) ); IndexSearcherPtr out = newLucene(newLucene(readers)); out->setSimilarity(s->getSimilarity()); return out; } MultiSearcherPtr QueryUtils::wrapSearcher(SearcherPtr s, int32_t edge) { // we can't put deleted docs before the nested reader, because it will through off the docIds Collection searchers = newCollection( edge < 0 ? s : newLucene(makeEmptyIndex(0), true), newLucene(newCollection( newLucene(makeEmptyIndex(edge < 0 ? 65 : 0), true), newLucene(makeEmptyIndex(0), true), 0 == edge ? s : newLucene(makeEmptyIndex(0), true) )), newLucene(makeEmptyIndex(0 < edge ? 0 : 3), true), newLucene(makeEmptyIndex(0), true), newLucene(newCollection( newLucene(makeEmptyIndex(0 < edge ? 0 : 5), true), newLucene(makeEmptyIndex(0), true), 0 < edge ? s : newLucene(makeEmptyIndex(0), true) )) ); MultiSearcherPtr out = newLucene(searchers); out->setSimilarity(s->getSimilarity()); return out; } RAMDirectoryPtr QueryUtils::makeEmptyIndex(int32_t numDeletedDocs) { RAMDirectoryPtr d = newLucene(); IndexWriterPtr w = newLucene(d, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < numDeletedDocs; ++i) w->addDocument(newLucene()); w->commit(); w->deleteDocuments(newLucene()); w->commit(); if (0 < numDeletedDocs) BOOST_CHECK(w->hasDeletions()); BOOST_CHECK_EQUAL(numDeletedDocs, w->maxDoc()); BOOST_CHECK_EQUAL(0, w->numDocs()); w->close(); IndexReaderPtr r = IndexReader::open(d, true); BOOST_CHECK_EQUAL(numDeletedDocs, r->numDeletedDocs()); r->close(); return d; } namespace CheckSkipTo { class SkipCollector : public Collector { public: SkipCollector(QueryPtr q, IndexSearcherPtr s, Collection lastDoc, Collection order, Collection opidx, Collection lastReader) { this->q = q; this->s = s; this->lastDoc = lastDoc; this->order = order; this->opidx = opidx; this->lastReader = lastReader; } virtual ~SkipCollector() { } protected: QueryPtr q; IndexSearcherPtr s; Collection lastDoc; Collection order; Collection opidx; 
ScorerPtr sc; IndexReaderPtr reader; ScorerPtr scorer; Collection lastReader; public: virtual void setScorer(ScorerPtr scorer) { this->sc = scorer; } virtual void collect(int32_t doc) { double score = sc->score(); lastDoc[0] = doc; if (!scorer) { WeightPtr w = q->weight(s); scorer = w->scorer(reader, true, false); } int32_t skip_op = 0; int32_t next_op = 1; double maxDiff = 1e-5; int32_t op = order[(opidx[0]++) % order.size()]; bool more = op == skip_op ? (scorer->advance(scorer->docID() + 1) != DocIdSetIterator::NO_MORE_DOCS) : (scorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS); int32_t scorerDoc = scorer->docID(); double scorerScore = scorer->score(); double scorerScore2 = scorer->score(); double scoreDiff = std::abs(score - scorerScore); double scorerDiff = std::abs(scorerScore2 - scorerScore); if (!more || doc != scorerDoc || scoreDiff > maxDiff || scorerDiff > maxDiff) { StringStream sbord; for (int32_t i = 0; i < order.size(); ++i) sbord << (order[i] == skip_op ? L" skip()" : L" next()"); StringStream message; message << L"ERROR matching docs:\n\t" << (doc != scorerDoc ? L"--> " : L"") << L"doc=" << doc << L", scorerDoc=" << scorerDoc << L"\n\t" << (!more ? L"--> " : L"") << L"tscorer.more=" << more << L"\n\t" << (scoreDiff > maxDiff ? L"--> " : L"") << L"scorerScore=" << scorerScore << L" scoreDiff=" << scoreDiff << L" maxDiff=" << maxDiff << L"\n\t" << (scorerDiff > maxDiff ? L"--> " : L"") << L"scorerScore2=" << scorerScore2 << L" scorerDiff=" << scorerDiff << L"\n\thitCollector.doc=" << doc << L" score=" << score << L"\n\t Scorer=" << scorer << L"\n\t Query=" << q->toString() + L" " << L"\n\t Searcher=" + s->toString() << L"\n\t Order=" << sbord.str() << L"\n\t Op=" << (op == skip_op ? 
L" skip()" : L" next()"); BOOST_FAIL(StringUtils::toUTF8(message.str())); } } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { // confirm that skipping beyond the last doc, on the previous reader, hits NO_MORE_DOCS if (lastReader[0]) { IndexReaderPtr previousReader = lastReader[0]; WeightPtr w = q->weight(newLucene(previousReader)); ScorerPtr scorer = w->scorer(previousReader, true, false); if (scorer) { bool more = (scorer->advance(lastDoc[0] + 1) != DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK(!more); } } this->reader = reader; this->scorer.reset(); lastDoc[0] = -1; } virtual bool acceptsDocsOutOfOrder() { return true; } }; } void QueryUtils::checkSkipTo(QueryPtr q, IndexSearcherPtr s) { if (q->weight(s)->scoresDocsOutOfOrder()) return; // in this case order of skipTo() might differ from that of next(). int32_t skip_op = 0; int32_t next_op = 1; Collection< Collection > orders = newCollection< Collection >( newCollection(next_op), newCollection(skip_op), newCollection(skip_op, next_op), newCollection(next_op, skip_op), newCollection(skip_op, skip_op, next_op, next_op), newCollection(next_op, next_op, skip_op, skip_op), newCollection(skip_op, skip_op, skip_op, next_op, next_op) ); Collection lastReader = Collection::newInstance(1); for (int32_t k = 0; k < orders.size(); ++k) { Collection order = orders[k]; Collection opidx = newCollection(0); Collection lastDoc = newCollection(-1); s->search(q, newLucene(q, s, lastDoc, order, opidx, lastReader)); if (lastReader[0]) { // confirm that skipping beyond the last doc, on the previous reader, hits NO_MORE_DOCS IndexReaderPtr previousReader = lastReader[0]; WeightPtr w = q->weight(newLucene(previousReader)); ScorerPtr scorer = w->scorer(previousReader, true, false); if (scorer) { bool more = (scorer->advance(lastDoc[0] + 1) != DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK(!more); } } } } namespace CheckFirstSkipTo { class SkipCollector : public Collector { public: SkipCollector(QueryPtr q, 
IndexSearcherPtr s, Collection lastDoc, Collection lastReader) { this->q = q; this->s = s; this->lastDoc = lastDoc; this->lastReader = lastReader; } virtual ~SkipCollector() { } protected: QueryPtr q; IndexSearcherPtr s; Collection lastDoc; Collection lastReader; ScorerPtr scorer; IndexReaderPtr reader; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { double score = scorer->score(); lastDoc[0] = doc; for (int32_t i = lastDoc[0] + 1; i <= doc; ++i) { WeightPtr w = q->weight(s); ScorerPtr scorer = w->scorer(reader, true, false); BOOST_CHECK(scorer->advance(i) != DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK_EQUAL(doc, scorer->docID()); double skipToScore = scorer->score(); BOOST_CHECK_CLOSE_FRACTION(skipToScore, scorer->score(), 1e-5); BOOST_CHECK_CLOSE_FRACTION(score, skipToScore, 1e-5); } lastDoc[0] = doc; } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { // confirm that skipping beyond the last doc, on the previous reader, hits NO_MORE_DOCS if (lastReader[0]) { IndexReaderPtr previousReader = lastReader[0]; WeightPtr w = q->weight(newLucene(previousReader)); ScorerPtr scorer = w->scorer(previousReader, true, false); if (scorer) { bool more = (scorer->advance(lastDoc[0] + 1) != DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK(!more); } } lastReader[0] = reader; this->reader = reader; lastDoc[0] = -1; } virtual bool acceptsDocsOutOfOrder() { return false; } }; } void QueryUtils::checkFirstSkipTo(QueryPtr q, IndexSearcherPtr s) { Collection lastDoc = newCollection(-1); Collection lastReader = Collection::newInstance(1); s->search(q, newLucene(q, s, lastDoc, lastReader)); if (lastReader[0]) { // confirm that skipping beyond the last doc, on the previous reader, hits NO_MORE_DOCS IndexReaderPtr previousReader = lastReader[0]; WeightPtr w = q->weight(newLucene(previousReader)); ScorerPtr scorer = w->scorer(previousReader, true, false); if (scorer) { bool more = (scorer->advance(lastDoc[0] + 
1) != DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK(!more); } } } } LucenePlusPlus-rel_3.0.4/src/test/search/QueryWrapperFilterTest.cpp000066400000000000000000000064541217574114600255170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "TermQuery.h" #include "Term.h" #include "QueryWrapperFilter.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "MatchAllDocsQuery.h" #include "BooleanQuery.h" #include "FuzzyQuery.h" #include "CachingWrapperFilter.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(QueryWrapperFilterTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testBasic) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"value", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); TermQueryPtr termQuery = newLucene(newLucene(L"field", L"value")); // should not throw exception with primitive query QueryWrapperFilterPtr qwf = newLucene(termQuery); IndexSearcherPtr searcher = newLucene(dir, true); TopDocsPtr hits = searcher->search(newLucene(), qwf, 10); BOOST_CHECK_EQUAL(1, hits->totalHits); hits = searcher->search(newLucene(), newLucene(qwf), 10); BOOST_CHECK_EQUAL(1, hits->totalHits); // should not throw exception with complex primitive query BooleanQueryPtr booleanQuery = newLucene(); booleanQuery->add(termQuery, BooleanClause::MUST); 
booleanQuery->add(newLucene(newLucene(L"field", L"missing")), BooleanClause::MUST_NOT); qwf = newLucene(termQuery); hits = searcher->search(newLucene(), qwf, 10); BOOST_CHECK_EQUAL(1, hits->totalHits); hits = searcher->search(newLucene(), newLucene(qwf), 10); BOOST_CHECK_EQUAL(1, hits->totalHits); // should not throw exception with non primitive Query (doesn't implement Query#createWeight) qwf = newLucene(newLucene(newLucene(L"field", L"valu"))); hits = searcher->search(newLucene(), qwf, 10); BOOST_CHECK_EQUAL(1, hits->totalHits); hits = searcher->search(newLucene(), newLucene(qwf), 10); BOOST_CHECK_EQUAL(1, hits->totalHits); // test a query with no hits termQuery = newLucene(newLucene(L"field", L"not_exist")); qwf = newLucene(termQuery); hits = searcher->search(newLucene(), qwf, 10); BOOST_CHECK_EQUAL(0, hits->totalHits); hits = searcher->search(newLucene(), newLucene(qwf), 10); BOOST_CHECK_EQUAL(0, hits->totalHits); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/ScoreCachingWrappingScorerTest.cpp000066400000000000000000000100441217574114600271070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Scorer.h" #include "TopScoreDocCollector.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "ScoreCachingWrappingScorer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(ScoreCachingWrappingScorerTest, LuceneTestFixture) namespace TestGetScores { class SimpleScorer : public Scorer { public: SimpleScorer(Collection scores) : Scorer(SimilarityPtr()) { this->scores = scores; idx = 0; doc = -1; } virtual ~SimpleScorer() { } public: int32_t idx; int32_t doc; Collection scores; public: virtual double score() { // Advance idx on purpose, so that consecutive calls to score will get different results. // This is to emulate computation of a score. If ScoreCachingWrappingScorer is used, this // should not be called more than once per document. return idx == scores.size() ? std::numeric_limits::quiet_NaN() : scores[idx++]; } virtual int32_t docID() { return doc; } virtual int32_t nextDoc() { return ++doc < scores.size() ? doc : DocIdSetIterator::NO_MORE_DOCS; } virtual int32_t advance(int32_t target) { doc = target; return doc < scores.size() ? doc : DocIdSetIterator::NO_MORE_DOCS; } }; DECLARE_SHARED_PTR(ScoreCachingCollector) class ScoreCachingCollector : public Collector { public: ScoreCachingCollector(int32_t numToCollect) { idx = 0; mscores = Collection::newInstance(numToCollect); } virtual ~ScoreCachingCollector() { } public: int32_t idx; ScorerPtr scorer; Collection mscores; public: virtual void collect(int32_t doc) { // just a sanity check to avoid IOOB. if (idx == mscores.size()) return; // just call score() a couple of times and record the score. 
mscores[idx] = scorer->score(); mscores[idx] = scorer->score(); mscores[idx] = scorer->score(); ++idx; } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { } virtual void setScorer(ScorerPtr scorer) { this->scorer = newLucene(scorer); } virtual bool acceptsDocsOutOfOrder() { return true; } }; } BOOST_AUTO_TEST_CASE(testGetScores) { Collection scores = Collection::newInstance(); scores.add(0.7767749); scores.add(1.7839992); scores.add(8.9925785); scores.add(7.9608946); scores.add(0.07948637); scores.add(2.6356435); scores.add(7.4950366); scores.add(7.1490803); scores.add(8.108544); scores.add(4.961808); scores.add(2.2423935); scores.add(7.285586); scores.add(4.6699767); ScorerPtr s = newLucene(scores); TestGetScores::ScoreCachingCollectorPtr scc = newLucene(scores.size()); scc->setScorer(s); // We need to iterate on the scorer so that its doc() advances. int32_t doc; while ((doc = s->nextDoc()) != DocIdSetIterator::NO_MORE_DOCS) scc->collect(doc); for (int32_t i = 0; i < scores.size(); ++i) BOOST_CHECK_EQUAL(scores[i], scc->mscores[i]); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/ScorerPerfTest.cpp000066400000000000000000000140041217574114600237430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Random.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "IndexSearcher.h" #include "BitSet.h" #include "BooleanQuery.h" #include "Collector.h" #include "ConstantScoreQuery.h" #include "Filter.h" #include "DocIdBitSet.h" using namespace Lucene; DECLARE_SHARED_PTR(CountingHitCollector) DECLARE_SHARED_PTR(MatchingHitCollector) class CountingHitCollector : public Collector { public: CountingHitCollector() { count = 0; sum = 0; docBase = 0; } virtual ~CountingHitCollector() { } public: int32_t count; int32_t sum; int32_t docBase; public: virtual void setScorer(ScorerPtr scorer) { } virtual void collect(int32_t doc) { ++count; sum += docBase + doc; // use it to avoid any possibility of being optimized away } int32_t getCount() { return count; } int32_t getSum() { return sum; } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { this->docBase = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; class MatchingHitCollector : public CountingHitCollector { public: MatchingHitCollector(BitSetPtr answer) { this->answer = answer; this->pos = -1; } virtual ~MatchingHitCollector() { } public: BitSetPtr answer; int32_t pos; public: virtual void collect(int32_t doc) { pos = answer->nextSetBit(pos + 1); if (pos != doc + docBase) boost::throw_exception(RuntimeException(L"Expected doc " + StringUtils::toString(pos) + L" but got " + StringUtils::toString(doc + docBase))); CountingHitCollector::collect(doc); } }; class AddClauseFilter : public Filter { public: AddClauseFilter(BitSetPtr rnd) { this->rnd = rnd; } virtual ~AddClauseFilter() { } protected: BitSetPtr rnd; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { return newLucene(rnd); } }; class ScorerPerfFixture : public LuceneTestFixture { public: ScorerPerfFixture() { r = 
newLucene(); createDummySearcher(); } virtual ~ScorerPerfFixture() { s->close(); } public: RandomPtr r; Collection sets; Collection terms; IndexSearcherPtr s; public: void createDummySearcher() { // Create a dummy index with nothing in it. RAMDirectoryPtr rd = newLucene(); IndexWriterPtr iw = newLucene(rd, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); iw->addDocument(newLucene()); iw->close(); s = newLucene(rd, true); } BitSetPtr randBitSet(int32_t sz, int32_t numBitsToSet) { BitSetPtr set = newLucene(sz); for (int32_t i = 0; i < numBitsToSet; ++i) set->set(r->nextInt(sz)); return set; } Collection randBitSets(int32_t numSets, int32_t setSize) { Collection sets = Collection::newInstance(numSets); for (int32_t i = 0; i < sets.size(); ++i) sets[i] = randBitSet(setSize, r->nextInt(setSize)); return sets; } void doConjunctions(int32_t iter, int32_t maxClauses) { for (int32_t i = 0; i < iter; ++i) { int32_t numClauses = r->nextInt(maxClauses - 1) + 2; // min 2 clauses BooleanQueryPtr bq = newLucene(); BitSetPtr result; for (int32_t j = 0; j < numClauses; ++j) result = addClause(bq, result); CountingHitCollectorPtr hc = newLucene(result); s->search(bq, hc); BOOST_CHECK_EQUAL(result->cardinality(), hc->getCount()); } } void doNestedConjunctions(int32_t iter, int32_t maxOuterClauses, int32_t maxClauses) { for (int32_t i = 0; i < iter; ++i) { int32_t oClauses = r->nextInt(maxOuterClauses - 1) + 2; BooleanQueryPtr oq = newLucene(); BitSetPtr result; for (int32_t o = 0; o < oClauses; ++o) { int32_t numClauses = r->nextInt(maxClauses - 1) + 2; // min 2 clauses BooleanQueryPtr bq = newLucene(); for (int32_t j = 0; j < numClauses; ++j) result = addClause(bq, result); oq->add(bq, BooleanClause::MUST); } CountingHitCollectorPtr hc = newLucene(result); s->search(oq, hc); BOOST_CHECK_EQUAL(result->cardinality(), hc->getCount()); } } BitSetPtr addClause(BooleanQueryPtr bq, BitSetPtr result) { BitSetPtr rnd = sets[r->nextInt(sets.size())]; QueryPtr q = 
newLucene(newLucene(rnd)); bq->add(q, BooleanClause::MUST); if (!result) result = boost::dynamic_pointer_cast(rnd->clone()); else result->_and(rnd); return result; } }; BOOST_FIXTURE_TEST_SUITE(ScorerPerfTest, ScorerPerfFixture) BOOST_AUTO_TEST_CASE(testConjunctions) { // test many small sets... the bugs will be found on boundary conditions sets = randBitSets(1000, 10); doConjunctions(10000, 5); doNestedConjunctions(10000, 3, 3); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/SearchForDuplicatesTest.cpp000066400000000000000000000073341217574114600255730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "SimpleAnalyzer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "QueryParser.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "IndexSearcher.h" #include "Query.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SearchForDuplicatesTest, LuceneTestFixture) static const String PRIORITY_FIELD = L"priority"; static const String ID_FIELD = L"id"; static const String HIGH_PRIORITY = L"high"; static const String MED_PRIORITY = L"medium"; static const String LOW_PRIORITY = L"low"; static void printHits(StringStream& out, Collection hits, SearcherPtr searcher) { out << hits.size() << L" total results\n"; for (int32_t i = 0; i < hits.size(); ++i) { if (i < 10 || (i > 94 && i < 105)) { DocumentPtr doc = searcher->doc(hits[i]->doc); out << i << L" " << doc->get(ID_FIELD) << L"\n"; } } } static void checkHits(Collection hits, int32_t expectedCount, SearcherPtr searcher) { BOOST_CHECK_EQUAL(expectedCount, hits.size()); for 
(int32_t i = 0; i < hits.size(); ++i) { if (i < 10 || (i > 94 && i < 105)) { DocumentPtr doc = searcher->doc(hits[i]->doc); BOOST_CHECK_EQUAL(StringUtils::toString(i), doc->get(ID_FIELD)); } } } static void doTest(StringStream& out, bool useCompoundFile) { DirectoryPtr directory = newLucene(); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(useCompoundFile); int32_t MAX_DOCS = 225; for (int32_t j = 0; j < MAX_DOCS; ++j) { DocumentPtr doc = newLucene(); doc->add(newLucene(PRIORITY_FIELD, HIGH_PRIORITY, Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(ID_FIELD, StringUtils::toString(j), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); // try a search without OR SearcherPtr searcher = newLucene(directory, true); QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, PRIORITY_FIELD, analyzer); QueryPtr query = parser->parse(HIGH_PRIORITY); out << L"Query: " << query->toString(PRIORITY_FIELD) << L"\n"; Collection hits = searcher->search(query, FilterPtr(), MAX_DOCS)->scoreDocs; printHits(out, hits, searcher); checkHits(hits, MAX_DOCS, searcher); searcher->close(); // try a new search with OR searcher = newLucene(directory, true); parser = newLucene(LuceneVersion::LUCENE_CURRENT, PRIORITY_FIELD, analyzer); query = parser->parse(HIGH_PRIORITY + L" OR " + MED_PRIORITY); out << L"Query: " << query->toString(PRIORITY_FIELD) << L"\n"; hits = searcher->search(query, FilterPtr(), MAX_DOCS)->scoreDocs; printHits(out, hits, searcher); checkHits(hits, MAX_DOCS, searcher); searcher->close(); } BOOST_AUTO_TEST_CASE(testRun) { StringStream multiFileOutput; doTest(multiFileOutput, false); StringStream singleFileOutput; doTest(singleFileOutput, true); BOOST_CHECK_EQUAL(multiFileOutput.str(), singleFileOutput.str()); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/search/SearchTest.cpp000066400000000000000000000054241217574114600231040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "SimpleAnalyzer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "QueryParser.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "IndexSearcher.h" #include "Query.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SearchTest, LuceneTestFixture) static void doTestSearch(StringStream& out, bool useCompoundFile) { DirectoryPtr directory = newLucene(); AnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setUseCompoundFile(useCompoundFile); Collection docs = newCollection( L"a b c d e", L"a b c d e a b c d e", L"a b c d e f g h i j", L"a c e", L"e c a", L"a c e a c e", L"a c e a b c" ); for (int32_t j = 0; j < docs.size(); ++j) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"contents", docs[j], Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); SearcherPtr searcher = newLucene(directory, true); Collection queries = newCollection( L"a b", L"\"a b\"", L"\"a b c\"", L"a c", L"\"a c\"", L"\"a c e\"" ); QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, L"contents", analyzer); parser->setPhraseSlop(4); for (int32_t j = 0; j < queries.size(); ++j) { QueryPtr query = parser->parse(queries[j]); out << L"Query: " << query->toString(L"contents") << L"\n"; Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; out << hits.size() << L" 
total results\n"; for (int32_t i = 0; i < hits.size() && i < 10; ++i) { DocumentPtr doc = searcher->doc(hits[i]->doc); out << i << L" " << hits[i]->score << L" " + doc->get(L"contents") << L"\n"; } } searcher->close(); } BOOST_AUTO_TEST_CASE(testSearch) { StringStream multiFileOutput; doTestSearch(multiFileOutput, false); StringStream singleFileOutput; doTestSearch(singleFileOutput, true); BOOST_CHECK_EQUAL(multiFileOutput.str(), singleFileOutput.str()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/SetNormTest.cpp000066400000000000000000000056411217574114600232670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "Collector.h" #include "Scorer.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SetNormTest, LuceneTestFixture) namespace TestSetNorm { class SetNormCollector : public Collector { public: SetNormCollector(Collection scores) { this->scores = scores; this->base = 0; } virtual ~SetNormCollector() { } protected: int32_t base; ScorerPtr scorer; Collection scores; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { scores[doc + base] = scorer->score(); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { base = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; } BOOST_AUTO_TEST_CASE(testSetNorm) { RAMDirectoryPtr store = newLucene(); IndexWriterPtr writer = 
newLucene(store, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); // add the same document four times FieldablePtr f1 = newLucene(L"field", L"word", Field::STORE_YES, Field::INDEX_ANALYZED); DocumentPtr d1 = newLucene(); d1->add(f1); writer->addDocument(d1); writer->addDocument(d1); writer->addDocument(d1); writer->addDocument(d1); writer->close(); // reset the boost of each instance of this document IndexReaderPtr reader = IndexReader::open(store, false); reader->setNorm(0, L"field", 1.0); reader->setNorm(1, L"field", 2.0); reader->setNorm(2, L"field", 4.0); reader->setNorm(3, L"field", 16.0); reader->close(); // check that searches are ordered by this boost Collection scores = Collection::newInstance(4); IndexSearcherPtr searcher = newLucene(store, true); searcher->search(newLucene(newLucene(L"field", L"word")), newLucene(scores)); double lastScore = 0.0; for (int32_t i = 0; i < 4; ++i) { BOOST_CHECK(scores[i] > lastScore); lastScore = scores[i]; } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/SimilarityTest.cpp000066400000000000000000000136321217574114600240250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Similarity.h" #include "Explanation.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" #include "Collector.h" #include "BooleanQuery.h" #include "PhraseQuery.h" #include "Scorer.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SimilarityTest, LuceneTestFixture) namespace TestSimilarity { class SimpleIDFExplanation : public IDFExplanation { public: virtual ~SimpleIDFExplanation() { } public: virtual double getIdf() { return 1.0; } virtual String explain() { return L"Inexplicable"; } }; class SimpleSimilarity : public Similarity { public: virtual ~SimpleSimilarity() { } public: virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } virtual double queryNorm(double sumOfSquaredWeights) { return 1.0; } virtual double tf(double freq) { return freq; } virtual double sloppyFreq(int32_t distance) { return 2.0; } virtual double idf(int32_t docFreq, int32_t numDocs) { return 1.0; } virtual double coord(int32_t overlap, int32_t maxOverlap) { return 1.0; } virtual IDFExplanationPtr idfExplain(Collection terms, SearcherPtr searcher) { return newLucene(); } }; class TermQueryCollector : public Collector { public: virtual ~TermQueryCollector() { } protected: ScorerPtr scorer; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { BOOST_CHECK_EQUAL(1.0, scorer->score()); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { } virtual bool acceptsDocsOutOfOrder() { return true; } }; class BooleanQueryCollector : public Collector { public: BooleanQueryCollector() { this->base = 0; } virtual ~BooleanQueryCollector() { } protected: int32_t base; ScorerPtr scorer; public: virtual void setScorer(ScorerPtr 
scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { BOOST_CHECK_EQUAL((double)doc + (double)base + 1.0, scorer->score()); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { base = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; class PhraseQueryCollector : public Collector { public: PhraseQueryCollector(double expectedScore) { this->expectedScore = expectedScore; } virtual ~PhraseQueryCollector() { } protected: double expectedScore; ScorerPtr scorer; public: virtual void setScorer(ScorerPtr scorer) { this->scorer = scorer; } virtual void collect(int32_t doc) { BOOST_CHECK_EQUAL(expectedScore, scorer->score()); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { } virtual bool acceptsDocsOutOfOrder() { return true; } }; } BOOST_AUTO_TEST_CASE(testSimilarity) { RAMDirectoryPtr store = newLucene(); IndexWriterPtr writer = newLucene(store, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setSimilarity(newLucene()); DocumentPtr d1 = newLucene(); d1->add(newLucene(L"field", L"a c", Field::STORE_YES, Field::INDEX_ANALYZED)); DocumentPtr d2 = newLucene(); d2->add(newLucene(L"field", L"a b c", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(d1); writer->addDocument(d2); writer->optimize(); writer->close(); SearcherPtr searcher = newLucene(store, true); searcher->setSimilarity(newLucene()); TermPtr a = newLucene(L"field", L"a"); TermPtr b = newLucene(L"field", L"b"); TermPtr c = newLucene(L"field", L"c"); searcher->search(newLucene(b), newLucene()); BooleanQueryPtr bq = newLucene(); bq->add(newLucene(a), BooleanClause::SHOULD); bq->add(newLucene(b), BooleanClause::SHOULD); searcher->search(bq, newLucene()); PhraseQueryPtr pq = newLucene(); pq->add(a); pq->add(c); searcher->search(pq, newLucene(1.0)); pq->setSlop(2); searcher->search(pq, newLucene(2.0)); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/search/SimpleExplanationsOfNonMatchesTest.cpp000066400000000000000000000330071217574114600277610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "ExplanationsFixture.h" #include "Explanation.h" #include "MatchAllDocsQuery.h" #include "FieldCacheTermsFilter.h" #include "QueryParser.h" #include "FilteredQuery.h" #include "ConstantScoreQuery.h" #include "DisjunctionMaxQuery.h" #include "MultiPhraseQuery.h" #include "BooleanQuery.h" #include "MockRAMDirectory.h" #include "Document.h" #include "Field.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "MultiSearcher.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "SpanNearQuery.h" #include "SpanQuery.h" #include "SpanTermQuery.h" #include "Term.h" #include "CheckHits.h" using namespace Lucene; class ItemizedFilter : public FieldCacheTermsFilter { public: ItemizedFilter(const String& field, Collection terms) : FieldCacheTermsFilter(field, int2str(terms)) { } ItemizedFilter(Collection terms) : FieldCacheTermsFilter(L"KEY", int2str(terms)) { } virtual ~ItemizedFilter() { } public: Collection int2str(Collection terms) { Collection out = Collection::newInstance(terms.size()); for (int32_t i = 0; i < terms.size(); ++i) out[i] = StringUtils::toString(terms[i]); return out; } }; /// TestExplanations subclass focusing on basic query types class SimpleExplanationsOfNonMatchesFixture : public ExplanationsFixture { public: SimpleExplanationsOfNonMatchesFixture() { } virtual ~SimpleExplanationsOfNonMatchesFixture() { } public: using ExplanationsFixture::qtest; /// ignore matches and focus on non-matches 
virtual void qtest(QueryPtr q, Collection expDocNrs) { CheckHits::checkNoMatchExplanations(q, FIELD, searcher, expDocNrs); } }; BOOST_FIXTURE_TEST_SUITE(SimpleExplanationsOfNonMatchesTest, SimpleExplanationsOfNonMatchesFixture) BOOST_AUTO_TEST_CASE(testT1) { qtest(L"w1", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testT2) { qtest(L"w1^1000", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMA1) { qtest(newLucene(), newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMA2) { QueryPtr q = newLucene(); q->setBoost(1000); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testP1) { qtest(L"\"w1 w2\"", newCollection(0)); } BOOST_AUTO_TEST_CASE(testP2) { qtest(L"\"w1 w3\"", newCollection(1, 3)); } BOOST_AUTO_TEST_CASE(testP3) { qtest(L"\"w1 w2\"~1", newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testP4) { qtest(L"\"w2 w3\"~1", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testP5) { qtest(L"\"w3 w2\"~1", newCollection(1, 3)); } BOOST_AUTO_TEST_CASE(testP6) { qtest(L"\"w3 w2\"~2", newCollection(0, 1, 3)); } BOOST_AUTO_TEST_CASE(testP7) { qtest(L"\"w3 w2\"~3", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testFQ1) { qtest(newLucene(qp->parse(L"w1"), newLucene(newCollection(0, 1, 2, 3))), newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testFQ2) { qtest(newLucene(qp->parse(L"w1"), newLucene(newCollection(0, 2, 3))), newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testFQ3) { qtest(newLucene(qp->parse(L"xx"), newLucene(newCollection(1, 3))), newCollection(3)); } BOOST_AUTO_TEST_CASE(testFQ4) { qtest(newLucene(qp->parse(L"xx^1000"), newLucene(newCollection(1, 3))), newCollection(3)); } BOOST_AUTO_TEST_CASE(testFQ6) { QueryPtr q = newLucene(qp->parse(L"xx"), newLucene(newCollection(1, 3))); q->setBoost(1000); qtest(q, newCollection(3)); } BOOST_AUTO_TEST_CASE(testCSQ1) { QueryPtr q = newLucene(newLucene(newCollection(0, 1, 2, 3))); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testCSQ2) { QueryPtr q = 
newLucene(newLucene(newCollection(1, 3))); qtest(q, newCollection(1, 3)); } BOOST_AUTO_TEST_CASE(testCSQ3) { QueryPtr q = newLucene(newLucene(newCollection(0, 2))); q->setBoost(1000); qtest(q, newCollection(0, 2)); } BOOST_AUTO_TEST_CASE(testDMQ1) { DisjunctionMaxQueryPtr q = newLucene(0.0); q->add(qp->parse(L"w1")); q->add(qp->parse(L"w5")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ2) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"w1")); q->add(qp->parse(L"w5")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ3) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"QQ")); q->add(qp->parse(L"w5")); qtest(q, newCollection(0)); } BOOST_AUTO_TEST_CASE(testDMQ4) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"QQ")); q->add(qp->parse(L"xx")); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ5) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"yy -QQ")); q->add(qp->parse(L"xx")); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ6) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"-yy w3")); q->add(qp->parse(L"xx")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ7) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"-yy w3")); q->add(qp->parse(L"w2")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ8) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"yy w5^100")); q->add(qp->parse(L"xx^100000")); qtest(q, newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ9) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"yy w5^100")); q->add(qp->parse(L"xx^0")); qtest(q, newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testMPQ1) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2", L"w3", L"xx"))); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMPQ2) { MultiPhraseQueryPtr q = newLucene(); 
q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2", L"w3"))); qtest(q, newCollection(0, 1, 3)); } BOOST_AUTO_TEST_CASE(testMPQ3) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1", L"xx"))); q->add(ta(newCollection(L"w2", L"w3"))); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMPQ4) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2"))); qtest(q, newCollection(0)); } BOOST_AUTO_TEST_CASE(testMPQ5) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2"))); q->setSlop(1); qtest(q, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testMPQ6) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1", L"w3"))); q->add(ta(newCollection(L"w2"))); q->setSlop(1); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ1) { qtest(L"+w1 +w2", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ2) { qtest(L"+yy +w3", newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testBQ3) { qtest(L"yy +w3", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ4) { qtest(L"w1 (-xx w2)", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ5) { qtest(L"w1 (+qq w2)", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ6) { qtest(L"w1 -(-qq w5)", newCollection(1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ7) { qtest(L"+w1 +(qq (xx -w2) (+w3 +w4))", newCollection(0)); } BOOST_AUTO_TEST_CASE(testBQ8) { qtest(L"+w1 (qq (xx -w2) (+w3 +w4))", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ9) { qtest(L"+w1 (qq (-xx w2) -(+w3 +w4))", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ10) { qtest(L"+w1 +(qq (-xx w2) -(+w3 +w4))", newCollection(1)); } BOOST_AUTO_TEST_CASE(testBQ11) { qtest(L"w1 w2^1000.0", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ14) { BooleanQueryPtr q = newLucene(true); q->add(qp->parse(L"QQQQQ"), BooleanClause::SHOULD); q->add(qp->parse(L"w1"), BooleanClause::SHOULD); qtest(q, 
newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ15) { BooleanQueryPtr q = newLucene(true); q->add(qp->parse(L"QQQQQ"), BooleanClause::MUST_NOT); q->add(qp->parse(L"w1"), BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ16) { BooleanQueryPtr q = newLucene(true); q->add(qp->parse(L"QQQQQ"), BooleanClause::SHOULD); q->add(qp->parse(L"w1 -xx"), BooleanClause::SHOULD); qtest(q, newCollection(0, 1)); } BOOST_AUTO_TEST_CASE(testBQ17) { BooleanQueryPtr q = newLucene(true); q->add(qp->parse(L"w2"), BooleanClause::SHOULD); q->add(qp->parse(L"w1 -xx"), BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ19) { qtest(L"-yy w3", newCollection(0, 1)); } BOOST_AUTO_TEST_CASE(testBQ20) { BooleanQueryPtr q = newLucene(); q->setMinimumNumberShouldMatch(2); q->add(qp->parse(L"QQQQQ"), BooleanClause::SHOULD); q->add(qp->parse(L"yy"), BooleanClause::SHOULD); q->add(qp->parse(L"zz"), BooleanClause::SHOULD); q->add(qp->parse(L"w5"), BooleanClause::SHOULD); q->add(qp->parse(L"w4"), BooleanClause::SHOULD); qtest(q, newCollection(0, 3)); } BOOST_AUTO_TEST_CASE(testTermQueryMultiSearcherExplain) { // creating two directories for indices DirectoryPtr indexStoreA = newLucene(); DirectoryPtr indexStoreB = newLucene(); DocumentPtr lDoc = newLucene(); lDoc->add(newLucene(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED)); DocumentPtr lDoc2 = newLucene(); lDoc2->add(newLucene(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED)); DocumentPtr lDoc3 = newLucene(); lDoc3->add(newLucene(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED)); IndexWriterPtr writerA = newLucene(indexStoreA, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); IndexWriterPtr writerB = newLucene(indexStoreB, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writerA->addDocument(lDoc); writerA->addDocument(lDoc2); writerA->optimize(); 
writerA->close(); writerB->addDocument(lDoc3); writerB->close(); QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, L"fulltext", newLucene(LuceneVersion::LUCENE_CURRENT)); QueryPtr query = parser->parse(L"handle:1"); Collection searchers = newCollection( newLucene(indexStoreB, true), newLucene(indexStoreA, true) ); SearcherPtr mSearcher = newLucene(searchers); Collection hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); ExplanationPtr explain = mSearcher->explain(query, hits[0]->doc); String exp = explain->toString(); BOOST_CHECK(exp.find(L"maxDocs=3") != String::npos); BOOST_CHECK(exp.find(L"docFreq=3") != String::npos); query = parser->parse(L"handle:\"1 2\""); hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); explain = mSearcher->explain(query, hits[0]->doc); exp = explain->toString(); BOOST_CHECK(exp.find(L"1=3") != String::npos); BOOST_CHECK(exp.find(L"2=3") != String::npos); query = newLucene(newCollection(newLucene(newLucene(L"handle", L"1")), newLucene(newLucene(L"handle", L"2"))), 0, true); hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); explain = mSearcher->explain(query, hits[0]->doc); exp = explain->toString(); BOOST_CHECK(exp.find(L"1=3") != String::npos); BOOST_CHECK(exp.find(L"2=3") != String::npos); mSearcher->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/SimpleExplanationsTest.cpp000066400000000000000000000322661217574114600255220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "ExplanationsFixture.h" #include "Explanation.h" #include "MatchAllDocsQuery.h" #include "FieldCacheTermsFilter.h" #include "QueryParser.h" #include "FilteredQuery.h" #include "ConstantScoreQuery.h" #include "DisjunctionMaxQuery.h" #include "MultiPhraseQuery.h" #include "BooleanQuery.h" #include "MockRAMDirectory.h" #include "Document.h" #include "Field.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "MultiSearcher.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "SpanNearQuery.h" #include "SpanQuery.h" #include "SpanTermQuery.h" #include "Term.h" using namespace Lucene; class ItemizedFilter : public FieldCacheTermsFilter { public: ItemizedFilter(const String& field, Collection terms) : FieldCacheTermsFilter(field, int2str(terms)) { } ItemizedFilter(Collection terms) : FieldCacheTermsFilter(L"KEY", int2str(terms)) { } virtual ~ItemizedFilter() { } public: Collection int2str(Collection terms) { Collection out = Collection::newInstance(terms.size()); for (int32_t i = 0; i < terms.size(); ++i) out[i] = StringUtils::toString(terms[i]); return out; } }; /// TestExplanations subclass focusing on basic query types class SimpleExplanationsFixture : public ExplanationsFixture { public: SimpleExplanationsFixture() { } virtual ~SimpleExplanationsFixture() { } }; BOOST_FIXTURE_TEST_SUITE(SimpleExplanationsTest, SimpleExplanationsFixture) BOOST_AUTO_TEST_CASE(testT1) { qtest(L"w1", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testT2) { qtest(L"w1^1000", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMA1) { qtest(newLucene(), newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMA2) { QueryPtr q = newLucene(); q->setBoost(1000); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testP1) { qtest(L"\"w1 w2\"", newCollection(0)); } BOOST_AUTO_TEST_CASE(testP2) { qtest(L"\"w1 w3\"", newCollection(1, 
3)); } BOOST_AUTO_TEST_CASE(testP3) { qtest(L"\"w1 w2\"~1", newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testP4) { qtest(L"\"w2 w3\"~1", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testP5) { qtest(L"\"w3 w2\"~1", newCollection(1, 3)); } BOOST_AUTO_TEST_CASE(testP6) { qtest(L"\"w3 w2\"~2", newCollection(0, 1, 3)); } BOOST_AUTO_TEST_CASE(testP7) { qtest(L"\"w3 w2\"~3", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testFQ1) { qtest(newLucene(qp->parse(L"w1"), newLucene(newCollection(0, 1, 2, 3))), newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testFQ2) { qtest(newLucene(qp->parse(L"w1"), newLucene(newCollection(0, 2, 3))), newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testFQ3) { qtest(newLucene(qp->parse(L"xx"), newLucene(newCollection(1, 3))), newCollection(3)); } BOOST_AUTO_TEST_CASE(testFQ4) { qtest(newLucene(qp->parse(L"xx^1000"), newLucene(newCollection(1, 3))), newCollection(3)); } BOOST_AUTO_TEST_CASE(testFQ6) { QueryPtr q = newLucene(qp->parse(L"xx"), newLucene(newCollection(1, 3))); q->setBoost(1000); qtest(q, newCollection(3)); } BOOST_AUTO_TEST_CASE(testCSQ1) { QueryPtr q = newLucene(newLucene(newCollection(0, 1, 2, 3))); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testCSQ2) { QueryPtr q = newLucene(newLucene(newCollection(1, 3))); qtest(q, newCollection(1, 3)); } BOOST_AUTO_TEST_CASE(testCSQ3) { QueryPtr q = newLucene(newLucene(newCollection(0, 2))); q->setBoost(1000); qtest(q, newCollection(0, 2)); } BOOST_AUTO_TEST_CASE(testDMQ1) { DisjunctionMaxQueryPtr q = newLucene(0.0); q->add(qp->parse(L"w1")); q->add(qp->parse(L"w5")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ2) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"w1")); q->add(qp->parse(L"w5")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ3) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"QQ")); q->add(qp->parse(L"w5")); qtest(q, newCollection(0)); } BOOST_AUTO_TEST_CASE(testDMQ4) { 
DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"QQ")); q->add(qp->parse(L"xx")); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ5) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"yy -QQ")); q->add(qp->parse(L"xx")); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ6) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"-yy w3")); q->add(qp->parse(L"xx")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ7) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"-yy w3")); q->add(qp->parse(L"w2")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ8) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"yy w5^100")); q->add(qp->parse(L"xx^100000")); qtest(q, newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testDMQ9) { DisjunctionMaxQueryPtr q = newLucene(0.5); q->add(qp->parse(L"yy w5^100")); q->add(qp->parse(L"xx^0")); qtest(q, newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testMPQ1) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2", L"w3", L"xx"))); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMPQ2) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2", L"w3"))); qtest(q, newCollection(0, 1, 3)); } BOOST_AUTO_TEST_CASE(testMPQ3) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1", L"xx"))); q->add(ta(newCollection(L"w2", L"w3"))); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testMPQ4) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2"))); qtest(q, newCollection(0)); } BOOST_AUTO_TEST_CASE(testMPQ5) { MultiPhraseQueryPtr q = newLucene(); q->add(ta(newCollection(L"w1"))); q->add(ta(newCollection(L"w2"))); q->setSlop(1); qtest(q, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testMPQ6) { MultiPhraseQueryPtr q = newLucene(); 
q->add(ta(newCollection(L"w1", L"w3"))); q->add(ta(newCollection(L"w2"))); q->setSlop(1); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ1) { qtest(L"+w1 +w2", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ2) { qtest(L"+yy +w3", newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testBQ3) { qtest(L"yy +w3", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ4) { qtest(L"w1 (-xx w2)", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ5) { qtest(L"w1 (+qq w2)", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ6) { qtest(L"w1 -(-qq w5)", newCollection(1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ7) { qtest(L"+w1 +(qq (xx -w2) (+w3 +w4))", newCollection(0)); } BOOST_AUTO_TEST_CASE(testBQ8) { qtest(L"+w1 (qq (xx -w2) (+w3 +w4))", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ9) { qtest(L"+w1 (qq (-xx w2) -(+w3 +w4))", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ10) { qtest(L"+w1 +(qq (-xx w2) -(+w3 +w4))", newCollection(1)); } BOOST_AUTO_TEST_CASE(testBQ11) { qtest(L"w1 w2^1000.0", newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ14) { BooleanQueryPtr q = newLucene(true); q->add(qp->parse(L"QQQQQ"), BooleanClause::SHOULD); q->add(qp->parse(L"w1"), BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ15) { BooleanQueryPtr q = newLucene(true); q->add(qp->parse(L"QQQQQ"), BooleanClause::MUST_NOT); q->add(qp->parse(L"w1"), BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ16) { BooleanQueryPtr q = newLucene(true); q->add(qp->parse(L"QQQQQ"), BooleanClause::SHOULD); q->add(qp->parse(L"w1 -xx"), BooleanClause::SHOULD); qtest(q, newCollection(0, 1)); } BOOST_AUTO_TEST_CASE(testBQ17) { BooleanQueryPtr q = newLucene(true); q->add(qp->parse(L"w2"), BooleanClause::SHOULD); q->add(qp->parse(L"w1 -xx"), BooleanClause::SHOULD); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testBQ19) { qtest(L"-yy w3", newCollection(0, 1)); } 
BOOST_AUTO_TEST_CASE(testBQ20) { BooleanQueryPtr q = newLucene(); q->setMinimumNumberShouldMatch(2); q->add(qp->parse(L"QQQQQ"), BooleanClause::SHOULD); q->add(qp->parse(L"yy"), BooleanClause::SHOULD); q->add(qp->parse(L"zz"), BooleanClause::SHOULD); q->add(qp->parse(L"w5"), BooleanClause::SHOULD); q->add(qp->parse(L"w4"), BooleanClause::SHOULD); qtest(q, newCollection(0, 3)); } BOOST_AUTO_TEST_CASE(testTermQueryMultiSearcherExplain) { // creating two directories for indices DirectoryPtr indexStoreA = newLucene(); DirectoryPtr indexStoreB = newLucene(); DocumentPtr lDoc = newLucene(); lDoc->add(newLucene(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED)); DocumentPtr lDoc2 = newLucene(); lDoc2->add(newLucene(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED)); DocumentPtr lDoc3 = newLucene(); lDoc3->add(newLucene(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED)); IndexWriterPtr writerA = newLucene(indexStoreA, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); IndexWriterPtr writerB = newLucene(indexStoreB, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writerA->addDocument(lDoc); writerA->addDocument(lDoc2); writerA->optimize(); writerA->close(); writerB->addDocument(lDoc3); writerB->close(); QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, L"fulltext", newLucene(LuceneVersion::LUCENE_CURRENT)); QueryPtr query = parser->parse(L"handle:1"); Collection searchers = newCollection( newLucene(indexStoreB, true), newLucene(indexStoreA, true) ); SearcherPtr mSearcher = newLucene(searchers); Collection hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); ExplanationPtr explain = mSearcher->explain(query, hits[0]->doc); String exp = explain->toString(); BOOST_CHECK(exp.find(L"maxDocs=3") != String::npos); BOOST_CHECK(exp.find(L"docFreq=3") != String::npos); query = parser->parse(L"handle:\"1 2\""); hits = 
mSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); explain = mSearcher->explain(query, hits[0]->doc); exp = explain->toString(); BOOST_CHECK(exp.find(L"1=3") != String::npos); BOOST_CHECK(exp.find(L"2=3") != String::npos); query = newLucene(newCollection(newLucene(newLucene(L"handle", L"1")), newLucene(newLucene(L"handle", L"2"))), 0, true); hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); explain = mSearcher->explain(query, hits[0]->doc); exp = explain->toString(); BOOST_CHECK(exp.find(L"1=3") != String::npos); BOOST_CHECK(exp.find(L"2=3") != String::npos); mSearcher->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/SloppyPhraseQueryTest.cpp000066400000000000000000000123571217574114600253610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Document.h" #include "Field.h" #include "PhraseQuery.h" #include "Term.h" #include "RAMDirectory.h" #include "WhitespaceAnalyzer.h" #include "IndexWriter.h" #include "IndexSearcher.h" #include "TopDocs.h" using namespace Lucene; class SloppyPhraseQueryFixture : public LuceneTestFixture { public: SloppyPhraseQueryFixture() { S_1 = L"A A A"; S_2 = L"A 1 2 3 A 4 5 6 A"; DOC_1 = makeDocument(L"X " + S_1 + L" Y"); DOC_2 = makeDocument(L"X " + S_2 + L" Y"); DOC_3 = makeDocument(L"X " + S_1 + L" A Y"); DOC_1_B = makeDocument(L"X " + S_1 + L" Y N N N N " + S_1 + L" Z"); DOC_2_B = makeDocument(L"X " + S_2 + L" Y N N N N " + S_2 + L" Z"); DOC_3_B = makeDocument(L"X " + S_1 + L" A Y N N N N " + S_1 + L" A Y"); DOC_4 = makeDocument(L"A A X A X B A X B B A A X B A A"); QUERY_1 = makePhraseQuery(S_1); QUERY_2 = makePhraseQuery(S_2); QUERY_4 = makePhraseQuery(L"X A A"); } virtual ~SloppyPhraseQueryFixture() { } protected: String S_1; String S_2; DocumentPtr DOC_1; DocumentPtr DOC_2; DocumentPtr DOC_3; DocumentPtr DOC_1_B; DocumentPtr DOC_2_B; DocumentPtr DOC_3_B; DocumentPtr DOC_4; PhraseQueryPtr QUERY_1; PhraseQueryPtr QUERY_2; PhraseQueryPtr QUERY_4; public: DocumentPtr makeDocument(const String& docText) { DocumentPtr doc = newLucene(); FieldPtr f = newLucene(L"f", docText, Field::STORE_NO, Field::INDEX_ANALYZED); f->setOmitNorms(true); doc->add(f); return doc; } PhraseQueryPtr makePhraseQuery(const String& terms) { PhraseQueryPtr query = newLucene(); Collection tokens = StringUtils::split(terms, L" +"); for (int32_t i = 0; i < tokens.size(); ++i) query->add(newLucene(L"f", tokens[i])); return query; } double checkPhraseQuery(DocumentPtr doc, PhraseQueryPtr query, int32_t slop, int32_t expectedNumResults) { query->setSlop(slop); RAMDirectoryPtr ramDir = newLucene(); WhitespaceAnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = 
newLucene(ramDir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED); writer->addDocument(doc); writer->close(); IndexSearcherPtr searcher = newLucene(ramDir, true); TopDocsPtr td = searcher->search(query, FilterPtr(), 10); BOOST_CHECK_EQUAL(expectedNumResults, td->totalHits); searcher->close(); ramDir->close(); return td->maxScore; } }; BOOST_FIXTURE_TEST_SUITE(SloppyPhraseQueryTest, SloppyPhraseQueryFixture) /// Test DOC_4 and QUERY_4. /// QUERY_4 has a fuzzy (len=1) match to DOC_4, so all slop values > 0 should succeed. /// But only the 3rd sequence of A's in DOC_4 will do. BOOST_AUTO_TEST_CASE(testDoc4Query4AllSlopsShouldMatch) { for (int32_t slop = 0; slop < 30; ++slop) { int32_t numResultsExpected = slop < 1 ? 0 : 1; checkPhraseQuery(DOC_4, QUERY_4, slop, numResultsExpected); } } /// Test DOC_1 and QUERY_1. /// QUERY_1 has an exact match to DOC_1, so all slop values should succeed. BOOST_AUTO_TEST_CASE(testDoc1Query1AllSlopsShouldMatch) { for (int32_t slop = 0; slop < 30; ++slop) { double score1 = checkPhraseQuery(DOC_1, QUERY_1, slop, 1); double score2 = checkPhraseQuery(DOC_1_B, QUERY_1, slop, 1); BOOST_CHECK(score2 > score1); } } /// Test DOC_2 and QUERY_1. /// 6 should be the minimum slop to make QUERY_1 match DOC_2. BOOST_AUTO_TEST_CASE(testDoc2Query1Slop6OrMoreShouldMatch) { for (int32_t slop = 0; slop < 30; ++slop) { int32_t numResultsExpected = slop < 6 ? 0 : 1; double score1 = checkPhraseQuery(DOC_2, QUERY_1, slop, numResultsExpected); if (numResultsExpected > 0) { double score2 = checkPhraseQuery(DOC_2_B, QUERY_1, slop, 1); BOOST_CHECK(score2 > score1); } } } /// Test DOC_2 and QUERY_2. /// QUERY_2 has an exact match to DOC_2, so all slop values should succeed. BOOST_AUTO_TEST_CASE(testDoc2Query2AllSlopsShouldMatch) { for (int32_t slop = 0; slop < 30; ++slop) { double score1 = checkPhraseQuery(DOC_2, QUERY_2, slop, 1); double score2 = checkPhraseQuery(DOC_2_B, QUERY_2, slop, 1); BOOST_CHECK(score2 > score1); } } /// Test DOC_3 and QUERY_1. 
/// QUERY_1 has an exact match to DOC_3, so all slop values should succeed. BOOST_AUTO_TEST_CASE(testDoc3Query1AllSlopsShouldMatch) { for (int32_t slop = 0; slop < 30; ++slop) { double score1 = checkPhraseQuery(DOC_3, QUERY_1, slop, 1); double score2 = checkPhraseQuery(DOC_3_B, QUERY_1, slop, 1); BOOST_CHECK(score2 > score1); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/SortTest.cpp000066400000000000000000001307011217574114600226230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TermQuery.h" #include "Term.h" #include "Sort.h" #include "SortField.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "TopFieldDocs.h" #include "Random.h" #include "MatchAllDocsQuery.h" #include "FieldCache.h" #include "FieldCacheSanityChecker.h" #include "FieldComparatorSource.h" #include "FieldComparator.h" #include "MultiSearcher.h" #include "ParallelMultiSearcher.h" #include "Filter.h" #include "BitSet.h" #include "DocIdBitSet.h" #include "IndexReader.h" #include "TopFieldCollector.h" #include "BooleanQuery.h" #include "MiscUtils.h" using namespace Lucene; class SortFixture : public LuceneTestFixture { public: SortFixture() { r = newLucene(); // document data: // the tracer field is used to determine which document was hit the contents field is used to search and sort by relevance // the int field to sort by int the double field to sort by double the string field to sort by string the i18n field // includes accented characters for 
testing locale-specific sorting data = Collection< Collection >::newInstance(14); // tracer contents int double string custom i18n long byte encoding data[0] = newCollection(L"A", L"x a", L"5", L"4.0", L"c", L"A-3", L"p\u00EAche", L"10", L"177", L"J"); data[1] = newCollection(L"B", L"y a", L"5", L"3.4028235E38", L"i", L"B-10", L"HAT", L"1000000000", L"52", L"I"); data[2] = newCollection(L"C", L"x a b c", L"2147483647", L"1.0", L"j", L"A-2", L"p\u00E9ch\u00E9", L"99999999", L"66", L"H"); data[3] = newCollection(L"D", L"y a b c", L"-1", L"0.0", L"a", L"C-0", L"HUT", StringUtils::toString(LLONG_MAX), L"0", L"G"); data[4] = newCollection(L"E", L"x a b c d", L"5", L"2.0", L"h", L"B-8", L"peach", StringUtils::toString(LLONG_MIN), StringUtils::toString(UCHAR_MAX), L"F"); data[5] = newCollection(L"F", L"y a b c d", L"2", L"3.14159", L"g", L"B-1", L"H\u00C5T", L"-44", L"51", L"E"); data[6] = newCollection(L"G", L"x a b c d", L"3", L"-1.0", L"f", L"C-100", L"sin", L"323254543543", L"151", L"D"); data[7] = newCollection(L"H", L"y a b c d", L"0", L"1.4E-45", L"e", L"C-88", L"H\u00D8T", L"1023423423005", L"1", L"C"); data[8] = newCollection(L"I", L"x a b c d e f", L"-2147483648", L"1.0e+0", L"d", L"A-10", L"s\u00EDn", L"332422459999", L"102", L"B"); data[9] = newCollection(L"J", L"y a b c d e f", L"4", L".5", L"b", L"C-7", L"HOT", L"34334543543", L"53", L"A"); data[10] = newCollection(L"W", L"g", L"1", L"", L"", L"", L"", L"", L"", L""); data[11] = newCollection(L"X", L"g", L"1", L"0.1", L"", L"", L"", L"", L"", L""); data[12] = newCollection(L"Y", L"g", L"1", L"0.2", L"", L"", L"", L"", L"", L""); data[13] = newCollection(L"Z", L"f g", L"", L"", L"", L"", L"", L"", L"", L""); full = getFullIndex(); searchX = getXIndex(); searchY = getYIndex(); queryX = newLucene(newLucene(L"contents", L"x")); queryY = newLucene(newLucene(L"contents", L"y")); queryA = newLucene(newLucene(L"contents", L"a")); queryE = newLucene(newLucene(L"contents", L"e")); queryF = 
newLucene(newLucene(L"contents", L"f")); queryG = newLucene(newLucene(L"contents", L"g")); sort = newLucene(); } virtual ~SortFixture() { } protected: static const int32_t NUM_STRINGS; SearcherPtr full; SearcherPtr searchX; SearcherPtr searchY; QueryPtr queryX; QueryPtr queryY; QueryPtr queryA; QueryPtr queryE; QueryPtr queryF; QueryPtr queryG; SortPtr sort; RandomPtr r; Collection< Collection > data; protected: SearcherPtr getFullIndex() { return getIndex(true, true); } SearcherPtr getXIndex() { return getIndex(true, false); } SearcherPtr getYIndex() { return getIndex(false, true); } SearcherPtr getEmptyIndex() { return getIndex(false, false); } SearcherPtr getIndex(bool even, bool odd) { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(2); writer->setMergeFactor(1000); for (int32_t i = 0; i < data.size(); ++i) { if (((i % 2) == 0 && even) || ((i % 2) == 1 && odd)) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"tracer", data[i][0], Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"contents", data[i][1], Field::STORE_NO, Field::INDEX_ANALYZED)); if (!data[i][2].empty()) doc->add(newLucene(L"int", data[i][2], Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); if (!data[i][3].empty()) doc->add(newLucene(L"double", data[i][3], Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); if (!data[i][4].empty()) doc->add(newLucene(L"string", data[i][4], Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); if (!data[i][5].empty()) doc->add(newLucene(L"custom", data[i][5], Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); if (!data[i][6].empty()) doc->add(newLucene(L"i18n", data[i][6], Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); if (!data[i][7].empty()) doc->add(newLucene(L"long", data[i][7], Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); if (!data[i][8].empty()) doc->add(newLucene(L"byte", data[i][8], Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); if 
(!data[i][9].empty()) doc->add(newLucene(L"parser", data[i][9], Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->setBoost(2); // produce some scores above 1.0 writer->addDocument(doc); } } writer->close(); IndexSearcherPtr s = newLucene(indexStore, true); s->setDefaultFieldSortScoring(true, true); return s; } MapStringDouble getScores(Collection hits, SearcherPtr searcher) { MapStringDouble scoreMap = MapStringDouble::newInstance(); int32_t n = hits.size(); for (int32_t i = 0; i < n; ++i) { DocumentPtr doc = searcher->doc(hits[i]->doc); Collection v = doc->getValues(L"tracer"); BOOST_CHECK_EQUAL(v.size(), 1); scoreMap.put(v[0], hits[i]->score); } return scoreMap; } void checkSameValues(MapStringDouble m1, MapStringDouble m2) { int32_t n = m1.size(); int32_t m = m2.size(); BOOST_CHECK_EQUAL(n, m); for (MapStringDouble::iterator key = m1.begin(); key != m1.end(); ++key) { double o1 = m1.get(key->first); double o2 = m2.get(key->first); BOOST_CHECK_CLOSE_FRACTION(o1, o2, 1e-6); } } /// make sure the documents returned by the search match the expected list void checkMatches(SearcherPtr searcher, QueryPtr query, SortPtr sort, const String& expectedResult) { TopDocsPtr hits = searcher->search(query, FilterPtr(), expectedResult.length(), sort); Collection result = hits->scoreDocs; BOOST_CHECK_EQUAL(hits->totalHits, expectedResult.length()); StringStream buff; int32_t n = result.size(); for (int32_t i = 0; i < n; ++i) { DocumentPtr doc = searcher->doc(result[i]->doc); Collection v = doc->getValues(L"tracer"); for (int32_t j = 0; j < v.size(); ++j) buff << v[j]; } BOOST_CHECK_EQUAL(expectedResult, buff.str()); } IndexSearcherPtr getFullStrings() { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(4); writer->setMergeFactor(97); for (int32_t i = 0; i < NUM_STRINGS; ++i) { DocumentPtr doc = newLucene(); String num = getRandomCharString(getRandomNumber(2, 
8), 48, 52); doc->add(newLucene(L"tracer", num, Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"string", num, Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50); doc->add(newLucene(L"string2", num2, Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"tracer2", num2, Field::STORE_YES, Field::INDEX_NO)); doc->setBoost(2.0); // produce some scores above 1.0 writer->setMaxBufferedDocs(getRandomNumber(2, 12)); writer->addDocument(doc); } writer->close(); return newLucene(indexStore, true); } String getRandomNumberString(int32_t num, int32_t low, int32_t high) { StringStream buff; for (int32_t i = 0; i < num; ++i) buff << getRandomNumber(low, high); return buff.str(); } String getRandomCharString(int32_t num) { return getRandomCharString(num, 48, 122); } String getRandomCharString(int32_t num, int32_t start, int32_t end) { static const wchar_t* alphanum = L"0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz"; StringStream buff; for (int32_t i = 0; i < num; ++i) buff << alphanum[getRandomNumber(start, end)]; return buff.str(); } int32_t getRandomNumber(int32_t low, int32_t high) { return (std::abs(r->nextInt()) % (high - low)) + low; } void checkSaneFieldCaches() { Collection entries = FieldCache::DEFAULT()->getCacheEntries(); Collection insanity = FieldCacheSanityChecker::checkSanity(entries); BOOST_CHECK_EQUAL(0, insanity.size()); } /// runs a variety of sorts useful for multisearchers void runMultiSorts(SearcherPtr multi, bool isFull) { sort->setSort(SortField::FIELD_DOC()); String expected = isFull ? L"ABCDEFGHIJ" : L"ACEGIBDFHJ"; checkMatches(multi, queryA, sort, expected); sort->setSort(newCollection(newLucene(L"int", SortField::INT))); expected = isFull ? L"IDHFGJABEC" : L"IDHFGJAEBC"; checkMatches(multi, queryA, sort, expected); sort->setSort(newCollection(newLucene(L"int", SortField::INT), SortField::FIELD_DOC())); expected = isFull ? 
L"IDHFGJABEC" : L"IDHFGJAEBC"; checkMatches(multi, queryA, sort, expected); sort->setSort(newCollection(newLucene(L"int", SortField::INT))); expected = isFull ? L"IDHFGJABEC" : L"IDHFGJAEBC"; checkMatches(multi, queryA, sort, expected); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE), SortField::FIELD_DOC())); checkMatches(multi, queryA, sort, L"GDHJCIEFAB"); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE))); checkMatches(multi, queryA, sort, L"GDHJCIEFAB"); sort->setSort(newCollection(newLucene(L"string", SortField::STRING))); checkMatches(multi, queryA, sort, L"DJAIHGFEBC"); sort->setSort(newCollection(newLucene(L"int", SortField::INT, true))); expected = isFull ? L"CABEJGFHDI" : L"CAEBJGFHDI"; checkMatches(multi, queryA, sort, expected); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE, true))); checkMatches(multi, queryA, sort, L"BAFECIJHDG"); sort->setSort(newCollection(newLucene(L"string", SortField::STRING, true))); checkMatches(multi, queryA, sort, L"CBEFGHIAJD"); sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"double", SortField::DOUBLE))); checkMatches(multi, queryA, sort, L"IDHFGJEABC"); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE), newLucene(L"string", SortField::STRING))); checkMatches(multi, queryA, sort, L"GDHJICEFAB"); sort->setSort(newCollection(newLucene(L"int", SortField::INT))); checkMatches(multi, queryF, sort, L"IZJ"); sort->setSort(newCollection(newLucene(L"int", SortField::INT, true))); checkMatches(multi, queryF, sort, L"JZI"); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE))); checkMatches(multi, queryF, sort, L"ZJI"); sort->setSort(newCollection(newLucene(L"string", SortField::STRING))); checkMatches(multi, queryF, sort, L"ZJI"); sort->setSort(newCollection(newLucene(L"string", SortField::STRING, true))); checkMatches(multi, queryF, sort, L"IJZ"); // up to this point, all of the searches should have "sane" 
FieldCache behavior, and should have reused // the cache in several cases checkSaneFieldCaches(); // next we'll check Locale based (Collection) for 'string', so purge first FieldCache::DEFAULT()->purgeAllCaches(); sort->setSort(newCollection(newLucene(L"string", std::locale()))); checkMatches(multi, queryA, sort, L"DJAIHGFEBC"); sort->setSort(newCollection(newLucene(L"string", std::locale(), true))); checkMatches(multi, queryA, sort, L"CBEFGHIAJD"); sort->setSort(newCollection(newLucene(L"string", std::locale()))); checkMatches(multi, queryA, sort, L"DJAIHGFEBC"); checkSaneFieldCaches(); FieldCache::DEFAULT()->purgeAllCaches(); } }; const int32_t SortFixture::NUM_STRINGS = 6000; BOOST_FIXTURE_TEST_SUITE(SortTest, SortFixture) /// test the sorts by score and document number BOOST_AUTO_TEST_CASE(testBuiltInSorts) { sort = newLucene(); checkMatches(full, queryX, sort, L"ACEGI"); checkMatches(full, queryY, sort, L"BDFHJ"); sort->setSort(SortField::FIELD_DOC()); checkMatches(full, queryX, sort, L"ACEGI"); checkMatches(full, queryY, sort, L"BDFHJ"); } /// test sorts where the type of field is specified BOOST_AUTO_TEST_CASE(testTypedSort) { sort->setSort(newCollection(newLucene(L"int", SortField::INT), SortField::FIELD_DOC())); checkMatches(full, queryX, sort, L"IGAEC"); checkMatches(full, queryY, sort, L"DHFJB"); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE), SortField::FIELD_DOC())); checkMatches(full, queryX, sort, L"GCIEA"); checkMatches(full, queryY, sort, L"DHJFB"); sort->setSort(newCollection(newLucene(L"long", SortField::LONG), SortField::FIELD_DOC())); checkMatches(full, queryX, sort, L"EACGI"); checkMatches(full, queryY, sort, L"FBJHD"); sort->setSort(newCollection(newLucene(L"byte", SortField::BYTE), SortField::FIELD_DOC())); checkMatches(full, queryX, sort, L"CIGAE"); checkMatches(full, queryY, sort, L"DHFBJ"); sort->setSort(newCollection(newLucene(L"string", SortField::STRING), SortField::FIELD_DOC())); checkMatches(full, queryX, sort, 
L"AIGEC"); checkMatches(full, queryY, sort, L"DJHFB"); } /// Test String sorting: small queue to many matches, multi field sort, reverse sort BOOST_AUTO_TEST_CASE(testStringSort) { IndexSearcherPtr searcher = getFullStrings(); sort->setSort(newCollection(newLucene(L"string", SortField::STRING), newLucene(L"string2", SortField::STRING, true), SortField::FIELD_DOC())); Collection result = searcher->search(newLucene(), FilterPtr(), 500, sort)->scoreDocs; StringStream buff; int32_t n = result.size(); String last; String lastSub; int32_t lastDocId = 0; for (int32_t x = 0; x < n; ++x) { DocumentPtr doc2 = searcher->doc(result[x]->doc); Collection v = doc2->getValues(L"tracer"); Collection v2 = doc2->getValues(L"tracer2"); for (int32_t j = 0; j < v.size(); ++j) { if (!last.empty()) { int32_t cmp = v[j].compare(last); if (cmp < 0) BOOST_FAIL("first field out of order"); if (cmp == 0) // ensure second field is in reverse order { cmp = v2[j].compare(lastSub); if (cmp > 0) BOOST_FAIL("second field out of order"); else if (cmp == 0) // ensure docid is in order { if (result[x]->doc < lastDocId) BOOST_FAIL("docid out of order"); } } } last = v[j]; lastSub = v2[j]; lastDocId = result[x]->doc; buff << v[j] << L"(" << v2[j] << L")(" << result[x]->doc << L") "; } } } namespace TestCustomFieldParserSort { class CustomIntParser : public IntParser { public: virtual ~CustomIntParser() { } public: virtual int32_t parseInt(const String& string) { return (string[0] - L'A') * 123456; } }; class CustomDoubleParser : public DoubleParser { public: virtual ~CustomDoubleParser() { } public: virtual double parseDouble(const String& string) { return std::sqrt((double)string[0]); } }; class CustomLongParser : public LongParser { public: virtual ~CustomLongParser() { } public: virtual int64_t parseLong(const String& string) { return (string[0] - L'A') * (int64_t)1234567890; } }; class CustomByteParser : public ByteParser { public: virtual ~CustomByteParser() { } public: virtual uint8_t 
parseByte(const String& string) { return (uint8_t)(string[0] - L'A'); } }; } /// Test sorts where the type of field is specified and a custom field parser is used, that uses a simple char encoding. /// The sorted string contains a character beginning from 'A' that is mapped to a numeric value using some "funny" /// algorithm to be different for each data type. BOOST_AUTO_TEST_CASE(testCustomFieldParserSort) { // since tests explicitly uses different parsers on the same field name we explicitly check/purge the FieldCache between each assertMatch FieldCachePtr fc = FieldCache::DEFAULT(); sort->setSort(newCollection(newLucene(L"parser", newLucene()), SortField::FIELD_DOC())); checkMatches(full, queryA, sort, L"JIHGFEDCBA"); checkSaneFieldCaches(); fc->purgeAllCaches(); sort->setSort(newCollection(newLucene(L"parser", newLucene()), SortField::FIELD_DOC())); checkMatches(full, queryA, sort, L"JIHGFEDCBA"); checkSaneFieldCaches(); fc->purgeAllCaches(); sort->setSort(newCollection(newLucene(L"parser", newLucene()), SortField::FIELD_DOC())); checkMatches(full, queryA, sort, L"JIHGFEDCBA"); checkSaneFieldCaches(); fc->purgeAllCaches(); sort->setSort(newCollection(newLucene(L"parser", newLucene()), SortField::FIELD_DOC())); checkMatches(full, queryA, sort, L"JIHGFEDCBA"); checkSaneFieldCaches(); fc->purgeAllCaches(); } /// test sorts when there's nothing in the index BOOST_AUTO_TEST_CASE(testEmptyIndex) { SearcherPtr empty = getEmptyIndex(); sort = newLucene(); checkMatches(empty, queryX, sort, L""); sort->setSort(SortField::FIELD_DOC()); checkMatches(empty, queryX, sort, L""); sort->setSort(newCollection(newLucene(L"int", SortField::INT), SortField::FIELD_DOC())); checkMatches(empty, queryX, sort, L""); sort->setSort(newCollection(newLucene(L"string", SortField::STRING, true), SortField::FIELD_DOC())); checkMatches(empty, queryX, sort, L""); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE), newLucene(L"string", SortField::STRING))); checkMatches(empty, 
queryX, sort, L""); } namespace TestNewCustomFieldParserSort { class MyIntParser : public IntParser { public: virtual ~MyIntParser() { } public: virtual int32_t parseInt(const String& string) { return (string[0] - L'A') * 123456; } }; class MyFieldComparator : public FieldComparator { public: MyFieldComparator(int32_t numHits) { slotValues = Collection::newInstance(numHits); bottomValue = 0; } virtual ~MyFieldComparator() { } public: Collection docValues; Collection slotValues; int32_t bottomValue; public: virtual void copy(int32_t slot, int32_t doc) { slotValues[slot] = docValues[doc]; } virtual int32_t compare(int32_t slot1, int32_t slot2) { return slotValues[slot1] - slotValues[slot2]; } virtual int32_t compareBottom(int32_t doc) { return bottomValue - docValues[doc]; } virtual void setBottom(int32_t slot) { bottomValue = slotValues[slot]; } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { docValues = FieldCache::DEFAULT()->getInts(reader, L"parser", newLucene()); } virtual ComparableValue value(int32_t slot) { return slotValues[slot]; } }; class MyFieldComparatorSource : public FieldComparatorSource { public: virtual ~MyFieldComparatorSource() { } public: virtual FieldComparatorPtr newComparator(const String& fieldname, int32_t numHits, int32_t sortPos, bool reversed) { return newLucene(numHits); } }; } // Test sorting with custom FieldComparator BOOST_AUTO_TEST_CASE(testNewCustomFieldParserSort) { sort->setSort(newCollection(newLucene(L"parser", newLucene()))); checkMatches(full, queryA, sort, L"JIHGFEDCBA"); } /// test sorts in reverse BOOST_AUTO_TEST_CASE(testReverseSort) { sort->setSort(newCollection(newLucene(L"", SortField::SCORE, true), SortField::FIELD_DOC())); checkMatches(full, queryX, sort, L"IEGCA"); checkMatches(full, queryY, sort, L"JFHDB"); sort->setSort(newCollection(newLucene(L"", SortField::DOC, true))); checkMatches(full, queryX, sort, L"IGECA"); checkMatches(full, queryY, sort, L"JHFDB"); 
sort->setSort(newCollection(newLucene(L"int", SortField::INT, true))); checkMatches(full, queryX, sort, L"CAEGI"); checkMatches(full, queryY, sort, L"BJFHD"); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE, true))); checkMatches(full, queryX, sort, L"AECIG"); checkMatches(full, queryY, sort, L"BFJHD"); sort->setSort(newCollection(newLucene(L"string", SortField::STRING, true))); checkMatches(full, queryX, sort, L"CEGIA"); checkMatches(full, queryY, sort, L"BFHJD"); } /// test sorting when the sort field is empty (undefined) for some of the documents BOOST_AUTO_TEST_CASE(testEmptyFieldSort) { sort->setSort(newCollection(newLucene(L"string", SortField::STRING))); checkMatches(full, queryF, sort, L"ZJI"); sort->setSort(newCollection(newLucene(L"string", SortField::STRING, true))); checkMatches(full, queryF, sort, L"IJZ"); sort->setSort(newCollection(newLucene(L"i18n", std::locale()))); checkMatches(full, queryF, sort, L"ZJI"); sort->setSort(newCollection(newLucene(L"i18n", std::locale(), true))); checkMatches(full, queryF, sort, L"IJZ"); sort->setSort(newCollection(newLucene(L"int", SortField::INT))); checkMatches(full, queryF, sort, L"IZJ"); sort->setSort(newCollection(newLucene(L"int", SortField::INT, true))); checkMatches(full, queryF, sort, L"JZI"); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE))); checkMatches(full, queryF, sort, L"ZJI"); // using a non-existing field as first sort key shouldn't make a difference sort->setSort(newCollection(newLucene(L"nosuchfield", SortField::STRING), newLucene(L"double", SortField::DOUBLE))); checkMatches(full, queryF, sort, L"ZJI"); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE, true))); checkMatches(full, queryF, sort, L"IJZ"); // When a field is null for both documents, the next SortField should be used. 
sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"string", SortField::STRING), newLucene(L"double", SortField::DOUBLE))); checkMatches(full, queryG, sort, L"ZWXY"); // Reverse the last criterion to make sure the test didn't pass by chance sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"string", SortField::STRING), newLucene(L"double", SortField::DOUBLE, true))); checkMatches(full, queryG, sort, L"ZYXW"); // Do the same for a MultiSearcher SearcherPtr multiSearcher = newLucene(newCollection(full)); sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"string", SortField::STRING), newLucene(L"double", SortField::DOUBLE))); checkMatches(multiSearcher, queryG, sort, L"ZWXY"); sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"string", SortField::STRING), newLucene(L"double", SortField::DOUBLE, true))); checkMatches(multiSearcher, queryG, sort, L"ZYXW"); // Don't close the multiSearcher. it would close the full searcher too! 
// Do the same for a ParallelMultiSearcher SearcherPtr parallelSearcher = newLucene(newCollection(full)); sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"string", SortField::STRING), newLucene(L"double", SortField::DOUBLE))); checkMatches(parallelSearcher, queryG, sort, L"ZWXY"); sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"string", SortField::STRING), newLucene(L"double", SortField::DOUBLE, true))); checkMatches(parallelSearcher, queryG, sort, L"ZYXW"); } /// test sorts using a series of fields BOOST_AUTO_TEST_CASE(testSortCombos) { sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"double", SortField::DOUBLE))); checkMatches(full, queryX, sort, L"IGEAC"); sort->setSort(newCollection(newLucene(L"int", SortField::INT, true), newLucene(L"", SortField::DOC, true))); checkMatches(full, queryX, sort, L"CEAGI"); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE), newLucene(L"string", SortField::STRING))); checkMatches(full, queryX, sort, L"GICEA"); } /// test using a Locale for sorting strings BOOST_AUTO_TEST_CASE(testLocaleSort) { sort->setSort(newCollection(newLucene(L"string", std::locale()))); checkMatches(full, queryX, sort, L"AIGEC"); checkMatches(full, queryY, sort, L"DJHFB"); sort->setSort(newCollection(newLucene(L"string", std::locale(), true))); checkMatches(full, queryX, sort, L"CEGIA"); checkMatches(full, queryY, sort, L"BFHJD"); } /// test a variety of sorts using more than one searcher BOOST_AUTO_TEST_CASE(testMultiSort) { MultiSearcherPtr searcher = newLucene(newCollection(searchX, searchY)); runMultiSorts(searcher, false); } /// test a variety of sorts using a parallel multisearcher BOOST_AUTO_TEST_CASE(testParallelMultiSort) { MultiSearcherPtr searcher = newLucene(newCollection(searchX, searchY)); runMultiSorts(searcher, false); } // test that the relevancy scores are the same even if hits are sorted BOOST_AUTO_TEST_CASE(testNormalizedScores) { // capture 
relevancy scores MapStringDouble scoresX = getScores(full->search(queryX, FilterPtr(), 1000)->scoreDocs, full); MapStringDouble scoresY = getScores(full->search(queryY, FilterPtr(), 1000)->scoreDocs, full); MapStringDouble scoresA = getScores(full->search(queryA, FilterPtr(), 1000)->scoreDocs, full); // we'll test searching locally, remote and multi MultiSearcherPtr multi = newLucene(newCollection(searchX, searchY)); // change sorting and make sure relevancy stays the same sort = newLucene(); checkSameValues(scoresX, getScores(full->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresX, getScores(multi->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresY, getScores(full->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresY, getScores(multi->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresA, getScores(full->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresA, getScores(multi->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, multi)); sort->setSort(SortField::FIELD_DOC()); checkSameValues(scoresX, getScores(full->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresX, getScores(multi->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresY, getScores(full->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresY, getScores(multi->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresA, getScores(full->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresA, getScores(multi->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, multi)); sort->setSort(newCollection(newLucene(L"int", SortField::INT))); checkSameValues(scoresX, getScores(full->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresX, getScores(multi->search(queryX, FilterPtr(), 
1000, sort)->scoreDocs, multi)); checkSameValues(scoresY, getScores(full->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresY, getScores(multi->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresA, getScores(full->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresA, getScores(multi->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, multi)); sort->setSort(newCollection(newLucene(L"double", SortField::DOUBLE))); checkSameValues(scoresX, getScores(full->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresX, getScores(multi->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresY, getScores(full->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresY, getScores(multi->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresA, getScores(full->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresA, getScores(multi->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, multi)); sort->setSort(newCollection(newLucene(L"string", SortField::STRING))); checkSameValues(scoresX, getScores(full->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresX, getScores(multi->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresY, getScores(full->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresY, getScores(multi->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresA, getScores(full->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresA, getScores(multi->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, multi)); sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"double", SortField::DOUBLE))); checkSameValues(scoresX, getScores(full->search(queryX, FilterPtr(), 1000, 
sort)->scoreDocs, full)); checkSameValues(scoresX, getScores(multi->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresY, getScores(full->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresY, getScores(multi->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresA, getScores(full->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresA, getScores(multi->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, multi)); sort->setSort(newCollection(newLucene(L"int", SortField::INT, true), newLucene(L"", SortField::DOC, true))); checkSameValues(scoresX, getScores(full->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresX, getScores(multi->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresY, getScores(full->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresY, getScores(multi->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresA, getScores(full->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresA, getScores(multi->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, multi)); sort->setSort(newCollection(newLucene(L"int", SortField::INT), newLucene(L"string", SortField::STRING))); checkSameValues(scoresX, getScores(full->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresX, getScores(multi->search(queryX, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresY, getScores(full->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresY, getScores(multi->search(queryY, FilterPtr(), 1000, sort)->scoreDocs, multi)); checkSameValues(scoresA, getScores(full->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, full)); checkSameValues(scoresA, getScores(multi->search(queryA, FilterPtr(), 1000, sort)->scoreDocs, multi)); } namespace 
TestTopDocsScores { class TopDocsFilter : public Filter { public: TopDocsFilter(TopDocsPtr docs) { this->docs = docs; } virtual ~TopDocsFilter() { } protected: TopDocsPtr docs; public: virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) { BitSetPtr bs = newLucene(reader->maxDoc()); bs->set((uint32_t)0, (uint32_t)reader->maxDoc()); bs->set(docs->scoreDocs[0]->doc); return newLucene(bs); } }; } BOOST_AUTO_TEST_CASE(testTopDocsScores) { SortPtr sort = newLucene(); int32_t numDocs = 10; // try to pick a query that will result in an unnormalized score greater than 1 to test for correct normalization TopDocsPtr docs1 = full->search(queryE, FilterPtr(), numDocs, sort); // a filter that only allows through the first hit FilterPtr filter = newLucene(docs1); TopDocsPtr docs2 = full->search(queryE, filter, numDocs, sort); BOOST_CHECK_CLOSE_FRACTION(docs1->scoreDocs[0]->score, docs2->scoreDocs[0]->score, 1e-6); } BOOST_AUTO_TEST_CASE(testSortWithoutFillFields) { Collection sort = newCollection(newLucene(SortField::FIELD_DOC()), newLucene()); for (int32_t i = 0; i < sort.size(); ++i) { QueryPtr q = newLucene(); TopDocsCollectorPtr tdc = TopFieldCollector::create(sort[i], 10, false, false, false, true); full->search(q, tdc); Collection sd = tdc->topDocs()->scoreDocs; for (int32_t j = 1; j < sd.size(); ++j) BOOST_CHECK_NE(sd[j]->doc, sd[j - 1]->doc); } } BOOST_AUTO_TEST_CASE(testSortWithoutScoreTracking) { // Two Sort criteria to instantiate the multi/single comparators. 
Collection sort = newCollection(newLucene(SortField::FIELD_DOC()), newLucene()); for (int32_t i = 0; i < sort.size(); ++i) { QueryPtr q = newLucene(); TopDocsCollectorPtr tdc = TopFieldCollector::create(sort[i], 10, true, false, false, true); full->search(q, tdc); TopDocsPtr td = tdc->topDocs(); Collection sd = td->scoreDocs; for (int32_t j = 1; j < sd.size(); ++j) BOOST_CHECK(MiscUtils::isNaN(sd[j]->score)); BOOST_CHECK(MiscUtils::isNaN(td->maxScore)); } } BOOST_AUTO_TEST_CASE(testSortWithScoreNoMaxScoreTracking) { // Two Sort criteria to instantiate the multi/single comparators. Collection sort = newCollection(newLucene(SortField::FIELD_DOC()), newLucene()); for (int32_t i = 0; i < sort.size(); ++i) { QueryPtr q = newLucene(); TopDocsCollectorPtr tdc = TopFieldCollector::create(sort[i], 10, true, true, false, true); full->search(q, tdc); TopDocsPtr td = tdc->topDocs(); Collection sd = td->scoreDocs; for (int32_t j = 1; j < sd.size(); ++j) BOOST_CHECK(!MiscUtils::isNaN(sd[j]->score)); BOOST_CHECK(MiscUtils::isNaN(td->maxScore)); } } BOOST_AUTO_TEST_CASE(testSortWithScoreAndMaxScoreTracking) { // Two Sort criteria to instantiate the multi/single comparators. Collection sort = newCollection(newLucene(SortField::FIELD_DOC()), newLucene()); for (int32_t i = 0; i < sort.size(); ++i) { QueryPtr q = newLucene(); TopDocsCollectorPtr tdc = TopFieldCollector::create(sort[i], 10, true, true, true, true); full->search(q, tdc); TopDocsPtr td = tdc->topDocs(); Collection sd = td->scoreDocs; for (int32_t j = 1; j < sd.size(); ++j) BOOST_CHECK(!MiscUtils::isNaN(sd[j]->score)); BOOST_CHECK(!MiscUtils::isNaN(td->maxScore)); } } BOOST_AUTO_TEST_CASE(testOutOfOrderDocsScoringSort) { // Two Sort criteria to instantiate the multi/single comparators. 
Collection sort = newCollection(newLucene(SortField::FIELD_DOC()), newLucene()); Collection< Collection > tfcOptions = newCollection< Collection >( newCollection(false, false, false), newCollection(false, false, true), newCollection(false, true, false), newCollection(false, true, true), newCollection(true, false, false), newCollection(true, false, true), newCollection(true, true, false), newCollection(true, true, true) ); Collection actualTFCClasses = newCollection( L"OutOfOrderOneComparatorNonScoringCollector", L"OutOfOrderOneComparatorScoringMaxScoreCollector", L"OutOfOrderOneComparatorScoringNoMaxScoreCollector", L"OutOfOrderOneComparatorScoringMaxScoreCollector", L"OutOfOrderOneComparatorNonScoringCollector", L"OutOfOrderOneComparatorScoringMaxScoreCollector", L"OutOfOrderOneComparatorScoringNoMaxScoreCollector", L"OutOfOrderOneComparatorScoringMaxScoreCollector" ); BooleanQueryPtr bq = newLucene(); // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 which delegates to // BS if there are no mandatory clauses. bq->add(newLucene(), BooleanClause::SHOULD); // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return the clause // instead of BQ. bq->setMinimumNumberShouldMatch(1); for (int32_t i = 0; i < sort.size(); ++i) { for (int32_t j = 0; j < tfcOptions.size(); ++j) { TopDocsCollectorPtr tdc = TopFieldCollector::create(sort[i], 10, tfcOptions[j][0] == 1, tfcOptions[j][1] == 1, tfcOptions[j][2] == 1, false); BOOST_CHECK_EQUAL(tdc->getClassName(), actualTFCClasses[j]); full->search(bq, tdc); TopDocsPtr td = tdc->topDocs(); Collection sd = td->scoreDocs; BOOST_CHECK_EQUAL(10, sd.size()); } } } BOOST_AUTO_TEST_CASE(testSortWithScoreAndMaxScoreTrackingNoResults) { // Two Sort criteria to instantiate the multi/single comparators. 
Collection sort = newCollection(newLucene(SortField::FIELD_DOC()), newLucene()); for (int32_t i = 0; i < sort.size(); ++i) { TopDocsCollectorPtr tdc = TopFieldCollector::create(sort[i], 10, true, true, true, true); TopDocsPtr td = tdc->topDocs(); BOOST_CHECK_EQUAL(0, td->totalHits); BOOST_CHECK(MiscUtils::isNaN(td->maxScore)); } } BOOST_AUTO_TEST_CASE(testSortWithStringNoException) { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 5; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"string", L"a" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"string", L"b" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); writer->addDocument (doc); } writer->optimize(); // enforce one segment to have a higher unique term count in all cases writer->close(); sort->setSort(newCollection(newLucene(L"string", SortField::STRING), SortField::FIELD_DOC())); // this should not throw IndexSearcherPtr is = newLucene(indexStore, true); is->search(newLucene(), FilterPtr(), 500, sort); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/SpanQueryFilterTest.cpp000066400000000000000000000053361217574114600247760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexReader.h" #include "SpanTermQuery.h" #include "Term.h" #include "SpanQueryFilter.h" #include "SpanFilterResult.h" #include "DocIdSet.h" #include "DocIdSetIterator.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SpanQueryFilterTest, LuceneTestFixture) static int32_t getDocIdSetSize(DocIdSetPtr docIdSet) { int32_t size = 0; DocIdSetIteratorPtr it = docIdSet->iterator(); while (it->nextDoc() != DocIdSetIterator::NO_MORE_DOCS) ++size; return size; } static void checkContainsDocId(DocIdSetPtr docIdSet, int32_t docId) { DocIdSetIteratorPtr it = docIdSet->iterator(); BOOST_CHECK_NE(it->advance(docId), DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK_EQUAL(it->docID(), docId); } BOOST_AUTO_TEST_CASE(testFilterWorks) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 500; ++i) { DocumentPtr document = newLucene(); document->add(newLucene(L"field", intToEnglish(i) + L" equals " + intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(document); } writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); SpanTermQueryPtr query = newLucene(newLucene(L"field", intToEnglish(10))); SpanQueryFilterPtr filter = newLucene(query); SpanFilterResultPtr result = filter->bitSpans(reader); DocIdSetPtr docIdSet = result->getDocIdSet(); BOOST_CHECK(docIdSet); checkContainsDocId(docIdSet, 10); Collection spans = result->getPositions(); BOOST_CHECK(spans); int32_t size = getDocIdSetSize(docIdSet); BOOST_CHECK_EQUAL(spans.size(), size); for (Collection::iterator info = spans.begin(); info != spans.end(); ++info) { BOOST_CHECK(*info); // The doc should indicate the bit is on 
checkContainsDocId(docIdSet, (*info)->getDoc()); // There should be two positions in each BOOST_CHECK_EQUAL((*info)->getPositions().size(), 2); } reader->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/TermRangeFilterTest.cpp000066400000000000000000000345051217574114600247330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTestRangeFilterFixture.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "TermRangeFilter.h" #include "TermQuery.h" #include "Term.h" #include "RAMDirectory.h" #include "Collator.h" #include "VariantUtils.h" using namespace Lucene; class TermRangeFilterFixture : public BaseTestRangeFilterFixture { public: virtual ~TermRangeFilterFixture() { } }; BOOST_FIXTURE_TEST_SUITE(TermRangeFilterTest, TermRangeFilterFixture) BOOST_AUTO_TEST_CASE(testRangeFilterId) { IndexReaderPtr reader = IndexReader::open(signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); int32_t medId = ((maxId - minId) / 2); String minIP = pad(minId); String maxIP = pad(maxId); String medIP = pad(medId); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); QueryPtr q = newLucene(newLucene(L"body", L"body")); // test id, bounded on both ends Collection result = search->search(q, newLucene(L"id", minIP, maxIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, newLucene(L"id", minIP, maxIP, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, newLucene(L"id", minIP, maxIP, false, true), 
numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, newLucene(L"id", minIP, maxIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); result = search->search(q, newLucene(L"id", medIP, maxIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + maxId - medId, result.size()); result = search->search(q, newLucene(L"id", minIP, medIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1 + medId - minId, result.size()); // unbounded id result = search->search(q, newLucene(L"id", minIP, VariantUtils::null(), true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, newLucene(L"id", VariantUtils::null(), maxIP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, newLucene(L"id", minIP, VariantUtils::null(), false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, newLucene(L"id", VariantUtils::null(), maxIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, newLucene(L"id", medIP, maxIP, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(maxId - medId, result.size()); result = search->search(q, newLucene(L"id", minIP, medIP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(medId - minId, result.size()); // very small sets result = search->search(q, newLucene(L"id", minIP, minIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, newLucene(L"id", medIP, medIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, newLucene(L"id", maxIP, maxIP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, newLucene(L"id", minIP, minIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, newLucene(L"id", 
VariantUtils::null(), minIP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, newLucene(L"id", maxIP, maxIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, newLucene(L"id", maxIP, VariantUtils::null(), true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, newLucene(L"id", medIP, medIP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testRangeFilterIdCollating) { IndexReaderPtr reader = IndexReader::open(signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); CollatorPtr c = newLucene(std::locale()); int32_t medId = ((maxId - minId) / 2); String minIP = pad(minId); String maxIP = pad(maxId); String medIP = pad(medId); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); QueryPtr q = newLucene(newLucene(L"body", L"body")); // test id, bounded on both ends int32_t numHits = search->search(q, newLucene(L"id", minIP, maxIP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs, numHits); numHits = search->search(q, newLucene(L"id", minIP, maxIP, true, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 1, numHits); numHits = search->search(q, newLucene(L"id", minIP, maxIP, false, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 1, numHits); numHits = search->search(q, newLucene(L"id", minIP, maxIP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 2, numHits); numHits = search->search(q, newLucene(L"id", medIP, maxIP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1 + maxId - medId, numHits); numHits = search->search(q, newLucene(L"id", minIP, medIP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1 + medId - minId, numHits); // unbounded id numHits = search->search(q, newLucene(L"id", minIP, VariantUtils::null(), true, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs, numHits); numHits = 
search->search(q, newLucene(L"id", VariantUtils::null(), maxIP, false, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs, numHits); numHits = search->search(q, newLucene(L"id", minIP, VariantUtils::null(), false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 1, numHits); numHits = search->search(q, newLucene(L"id", VariantUtils::null(), maxIP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 1, numHits); numHits = search->search(q, newLucene(L"id", medIP, maxIP, true, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(maxId - medId, numHits); numHits = search->search(q, newLucene(L"id", minIP, medIP, false, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(medId - minId, numHits); // very small sets numHits = search->search(q, newLucene(L"id", minIP, minIP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(0, numHits); numHits = search->search(q, newLucene(L"id", medIP, medIP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(0, numHits); numHits = search->search(q, newLucene(L"id", maxIP, maxIP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(0, numHits); numHits = search->search(q, newLucene(L"id", minIP, minIP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); numHits = search->search(q, newLucene(L"id", VariantUtils::null(), minIP, false, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); numHits = search->search(q, newLucene(L"id", maxIP, maxIP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); numHits = search->search(q, newLucene(L"id", maxIP, VariantUtils::null(), true, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); numHits = search->search(q, newLucene(L"id", medIP, medIP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); } BOOST_AUTO_TEST_CASE(testRangeFilterRand) { IndexReaderPtr reader = IndexReader::open(signedIndex->index, true); IndexSearcherPtr search = newLucene(reader); String minRP = pad(signedIndex->minR); String maxRP = pad(signedIndex->maxR); 
int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); QueryPtr q = newLucene(newLucene(L"body", L"body")); // test extremes, bounded on both ends Collection result = search->search(q, newLucene(L"rand", minRP, maxRP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, newLucene(L"rand", minRP, maxRP, true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, newLucene(L"rand", minRP, maxRP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, newLucene(L"rand", minRP, maxRP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 2, result.size()); // unbounded result = search->search(q, newLucene(L"rand", minRP, VariantUtils::null(), true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, newLucene(L"rand", VariantUtils::null(), maxRP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs, result.size()); result = search->search(q, newLucene(L"rand", minRP, VariantUtils::null(), false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); result = search->search(q, newLucene(L"rand", VariantUtils::null(), maxRP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(numDocs - 1, result.size()); // very small sets result = search->search(q, newLucene(L"rand", minRP, minRP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, newLucene(L"rand", maxRP, maxRP, false, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(0, result.size()); result = search->search(q, newLucene(L"rand", minRP, minRP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, newLucene(L"rand", VariantUtils::null(), minRP, false, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, newLucene(L"rand", 
maxRP, maxRP, true, true), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); result = search->search(q, newLucene(L"rand", maxRP, VariantUtils::null(), true, false), numDocs)->scoreDocs; BOOST_CHECK_EQUAL(1, result.size()); } BOOST_AUTO_TEST_CASE(testRangeFilterRandCollating) { // using the unsigned index because collation seems to ignore hyphens IndexReaderPtr reader = IndexReader::open(unsignedIndex->index, true); IndexSearcherPtr search = newLucene(reader); CollatorPtr c = newLucene(std::locale()); String minRP = pad(unsignedIndex->minR); String maxRP = pad(unsignedIndex->maxR); int32_t numDocs = reader->numDocs(); BOOST_CHECK_EQUAL(numDocs, 1 + maxId - minId); QueryPtr q = newLucene(newLucene(L"body", L"body")); // test extremes, bounded on both ends int32_t numHits = search->search(q, newLucene(L"rand", minRP, maxRP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs, numHits); numHits = search->search(q, newLucene(L"rand", minRP, maxRP, true, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 1, numHits); numHits = search->search(q, newLucene(L"rand", minRP, maxRP, false, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 1, numHits); numHits = search->search(q, newLucene(L"rand", minRP, maxRP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 2, numHits); // unbounded numHits = search->search(q, newLucene(L"rand", minRP, VariantUtils::null(), true, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs, numHits); numHits = search->search(q, newLucene(L"rand", VariantUtils::null(), maxRP, false, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs, numHits); numHits = search->search(q, newLucene(L"rand", minRP, VariantUtils::null(), false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 1, numHits); numHits = search->search(q, newLucene(L"rand", VariantUtils::null(), maxRP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(numDocs - 1, numHits); // very small sets numHits = search->search(q, 
newLucene(L"rand", minRP, minRP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(0, numHits); numHits = search->search(q, newLucene(L"rand", maxRP, maxRP, false, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(0, numHits); numHits = search->search(q, newLucene(L"rand", minRP, minRP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); numHits = search->search(q, newLucene(L"rand", VariantUtils::null(), minRP, false, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); numHits = search->search(q, newLucene(L"rand", maxRP, maxRP, true, true, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); numHits = search->search(q, newLucene(L"rand", maxRP, VariantUtils::null(), true, false, c), 1000)->totalHits; BOOST_CHECK_EQUAL(1, numHits); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/TermRangeQueryTest.cpp000066400000000000000000000260471217574114600246150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexSearcher.h" #include "TermRangeQuery.h" #include "WhitespaceAnalyzer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "Collator.h" #include "Tokenizer.h" #include "TermAttribute.h" #include "Reader.h" #include "VariantUtils.h" using namespace Lucene; class SingleCharTokenizer : public Tokenizer { public: SingleCharTokenizer(ReaderPtr r) : Tokenizer(r) { termAtt = addAttribute(); buffer = CharArray::newInstance(1); done = false; } virtual ~SingleCharTokenizer() { } public: CharArray buffer; bool done; TermAttributePtr termAtt; public: virtual bool incrementToken() { int32_t count = input->read(buffer.get(), 0, 1); if (done) return false; else { clearAttributes(); done = true; if (count == 1) { termAtt->termBuffer()[0] = buffer[0]; termAtt->setTermLength(1); } else termAtt->setTermLength(0); return true; } } virtual void reset(ReaderPtr input) { Tokenizer::reset(input); done = false; } }; class SingleCharAnalyzer : public Analyzer { public: virtual ~SingleCharAnalyzer() { } public: virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader) { TokenizerPtr tokenizer = boost::dynamic_pointer_cast(getPreviousTokenStream()); if (!tokenizer) { tokenizer = newLucene(reader); setPreviousTokenStream(tokenizer); } else tokenizer->reset(reader); return tokenizer; } virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { return newLucene(reader); } }; class TermRangeQueryFixture : public LuceneTestFixture { public: TermRangeQueryFixture() { docCount = 0; dir = newLucene(); } virtual ~TermRangeQueryFixture() { } protected: int32_t docCount; RAMDirectoryPtr dir; public: void initializeIndex(Collection values) { initializeIndex(values, newLucene()); } void initializeIndex(Collection values, 
AnalyzerPtr analyzer) { IndexWriterPtr writer = newLucene(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < values.size(); ++i) insertDoc(writer, values[i]); writer->close(); } void addDoc(const String& content) { IndexWriterPtr writer = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED); insertDoc(writer, content); writer->close(); } void insertDoc(IndexWriterPtr writer, const String& content) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", L"id" + StringUtils::toString(docCount), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"content", content, Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); ++docCount; } }; BOOST_FIXTURE_TEST_SUITE(TermRangeQueryTest, TermRangeQueryFixture) BOOST_AUTO_TEST_CASE(testExclusive) { QueryPtr query = newLucene(L"content", L"A", L"C", false, false); initializeIndex(newCollection(L"A", L"B", L"C", L"D")); IndexSearcherPtr searcher = newLucene(dir, true); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); searcher->close(); initializeIndex(newCollection(L"A", L"B", L"D")); searcher = newLucene(dir, true); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); searcher->close(); addDoc(L"C"); searcher = newLucene(dir, true); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); searcher->close(); } BOOST_AUTO_TEST_CASE(testInclusive) { QueryPtr query = newLucene(L"content", L"A", L"C", true, true); initializeIndex(newCollection(L"A", L"B", L"C", L"D")); IndexSearcherPtr searcher = newLucene(dir, true); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); searcher->close(); initializeIndex(newCollection(L"A", L"B", L"D")); searcher = newLucene(dir, true); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, 
hits.size()); searcher->close(); addDoc(L"C"); searcher = newLucene(dir, true); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); searcher->close(); } BOOST_AUTO_TEST_CASE(testEqualsHashcode) { QueryPtr query = newLucene(L"content", L"A", L"C", true, true); query->setBoost(1.0); QueryPtr other = newLucene(L"content", L"A", L"C", true, true); other->setBoost(1.0); BOOST_CHECK(query->equals(query)); BOOST_CHECK(query->equals(other)); BOOST_CHECK_EQUAL(query->hashCode(), other->hashCode()); other->setBoost(2.0); BOOST_CHECK(!query->equals(other)); other = newLucene(L"notcontent", L"A", L"C", true, true); BOOST_CHECK(!query->equals(other)); other = newLucene(L"content", L"X", L"C", true, true); BOOST_CHECK(!query->equals(other)); other = newLucene(L"content", L"A", L"Z", true, true); BOOST_CHECK(!query->equals(other)); query = newLucene(L"content", L"", L"C", true, true); other = newLucene(L"content", L"", L"C", true, true); BOOST_CHECK(query->equals(other)); BOOST_CHECK_EQUAL(query->hashCode(), other->hashCode()); query = newLucene(L"content", L"C", L"", true, true); other = newLucene(L"content", L"C", L"", true, true); BOOST_CHECK(query->equals(other)); BOOST_CHECK_EQUAL(query->hashCode(), other->hashCode()); query = newLucene(L"content", L"", L"C", true, true); other = newLucene(L"content", L"C", L"", true, true); BOOST_CHECK(!query->equals(other)); query = newLucene(L"content", L"A", L"C", false, false); other = newLucene(L"content", L"A", L"C", true, true); BOOST_CHECK(!query->equals(other)); query = newLucene(L"content", L"A", L"C", false, false); other = newLucene(L"content", L"A", L"C", false, false, newLucene(std::locale())); BOOST_CHECK(!query->equals(other)); } BOOST_AUTO_TEST_CASE(testExclusiveCollating) { QueryPtr query = newLucene(L"content", L"A", L"C", false, false, newLucene(std::locale())); initializeIndex(newCollection(L"A", L"B", L"C", L"D")); IndexSearcherPtr searcher = newLucene(dir, true); Collection 
hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); searcher->close(); initializeIndex(newCollection(L"A", L"B", L"D")); searcher = newLucene(dir, true); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); searcher->close(); addDoc(L"C"); searcher = newLucene(dir, true); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); searcher->close(); } BOOST_AUTO_TEST_CASE(testInclusiveCollating) { QueryPtr query = newLucene(L"content", L"A", L"C", true, true, newLucene(std::locale())); initializeIndex(newCollection(L"A", L"B", L"C", L"D")); IndexSearcherPtr searcher = newLucene(dir, true); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); searcher->close(); initializeIndex(newCollection(L"A", L"B", L"D")); searcher = newLucene(dir, true); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(2, hits.size()); searcher->close(); addDoc(L"C"); searcher = newLucene(dir, true); hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(3, hits.size()); searcher->close(); } BOOST_AUTO_TEST_CASE(testExclusiveLowerNull) { AnalyzerPtr analyzer = newLucene(); QueryPtr query = newLucene(L"content", VariantUtils::null(), L"C", false, false); initializeIndex(newCollection(L"A", L"B", L"", L"C", L"D"), analyzer); IndexSearcherPtr searcher = newLucene(dir, true); int32_t numHits = searcher->search(query, FilterPtr(), 1000)->totalHits; BOOST_CHECK_EQUAL(3, numHits); searcher->close(); initializeIndex(newCollection(L"A", L"B", L"", L"D"), analyzer); searcher = newLucene(dir, true); numHits = searcher->search(query, FilterPtr(), 1000)->totalHits; BOOST_CHECK_EQUAL(3, numHits); searcher->close(); addDoc(L"C"); searcher = newLucene(dir, true); numHits = searcher->search(query, FilterPtr(), 1000)->totalHits; BOOST_CHECK_EQUAL(3, numHits); searcher->close(); } 
BOOST_AUTO_TEST_CASE(testInclusiveLowerNull) { AnalyzerPtr analyzer = newLucene(); QueryPtr query = newLucene(L"content", VariantUtils::null(), L"C", true, true); initializeIndex(newCollection(L"A", L"B", L"", L"C", L"D"), analyzer); IndexSearcherPtr searcher = newLucene(dir, true); int32_t numHits = searcher->search(query, FilterPtr(), 1000)->totalHits; BOOST_CHECK_EQUAL(4, numHits); searcher->close(); initializeIndex(newCollection(L"A", L"B", L"", L"D"), analyzer); searcher = newLucene(dir, true); numHits = searcher->search(query, FilterPtr(), 1000)->totalHits; BOOST_CHECK_EQUAL(3, numHits); searcher->close(); addDoc(L"C"); searcher = newLucene(dir, true); numHits = searcher->search(query, FilterPtr(), 1000)->totalHits; BOOST_CHECK_EQUAL(4, numHits); searcher->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/TermScorerTest.cpp000066400000000000000000000122351217574114600237620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "TermQuery.h" #include "Term.h" #include "Weight.h" #include "TermScorer.h" #include "IndexSearcher.h" #include "IndexReader.h" #include "Collector.h" #include "DocIdSetIterator.h" using namespace Lucene; DECLARE_SHARED_PTR(TestHit) class TestHit : public LuceneObject { public: TestHit(int32_t doc, double score) { this->doc = doc; this->score = score; } virtual ~TestHit() { } public: int32_t doc; double score; public: virtual String toString() { return L"TestHit{doc=" + StringUtils::toString(doc) + L", score=" + StringUtils::toString(score) + L"}"; } }; class TermScorerFixture : public LuceneTestFixture { public: TermScorerFixture() { values = newCollection(L"all", L"dogs dogs", L"like", L"playing", L"fetch", L"all"); directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < values.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, values[i], Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); indexSearcher = newLucene(directory, false); indexReader = indexSearcher->getIndexReader(); } virtual ~TermScorerFixture() { } protected: static const String FIELD; RAMDirectoryPtr directory; Collection values; IndexSearcherPtr indexSearcher; IndexReaderPtr indexReader; }; const String TermScorerFixture::FIELD = L"field"; BOOST_FIXTURE_TEST_SUITE(TermScorerTest, TermScorerFixture) namespace TestTermScorer { class TestCollector : public Collector { public: TestCollector(Collection docs) { this->docs = docs; this->base = 0; } virtual ~TestCollector() { } protected: int32_t base; ScorerPtr scorer; Collection docs; public: virtual void setScorer(ScorerPtr scorer) { 
this->scorer = scorer; } virtual void collect(int32_t doc) { double score = scorer->score(); doc = doc + base; docs.add(newLucene(doc, score)); BOOST_CHECK(score > 0); BOOST_CHECK(doc == 0 || doc == 5); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { base = docBase; } virtual bool acceptsDocsOutOfOrder() { return true; } }; } BOOST_AUTO_TEST_CASE(testTermScorer) { TermPtr allTerm = newLucene(FIELD, L"all"); TermQueryPtr termQuery = newLucene(allTerm); WeightPtr weight = termQuery->weight(indexSearcher); TermScorerPtr ts = newLucene(weight, indexReader->termDocs(allTerm), indexSearcher->getSimilarity(), indexReader->norms(FIELD)); // we have 2 documents with the term all in them, one document for all the other values Collection docs = Collection::newInstance(); ts->score(newLucene(docs)); BOOST_CHECK_EQUAL(docs.size(), 2); BOOST_CHECK_EQUAL(docs[0]->score, docs[1]->score); BOOST_CHECK_CLOSE_FRACTION(docs[0]->score, 1.6931472, 0.000001); } BOOST_AUTO_TEST_CASE(testNext) { TermPtr allTerm = newLucene(FIELD, L"all"); TermQueryPtr termQuery = newLucene(allTerm); WeightPtr weight = termQuery->weight(indexSearcher); TermScorerPtr ts = newLucene(weight, indexReader->termDocs(allTerm), indexSearcher->getSimilarity(), indexReader->norms(FIELD)); BOOST_CHECK_NE(ts->nextDoc(), DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK_CLOSE_FRACTION(ts->score(), 1.6931472, 0.000001); BOOST_CHECK_NE(ts->nextDoc(), DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK_CLOSE_FRACTION(ts->score(), 1.6931472, 0.000001); BOOST_CHECK_EQUAL(ts->nextDoc(), DocIdSetIterator::NO_MORE_DOCS); } BOOST_AUTO_TEST_CASE(testSkipTo) { TermPtr allTerm = newLucene(FIELD, L"all"); TermQueryPtr termQuery = newLucene(allTerm); WeightPtr weight = termQuery->weight(indexSearcher); TermScorerPtr ts = newLucene(weight, indexReader->termDocs(allTerm), indexSearcher->getSimilarity(), indexReader->norms(FIELD)); BOOST_CHECK_NE(ts->advance(3), DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK_EQUAL(ts->docID(), 
5); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/TermVectorsTest.cpp000066400000000000000000000400221217574114600241450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" #include "TermFreqVector.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "IndexReader.h" #include "TermPositionVector.h" #include "SortedTermVectorMapper.h" #include "TermVectorEntryFreqSortedComparator.h" #include "FieldSortedTermVectorMapper.h" #include "TermEnum.h" #include "TermDocs.h" #include "Similarity.h" #include "TermVectorEntry.h" #include "TermVectorOffsetInfo.h" using namespace Lucene; class TermVectorsFixture : public LuceneTestFixture { public: TermVectorsFixture() { directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 1000; ++i) { DocumentPtr doc = newLucene(); Field::TermVector termVector; int32_t mod3 = i % 3; int32_t mod2 = i % 2; if (mod2 == 0 && mod3 == 0) termVector = Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS; else if (mod2 == 0) termVector = Field::TERM_VECTOR_WITH_POSITIONS; else if (mod3 == 0) termVector = Field::TERM_VECTOR_WITH_OFFSETS; else termVector = Field::TERM_VECTOR_YES; doc->add(newLucene(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED, termVector)); writer->addDocument(doc); } writer->close(); searcher = newLucene(directory, true); } virtual 
~TermVectorsFixture() { } protected: IndexSearcherPtr searcher; DirectoryPtr directory; public: void setupDoc(DocumentPtr doc, const String& text) { doc->add(newLucene(L"field2", text, Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"field", text, Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); } }; BOOST_FIXTURE_TEST_SUITE(TermVectorsTest, TermVectorsFixture) BOOST_AUTO_TEST_CASE(testTermVectors) { QueryPtr query = newLucene(newLucene(L"field", L"seventy")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(100, hits.size()); for (int32_t i = 0; i < hits.size(); ++i) { Collection vector = searcher->reader->getTermFreqVectors(hits[i]->doc); BOOST_CHECK(vector); BOOST_CHECK_EQUAL(vector.size(), 1); } } BOOST_AUTO_TEST_CASE(testTermVectorsFieldOrder) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"c", L"some content here", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"a", L"some content here", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"b", L"some content here", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); doc->add(newLucene(L"x", L"some content here", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); Collection v = reader->getTermFreqVectors(0); BOOST_CHECK_EQUAL(4, v.size()); Collection expectedFields = newCollection(L"a", L"b", L"c", L"x"); Collection expectedPositions = newCollection(1, 2, 0); for (int32_t i = 0; i < v.size(); ++i) { TermPositionVectorPtr posVec = boost::dynamic_pointer_cast(v[i]); 
BOOST_CHECK_EQUAL(expectedFields[i], posVec->getField()); Collection terms = posVec->getTerms(); BOOST_CHECK_EQUAL(3, terms.size()); BOOST_CHECK_EQUAL(L"content", terms[0]); BOOST_CHECK_EQUAL(L"here", terms[1]); BOOST_CHECK_EQUAL(L"some", terms[2]); for (int32_t j = 0; j < 3; ++j) { Collection positions = posVec->getTermPositions(j); BOOST_CHECK_EQUAL(1, positions.size()); BOOST_CHECK_EQUAL(expectedPositions[j], positions[0]); } } } BOOST_AUTO_TEST_CASE(testTermPositionVectors) { QueryPtr query = newLucene(newLucene(L"field", L"zero")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); for (int32_t i = 0; i < hits.size(); ++i) { Collection vector = searcher->reader->getTermFreqVectors(hits[i]->doc); BOOST_CHECK(vector); BOOST_CHECK_EQUAL(vector.size(), 1); bool shouldBePosVector = (hits[i]->doc % 2 == 0); BOOST_CHECK(!shouldBePosVector || (shouldBePosVector && boost::dynamic_pointer_cast(vector[0]))); bool shouldBeOffVector = (hits[i]->doc % 3 == 0); BOOST_CHECK(!shouldBeOffVector || (shouldBeOffVector && boost::dynamic_pointer_cast(vector[0]))); if (shouldBePosVector || shouldBeOffVector) { TermPositionVectorPtr posVec = boost::dynamic_pointer_cast(vector[0]); Collection terms = posVec->getTerms(); BOOST_CHECK(terms && !terms.empty()); for (int32_t j = 0; j < terms.size(); ++j) { Collection positions = posVec->getTermPositions(j); Collection offsets = posVec->getOffsets(j); if (shouldBePosVector) { BOOST_CHECK(positions); BOOST_CHECK(!positions.empty()); } else BOOST_CHECK(!positions); if (shouldBeOffVector) { BOOST_CHECK(offsets); BOOST_CHECK(!offsets.empty()); } else BOOST_CHECK(!offsets); } } else { BOOST_CHECK(!boost::dynamic_pointer_cast(vector[0])); TermFreqVectorPtr freqVec = boost::dynamic_pointer_cast(vector[0]); Collection terms = freqVec->getTerms(); BOOST_CHECK(terms && !terms.empty()); } } } BOOST_AUTO_TEST_CASE(testTermOffsetVectors) { QueryPtr query = newLucene(newLucene(L"field", L"fifty")); 
Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(100, hits.size()); for (int32_t i = 0; i < hits.size(); ++i) { Collection vector = searcher->reader->getTermFreqVectors(hits[i]->doc); BOOST_CHECK(vector); BOOST_CHECK_EQUAL(vector.size(), 1); } } BOOST_AUTO_TEST_CASE(testKnownSetOfDocuments) { String test1 = L"eating chocolate in a computer lab"; // 6 terms String test2 = L"computer in a computer lab"; // 5 terms String test3 = L"a chocolate lab grows old"; // 5 terms String test4 = L"eating chocolate with a chocolate lab in an old chocolate colored computer lab"; // 13 terms MapStringInt test4Map = MapStringInt::newInstance(); test4Map.put(L"chocolate", 3); test4Map.put(L"lab", 2); test4Map.put(L"eating", 1); test4Map.put(L"computer", 1); test4Map.put(L"with", 1); test4Map.put(L"a", 1); test4Map.put(L"colored", 1); test4Map.put(L"in", 1); test4Map.put(L"an", 1); test4Map.put(L"computer", 1); test4Map.put(L"old", 1); DocumentPtr testDoc1 = newLucene(); setupDoc(testDoc1, test1); DocumentPtr testDoc2 = newLucene(); setupDoc(testDoc2, test2); DocumentPtr testDoc3 = newLucene(); setupDoc(testDoc3, test3); DocumentPtr testDoc4 = newLucene(); setupDoc(testDoc4, test4); DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); BOOST_CHECK(writer); writer->addDocument(testDoc1); writer->addDocument(testDoc2); writer->addDocument(testDoc3); writer->addDocument(testDoc4); writer->close(); IndexSearcherPtr knownSearcher = newLucene(dir, true); TermEnumPtr termEnum = knownSearcher->reader->terms(); TermDocsPtr termDocs = knownSearcher->reader->termDocs(); SimilarityPtr sim = knownSearcher->getSimilarity(); while (termEnum->next()) { TermPtr term = termEnum->term(); termDocs->seek(term); while (termDocs->next()) { int32_t docId = termDocs->doc(); int32_t freq = termDocs->freq(); TermFreqVectorPtr vector = knownSearcher->reader->getTermFreqVector(docId, L"field"); 
double tf = sim->tf(freq); double idf = sim->idf(knownSearcher->docFreq(term), knownSearcher->maxDoc()); // This is fine since we don't have stop words double lNorm = sim->lengthNorm(L"field", vector->getTerms().size()); BOOST_CHECK(vector); Collection vTerms = vector->getTerms(); Collection freqs = vector->getTermFrequencies(); for (int32_t i = 0; i < vTerms.size(); ++i) { if (term->text() == vTerms[i]) BOOST_CHECK_EQUAL(freqs[i], freq); } } } QueryPtr query = newLucene(newLucene(L"field", L"chocolate")); Collection hits = knownSearcher->search(query, FilterPtr(), 1000)->scoreDocs; // doc 3 should be the first hit because it is the shortest match BOOST_CHECK_EQUAL(hits.size(), 3); double score = hits[0]->score; BOOST_CHECK_EQUAL(hits[0]->doc, 2); BOOST_CHECK_EQUAL(hits[1]->doc, 3); BOOST_CHECK_EQUAL(hits[2]->doc, 0); TermFreqVectorPtr vector = knownSearcher->reader->getTermFreqVector(hits[1]->doc, L"field"); BOOST_CHECK(vector); Collection terms = vector->getTerms(); Collection freqs = vector->getTermFrequencies(); BOOST_CHECK(terms && terms.size() == 10); for (int32_t i = 0; i < terms.size(); ++i) { String term = terms[i]; int32_t freq = freqs[i]; BOOST_CHECK(test4.find(term) != String::npos); BOOST_CHECK(test4Map.contains(term)); BOOST_CHECK_EQUAL(test4Map[term], freq); } SortedTermVectorMapperPtr mapper = newLucene(TermVectorEntryFreqSortedComparator::compare); knownSearcher->reader->getTermFreqVector(hits[1]->doc, mapper); Collection vectorEntrySet = mapper->getTermVectorEntrySet(); BOOST_CHECK_EQUAL(vectorEntrySet.size(), 10); TermVectorEntryPtr last; for (Collection::iterator tve = vectorEntrySet.begin(); tve != vectorEntrySet.end(); ++tve) { if (*tve && last) { BOOST_CHECK(last->getFrequency() >= (*tve)->getFrequency()); int32_t expectedFreq = test4Map.get((*tve)->getTerm()); // we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields BOOST_CHECK_EQUAL((*tve)->getFrequency(), 2 * expectedFreq); 
} last = *tve; } FieldSortedTermVectorMapperPtr fieldMapper = newLucene(TermVectorEntryFreqSortedComparator::compare); knownSearcher->reader->getTermFreqVector(hits[1]->doc, fieldMapper); MapStringCollectionTermVectorEntry map = fieldMapper->getFieldToTerms(); BOOST_CHECK_EQUAL(map.size(), 2); vectorEntrySet = map.get(L"field"); BOOST_CHECK(vectorEntrySet); BOOST_CHECK_EQUAL(vectorEntrySet.size(), 10); knownSearcher->close(); } /// Test only a few docs having vectors BOOST_AUTO_TEST_CASE(testRareVectors) { IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 100; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO)); writer->addDocument(doc); } for (int32_t i = 0; i < 10; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", intToEnglish(100 + i), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); } writer->close(); searcher = newLucene(directory, true); QueryPtr query = newLucene(newLucene(L"field", L"hundred")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(10, hits.size()); for (int32_t i = 0; i < hits.size(); ++i) { Collection vector = searcher->reader->getTermFreqVectors(hits[i]->doc); BOOST_CHECK(vector); BOOST_CHECK_EQUAL(vector.size(), 1); } } /// In a single doc, for the same field, mix the term vectors up BOOST_AUTO_TEST_CASE(testMixedVectrosVectors) { IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", L"one", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_NO)); doc->add(newLucene(L"field", L"one", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_YES)); doc->add(newLucene(L"field", L"one", Field::STORE_YES, Field::INDEX_ANALYZED, 
Field::TERM_VECTOR_WITH_POSITIONS)); doc->add(newLucene(L"field", L"one", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_OFFSETS)); doc->add(newLucene(L"field", L"one", Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS)); writer->addDocument(doc); writer->close(); searcher = newLucene(directory, true); QueryPtr query = newLucene(newLucene(L"field", L"one")); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); Collection vector = searcher->reader->getTermFreqVectors(hits[0]->doc); BOOST_CHECK(vector); BOOST_CHECK_EQUAL(vector.size(), 1); TermPositionVectorPtr tfv = boost::dynamic_pointer_cast(vector[0]); BOOST_CHECK_EQUAL(tfv->getField(), L"field"); Collection terms = tfv->getTerms(); BOOST_CHECK_EQUAL(1, terms.size()); BOOST_CHECK_EQUAL(terms[0], L"one"); BOOST_CHECK_EQUAL(5, tfv->getTermFrequencies()[0]); Collection positions = tfv->getTermPositions(0); BOOST_CHECK_EQUAL(5, positions.size()); for (int32_t i = 0; i < 5; ++i) BOOST_CHECK_EQUAL(i, positions[i]); Collection offsets = tfv->getOffsets(0); BOOST_CHECK_EQUAL(5, offsets.size()); for (int32_t i = 0; i < 5; ++i) { BOOST_CHECK_EQUAL(4 * i, offsets[i]->getStartOffset()); BOOST_CHECK_EQUAL(4 * i + 3, offsets[i]->getEndOffset()); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/ThreadSafeTest.cpp000066400000000000000000000112231217574114600236770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "LuceneThread.h" #include "IndexReader.h" #include "Random.h" #include "FieldSelector.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" using namespace Lucene; class TestFieldSelector : public FieldSelector { public: TestFieldSelector(RandomPtr rand) { this->rand = rand; } virtual ~TestFieldSelector() { } protected: RandomPtr rand; public: virtual FieldSelectorResult accept(const String& fieldName) { switch (rand->nextInt(2)) { case 0: return FieldSelector::SELECTOR_LAZY_LOAD; case 1: return FieldSelector::SELECTOR_LOAD; default: return FieldSelector::SELECTOR_LOAD; } } }; class TestThread : public LuceneThread { public: TestThread(int32_t iter, RandomPtr rand, IndexReaderPtr reader) { this->iter = iter; this->rand = rand; this->reader = reader; } virtual ~TestThread() { } LUCENE_CLASS(TestThread); protected: IndexReaderPtr reader; int32_t iter; RandomPtr rand; public: virtual void run() { try { for (int32_t i = 0; i < iter; ++i) loadDoc(); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } void loadDoc() { DocumentPtr doc = reader->document(rand->nextInt(reader->maxDoc()), newLucene(rand)); Collection fields = doc->getFields(); for (int32_t i = 0; i < fields.size(); ++i) validateField(fields[i]); } void validateField(FieldablePtr f) { String val = f->stringValue(); if (!boost::starts_with(val, L"^") || !boost::ends_with(val, L"$")) BOOST_FAIL("Invalid field"); } }; class ThreadSafeFixture : public LuceneTestFixture { public: ThreadSafeFixture() { r = newLucene(17); dir = newLucene(); words = StringUtils::split(L"now is the time for all good men to come to the aid of their country", L" "); } virtual ~ThreadSafeFixture() { } public: RandomPtr r; DirectoryPtr dir; IndexReaderPtr reader; Collection words; 
public: void buildDir(DirectoryPtr dir, int32_t numDocs, int32_t maxFields, int32_t maxFieldLen) { IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->setMaxBufferedDocs(10); for (int32_t j = 0; j < numDocs; ++j) { DocumentPtr doc = newLucene(); int32_t numFields = r->nextInt(maxFields); for (int32_t i = 0; i < numFields; ++i) { int32_t flen = r->nextInt(maxFieldLen); StringStream buf; buf << L"^ "; while ((int32_t)buf.str().length() < flen) buf << L" " << words[r->nextInt(words.size())]; buf << L" $"; doc->add(newLucene(L"f" + StringUtils::toString(i), buf.str(), Field::STORE_YES, Field::INDEX_ANALYZED)); } writer->addDocument(doc); } writer->close(); } void doTest(int32_t iter, int32_t numThreads) { Collection threads = Collection::newInstance(numThreads); for (int32_t i = 0; i < numThreads; ++i) { threads[i] = newLucene(iter, newLucene(r->nextInt()), reader); threads[i]->start(); } for (int32_t i = 0; i < numThreads; ++i) threads[i]->join(); } }; BOOST_FIXTURE_TEST_SUITE(ThreadSafeTest, ThreadSafeFixture) BOOST_AUTO_TEST_CASE(testLazyLoadThreadSafety) { // test with field sizes bigger than the buffer of an index input buildDir(dir, 15, 5, 2000); // do many small tests so the thread locals go away in between for (int32_t i = 0; i < 100; ++i) { reader = IndexReader::open(dir, false); doTest(10, 100); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/TimeLimitingCollectorTest.cpp000066400000000000000000000315131217574114600261370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "LuceneThread.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "IndexSearcher.h" #include "QueryParser.h" #include "Document.h" #include "Field.h" #include "Collector.h" #include "BitSet.h" #include "TimeLimitingCollector.h" using namespace Lucene; DECLARE_SHARED_PTR(MyHitCollector) /// counting collector that can slow down at collect(). class MyHitCollector : public Collector { public: MyHitCollector() { bits = newLucene(); slowdown = 0; lastDocCollected = -1; docBase = 0; } virtual ~MyHitCollector() { } protected: BitSetPtr bits; int32_t slowdown; int32_t lastDocCollected; int32_t docBase; public: /// amount of time to wait on each collect to simulate a long iteration void setSlowDown(int32_t milliseconds) { slowdown = milliseconds; } int32_t hitCount() { return bits->cardinality(); } int32_t getLastDocCollected() { return lastDocCollected; } virtual void setScorer(ScorerPtr scorer) { // scorer is not needed } virtual void collect(int32_t doc) { int32_t docId = doc + docBase; if (slowdown > 0) LuceneThread::threadSleep(slowdown); if (docId < 0) BOOST_FAIL("Invalid doc"); bits->set(docId); lastDocCollected = docId; } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { this->docBase = docBase; } virtual bool acceptsDocsOutOfOrder() { return false; } }; class TimeLimitingCollectorFixture; class TimeLimitingThread : public LuceneThread { public: TimeLimitingThread(bool withTimeout, TimeLimitingCollectorFixture* fixture); virtual ~TimeLimitingThread() { } LUCENE_CLASS(TimeLimitingThread); protected: bool withTimeout; TimeLimitingCollectorFixture* fixture; public: virtual void run(); }; /// Tests the {@link TimeLimitingCollector}. 
/// This test checks (1) search correctness (regardless of timeout), (2) expected timeout behaviour, and (3) a sanity test with multiple searching threads. class TimeLimitingCollectorFixture : public LuceneTestFixture { public: TimeLimitingCollectorFixture() { Collection docText = newCollection( L"docThatNeverMatchesSoWeCanRequireLastDocCollectedToBeGreaterThanZero", L"one blah three", L"one foo three multiOne", L"one foobar three multiThree", L"blueberry pancakes", L"blueberry pie", L"blueberry strudel", L"blueberry pizza" ); DirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < N_DOCS; ++i) add(docText[i % docText.size()], writer); writer->close(); searcher = newLucene(directory, true); String qtxt = L"one"; // start from 1, so that the 0th doc never matches for (int32_t i = 1; i < docText.size(); ++i) qtxt += L" " + docText[i]; // large query so that search will be longer QueryParserPtr queryParser = newLucene(LuceneVersion::LUCENE_CURRENT, FIELD_NAME, newLucene()); query = queryParser->parse(qtxt); // warm the searcher searcher->search(query, FilterPtr(), 1000); } virtual ~TimeLimitingCollectorFixture() { searcher->close(); TimeLimitingCollector::setResolution(TimeLimitingCollector::DEFAULT_RESOLUTION); TimeLimitingCollector::stopTimer(); } protected: static const int32_t SLOW_DOWN; static const int64_t TIME_ALLOWED; // so searches can find about 17 docs. // max time allowed is relaxed for multi-threading tests. // the multi-thread case fails when setting this to 1 (no slack) and launching many threads (>2000). // but this is not a real failure, just noise. 
static const int32_t MULTI_THREAD_SLACK; static const int32_t N_DOCS; static const int32_t N_THREADS; static const String FIELD_NAME; SearcherPtr searcher; QueryPtr query; public: void add(const String& value, IndexWriterPtr writer) { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD_NAME, value, Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } void doTestSearch() { int32_t totalResults = 0; int32_t totalTLCResults = 0; MyHitCollectorPtr myHc = newLucene(); search(myHc); totalResults = myHc->hitCount(); myHc = newLucene(); int64_t oneHour = 3600000; CollectorPtr tlCollector = createTimedCollector(myHc, oneHour, false); search(tlCollector); totalTLCResults = myHc->hitCount(); if (totalResults != totalTLCResults) BOOST_CHECK_EQUAL(totalResults, totalTLCResults); } void doTestTimeout(bool multiThreaded, bool greedy) { MyHitCollectorPtr myHc = newLucene(); myHc->setSlowDown(SLOW_DOWN); CollectorPtr tlCollector = createTimedCollector(myHc, TIME_ALLOWED, greedy); TimeExceededException timoutException; // search try { search(tlCollector); } catch (TimeExceededException& e) { timoutException = e; } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } // must get exception if (timoutException.isNull()) BOOST_CHECK(!timoutException.isNull()); String message = timoutException.getError(); String::size_type last = message.find_last_of(L":"); if (last == String::npos) BOOST_CHECK_NE(last, String::npos); // greediness affect last doc collected int32_t exceptionDoc = StringUtils::toInt(message.substr(last + 1)); int32_t lastCollected = myHc->getLastDocCollected(); if (exceptionDoc <= 0) BOOST_CHECK(exceptionDoc > 0); if (greedy) { if (exceptionDoc != lastCollected) BOOST_CHECK_EQUAL(exceptionDoc, lastCollected); if (myHc->hitCount() <= 0) BOOST_CHECK(myHc->hitCount() > 0); } else if (exceptionDoc <= lastCollected) BOOST_CHECK(exceptionDoc > lastCollected); String::size_type allowed = message.find_first_of(L":"); if 
(allowed == String::npos) BOOST_CHECK_NE(allowed, String::npos); int32_t timeAllowed = StringUtils::toInt(message.substr(allowed + 1)); String::size_type elapsed = message.find_first_of(L":", allowed + 1); if (elapsed == String::npos) BOOST_CHECK_NE(elapsed, String::npos); int32_t timeElapsed = StringUtils::toInt(message.substr(elapsed + 1)); // verify that elapsed time at exception is within valid limits if (timeAllowed != TIME_ALLOWED) BOOST_CHECK_EQUAL(timeAllowed, TIME_ALLOWED); // a) Not too early if (timeElapsed <= TIME_ALLOWED - TimeLimitingCollector::getResolution()) BOOST_CHECK(timeElapsed > TIME_ALLOWED - TimeLimitingCollector::getResolution()); // b) Not too late. // This part is problematic in a busy test system, so we just print a warning. // We already verified that a timeout occurred, we just can't be picky about how long it took. if (timeElapsed > maxTime(multiThreaded)) { BOOST_TEST_MESSAGE("Informative: timeout exceeded (no action required: most probably just " << "because the test machine is slower than usual): " << "lastDoc = " << exceptionDoc << ", && allowed = " << timeAllowed << ", && elapsed = " << timeElapsed << " >= " << StringUtils::toUTF8(maxTimeStr(multiThreaded))); } } void doTestMultiThreads(bool withTimeout) { Collection threads = Collection::newInstance(N_THREADS); for (int32_t i = 0; i < threads.size(); ++i) { threads[i] = newLucene(withTimeout, this); threads[i]->start(); } for (int32_t i = 0; i < threads.size(); ++i) threads[i]->join(); } int64_t maxTime(bool multiThreaded) { int64_t res = 2 * TimeLimitingCollector::getResolution() + TIME_ALLOWED + SLOW_DOWN; // some slack for less noise in this test if (multiThreaded) res *= (int64_t)MULTI_THREAD_SLACK; // larger slack return res; } String maxTimeStr(bool multiThreaded) { StringStream buf; buf << L"( 2 * resolution + TIME_ALLOWED + SLOW_DOWN = 2 * " << TimeLimitingCollector::getResolution() << L" + " << TIME_ALLOWED << L" + " << SLOW_DOWN << L")"; if (multiThreaded) buf << L" * 
" << MULTI_THREAD_SLACK; return StringUtils::toString(maxTime(multiThreaded)) + L" = " + buf.str(); } CollectorPtr createTimedCollector(MyHitCollectorPtr hc, int64_t timeAllowed, bool greedy) { TimeLimitingCollectorPtr res = newLucene(hc, timeAllowed); res->setGreedy(greedy); // set to true to make sure at least one doc is collected. return res; } void search(CollectorPtr collector) { searcher->search(query, collector); } }; TimeLimitingThread::TimeLimitingThread(bool withTimeout, TimeLimitingCollectorFixture* fixture) { this->withTimeout = withTimeout; this->fixture = fixture; } void TimeLimitingThread::run() { try { if (withTimeout) fixture->doTestTimeout(true, true); else fixture->doTestSearch(); } catch (LuceneException& e) { BOOST_FAIL("Unexpected exception: " << e.getError()); } } const int32_t TimeLimitingCollectorFixture::SLOW_DOWN = 47; const int64_t TimeLimitingCollectorFixture::TIME_ALLOWED = 17 * TimeLimitingCollectorFixture::SLOW_DOWN; // so searches can find about 17 docs. // max time allowed is relaxed for multi-threading tests. // the multi-thread case fails when setting this to 1 (no slack) and launching many threads (>2000). // but this is not a real failure, just noise. const int32_t TimeLimitingCollectorFixture::MULTI_THREAD_SLACK = 7; const int32_t TimeLimitingCollectorFixture::N_DOCS = 3000; const int32_t TimeLimitingCollectorFixture::N_THREADS = 50; const String TimeLimitingCollectorFixture::FIELD_NAME = L"body"; BOOST_FIXTURE_TEST_SUITE(TimeLimitingCollectorTest, TimeLimitingCollectorFixture) /// test search correctness with no timeout BOOST_AUTO_TEST_CASE(testSearch) { doTestSearch(); } /// Test that timeout is obtained, and soon enough BOOST_AUTO_TEST_CASE(testTimeoutGreedy) { doTestTimeout(false, true); } /// Test that timeout is obtained, and soon enough BOOST_AUTO_TEST_CASE(testTimeoutNotGreedy) { doTestTimeout(false, false); } /// Test timeout behavior when resolution is modified. 
BOOST_AUTO_TEST_CASE(testModifyResolution) { // increase and test int64_t resolution = 20 * TimeLimitingCollector::DEFAULT_RESOLUTION; // 400 TimeLimitingCollector::setResolution(resolution); BOOST_CHECK_EQUAL(resolution, TimeLimitingCollector::getResolution()); doTestTimeout(false, true); // decrease much and test resolution = 5; TimeLimitingCollector::setResolution(resolution); BOOST_CHECK_EQUAL(resolution, TimeLimitingCollector::getResolution()); doTestTimeout(false, true); // return to default and test resolution = TimeLimitingCollector::DEFAULT_RESOLUTION; TimeLimitingCollector::setResolution(resolution); BOOST_CHECK_EQUAL(resolution, TimeLimitingCollector::getResolution()); doTestTimeout(false, true); } /// Test correctness with multiple searching threads. BOOST_AUTO_TEST_CASE(testSearchMultiThreaded) { doTestMultiThreads(false); } /// Test correctness with multiple searching threads. BOOST_AUTO_TEST_CASE(testTimeoutMultiThreaded) { doTestMultiThreads(true); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/TopDocsCollectorTest.cpp000066400000000000000000000146721217574114600251260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "KeywordAnalyzer.h" #include "Document.h" #include "TopDocsCollector.h" #include "MatchAllDocsQuery.h" #include "IndexSearcher.h" #include "HitQueue.h" #include "TopDocs.h" #include "ScoreDoc.h" using namespace Lucene; class MyTopsDocCollector : public TopDocsCollector { public: MyTopsDocCollector(int32_t size, Collection scores) : TopDocsCollector(newLucene(size, false)) { this->scores = scores; this->idx = 0; this->base = 0; } virtual ~MyTopsDocCollector() { } protected: int32_t idx; int32_t base; Collection scores; protected: virtual TopDocsPtr newTopDocs(Collection results, int32_t start) { if (!results) return EMPTY_TOPDOCS(); double maxScore = std::numeric_limits::quiet_NaN(); if (start == 0) maxScore = results[0]->score; else { for (int32_t i = pq->size(); i > 1; --i) pq->pop(); maxScore = boost::dynamic_pointer_cast(pq->pop())->score; } return newLucene(totalHits, results, maxScore); } virtual void collect(int32_t doc) { ++totalHits; pq->addOverflow(newLucene(doc + base, scores[idx++])); } virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) { base = docBase; } virtual void setScorer(ScorerPtr scorer) { // Don't do anything. Assign scores in random } virtual bool acceptsDocsOutOfOrder() { return true; } }; class TopDocsCollectorFixture : public LuceneTestFixture { public: TopDocsCollectorFixture() { MAX_SCORE = 9.17561; // Scores array to be used by MyTopDocsCollector. If it is changed, MAX_SCORE must also change. 
const double _scores[] = { 0.7767749, 1.7839992, 8.9925785, 7.9608946, 0.07948637, 2.6356435, 7.4950366, 7.1490803, 8.108544, 4.961808, 2.2423935, 7.285586, 4.6699767, 2.9655676, 6.953706, 5.383931, 6.9916306, 8.365894, 7.888485, 8.723962, 3.1796896, 0.39971232, 1.3077754, 6.8489285, 9.17561, 5.060466, 7.9793315, 8.601509, 4.1858315, 0.28146625 }; scores = Collection::newInstance(_scores, _scores + SIZEOF_ARRAY(_scores)); dir = newLucene(); // populate an index with 30 documents, this should be enough for the test. // The documents have no content - the test uses MatchAllDocsQuery(). IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < 30; ++i) writer->addDocument(newLucene()); writer->close(); } virtual ~TopDocsCollectorFixture() { dir->close(); } protected: DirectoryPtr dir; Collection scores; double MAX_SCORE; public: TopDocsCollectorPtr doSearch(int32_t numResults) { QueryPtr q = newLucene(); IndexSearcherPtr searcher = newLucene(dir, true); TopDocsCollectorPtr tdc = newLucene(numResults, scores); searcher->search(q, tdc); searcher->close(); return tdc; } }; BOOST_FIXTURE_TEST_SUITE(TopDocsCollectorTest, TopDocsCollectorFixture) BOOST_AUTO_TEST_CASE(testInvalidArguments) { int32_t numResults = 5; TopDocsCollectorPtr tdc = doSearch(numResults); // start < 0 BOOST_CHECK_EQUAL(0, tdc->topDocs(-1)->scoreDocs.size()); // start > pq.size() BOOST_CHECK_EQUAL(0, tdc->topDocs(numResults + 1)->scoreDocs.size()); // start == pq.size() BOOST_CHECK_EQUAL(0, tdc->topDocs(numResults)->scoreDocs.size()); // howMany < 0 BOOST_CHECK_EQUAL(0, tdc->topDocs(0, -1)->scoreDocs.size()); // howMany == 0 BOOST_CHECK_EQUAL(0, tdc->topDocs(0, 0)->scoreDocs.size()); } BOOST_AUTO_TEST_CASE(testZeroResults) { TopDocsCollectorPtr tdc = newLucene(5, scores); BOOST_CHECK_EQUAL(0, tdc->topDocs(0, 1)->scoreDocs.size()); } BOOST_AUTO_TEST_CASE(testFirstResultsPage) { TopDocsCollectorPtr tdc = doSearch(15); BOOST_CHECK_EQUAL(10, 
tdc->topDocs(0, 10)->scoreDocs.size()); } BOOST_AUTO_TEST_CASE(testSecondResultsPages) { TopDocsCollectorPtr tdc = doSearch(15); // ask for more results than are available BOOST_CHECK_EQUAL(5, tdc->topDocs(10, 10)->scoreDocs.size()); // ask for 5 results (exactly what there should be tdc = doSearch(15); BOOST_CHECK_EQUAL(5, tdc->topDocs(10, 5)->scoreDocs.size()); // ask for less results than there are tdc = doSearch(15); BOOST_CHECK_EQUAL(4, tdc->topDocs(10, 4)->scoreDocs.size()); } BOOST_AUTO_TEST_CASE(testGetAllResults) { TopDocsCollectorPtr tdc = doSearch(15); BOOST_CHECK_EQUAL(15, tdc->topDocs()->scoreDocs.size()); } BOOST_AUTO_TEST_CASE(testGetResultsFromStart) { TopDocsCollectorPtr tdc = doSearch(15); // should bring all results BOOST_CHECK_EQUAL(15, tdc->topDocs(0)->scoreDocs.size()); tdc = doSearch(15); // get the last 5 only. BOOST_CHECK_EQUAL(5, tdc->topDocs(10)->scoreDocs.size()); } BOOST_AUTO_TEST_CASE(testMaxScore) { // ask for all results TopDocsCollectorPtr tdc = doSearch(15); TopDocsPtr td = tdc->topDocs(); BOOST_CHECK_EQUAL(MAX_SCORE, td->maxScore); // ask for 5 last results tdc = doSearch(15); td = tdc->topDocs(10); BOOST_CHECK_EQUAL(MAX_SCORE, td->maxScore); } /// This does not test the PQ's correctness, but whether topDocs() implementations /// return the results in decreasing score order. BOOST_AUTO_TEST_CASE(testResultsOrder) { TopDocsCollectorPtr tdc = doSearch(15); Collection sd = tdc->topDocs()->scoreDocs; BOOST_CHECK_EQUAL(MAX_SCORE, sd[0]->score); for (int32_t i = 1; i < sd.size(); ++i) BOOST_CHECK(sd[i - 1]->score >= sd[i]->score); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/TopScoreDocCollectorTest.cpp000066400000000000000000000043631217574114600257330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "Document.h" #include "BooleanQuery.h" #include "MatchAllDocsQuery.h" #include "IndexSearcher.h" #include "TopDocsCollector.h" #include "TopScoreDocCollector.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(TopScoreDocCollectorTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testOutOfOrderCollection) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, AnalyzerPtr(), IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < 10; ++i) writer->addDocument(newLucene()); writer->commit(); writer->close(); Collection inOrder = newCollection(false, true); Collection actualTSDCClass = newCollection(L"OutOfOrderTopScoreDocCollector", L"InOrderTopScoreDocCollector"); BooleanQueryPtr bq = newLucene(); // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 // which delegates to BS if there are no mandatory clauses. bq->add(newLucene(), BooleanClause::SHOULD); // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return the clause instead of BQ. 
bq->setMinimumNumberShouldMatch(1); IndexSearcherPtr searcher = newLucene(dir, true); for (int32_t i = 0; i < inOrder.size(); ++i) { TopDocsCollectorPtr tdc = TopScoreDocCollector::create(3, inOrder[i] == 1); BOOST_CHECK_EQUAL(actualTSDCClass[i], tdc->getClassName()); searcher->search(newLucene(), tdc); Collection sd = tdc->topDocs()->scoreDocs; BOOST_CHECK_EQUAL(3, sd.size()); for (int32_t j = 0; j < sd.size(); ++j) BOOST_CHECK_EQUAL(j, sd[j]->doc); } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/WildcardTest.cpp000066400000000000000000000331701217574114600234270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "WildcardQuery.h" #include "Term.h" #include "FuzzyQuery.h" #include "IndexSearcher.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "TermQuery.h" #include "ConstantScoreQuery.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "BooleanQuery.h" #include "PrefixQuery.h" #include "QueryParser.h" #include "WhitespaceAnalyzer.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(WildcardTest, LuceneTestFixture) static RAMDirectoryPtr getIndexStore(const String& field, Collection contents) { RAMDirectoryPtr indexStore = newLucene(); IndexWriterPtr writer = newLucene(indexStore, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < contents.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(field, contents[i], Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->optimize(); writer->close(); return 
indexStore; } static void checkMatches(IndexSearcherPtr searcher, QueryPtr q, int32_t expectedMatches) { Collection result = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(expectedMatches, result.size()); } BOOST_AUTO_TEST_CASE(testEquals) { WildcardQueryPtr wq1 = newLucene(newLucene(L"field", L"b*a")); WildcardQueryPtr wq2 = newLucene(newLucene(L"field", L"b*a")); WildcardQueryPtr wq3 = newLucene(newLucene(L"field", L"b*a")); // reflexive? BOOST_CHECK(wq1->equals(wq2)); BOOST_CHECK(wq2->equals(wq1)); // transitive? BOOST_CHECK(wq2->equals(wq3)); BOOST_CHECK(wq1->equals(wq3)); BOOST_CHECK(!wq1->equals(WildcardQueryPtr())); FuzzyQueryPtr fq = newLucene(newLucene(L"field", L"b*a")); BOOST_CHECK(!wq1->equals(fq)); BOOST_CHECK(!fq->equals(wq1)); } /// Tests if a WildcardQuery that has no wildcard in the term is rewritten to a single TermQuery. /// The boost should be preserved, and the rewrite should return a ConstantScoreQuery if the /// WildcardQuery had a ConstantScore rewriteMethod. 
BOOST_AUTO_TEST_CASE(testTermWithoutWildcard) { RAMDirectoryPtr indexStore = getIndexStore(L"field", newCollection(L"nowildcard", L"nowildcardx")); IndexSearcherPtr searcher = newLucene(indexStore, true); MultiTermQueryPtr wq = newLucene(newLucene(L"field", L"nowildcard")); checkMatches(searcher, wq, 1); wq->setRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); wq->setBoost(0.1); QueryPtr q = searcher->rewrite(wq); BOOST_CHECK(MiscUtils::typeOf(q)); BOOST_CHECK_EQUAL(q->getBoost(), wq->getBoost()); wq->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); wq->setBoost(0.2); q = searcher->rewrite(wq); BOOST_CHECK(MiscUtils::typeOf(q)); BOOST_CHECK_EQUAL(q->getBoost(), wq->getBoost()); wq->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); wq->setBoost(0.3); q = searcher->rewrite(wq); BOOST_CHECK(MiscUtils::typeOf(q)); BOOST_CHECK_EQUAL(q->getBoost(), wq->getBoost()); wq->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); wq->setBoost(0.4); q = searcher->rewrite(wq); BOOST_CHECK(MiscUtils::typeOf(q)); BOOST_CHECK_EQUAL(q->getBoost(), wq->getBoost()); } /// Tests if a WildcardQuery with an empty term is rewritten to an empty BooleanQuery BOOST_AUTO_TEST_CASE(testEmptyTerm) { RAMDirectoryPtr indexStore = getIndexStore(L"field", newCollection(L"nowildcard", L"nowildcardx")); IndexSearcherPtr searcher = newLucene(indexStore, true); MultiTermQueryPtr wq = newLucene(newLucene(L"field", L"")); wq->setRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); checkMatches(searcher, wq, 0); BooleanQueryPtr expected = newLucene(true); BOOST_CHECK(searcher->rewrite(expected)->equals(searcher->rewrite(wq))); } /// Tests if a WildcardQuery that has only a trailing * in the term is rewritten to a /// single PrefixQuery. The boost and rewriteMethod should be preserved. 
BOOST_AUTO_TEST_CASE(testPrefixTerm) { RAMDirectoryPtr indexStore = getIndexStore(L"field", newCollection(L"prefix", L"prefixx")); IndexSearcherPtr searcher = newLucene(indexStore, true); MultiTermQueryPtr wq = newLucene(newLucene(L"field", L"prefix*")); checkMatches(searcher, wq, 2); MultiTermQueryPtr expected = newLucene(newLucene(L"field", L"prefix")); wq->setRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); wq->setBoost(0.1); expected->setRewriteMethod(wq->getRewriteMethod()); expected->setBoost(wq->getBoost()); BOOST_CHECK(searcher->rewrite(expected)->equals(searcher->rewrite(wq))); wq->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); wq->setBoost(0.2); expected->setRewriteMethod(wq->getRewriteMethod()); expected->setBoost(wq->getBoost()); BOOST_CHECK(searcher->rewrite(expected)->equals(searcher->rewrite(wq))); wq->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); wq->setBoost(0.3); expected->setRewriteMethod(wq->getRewriteMethod()); expected->setBoost(wq->getBoost()); BOOST_CHECK(searcher->rewrite(expected)->equals(searcher->rewrite(wq))); wq->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); wq->setBoost(0.4); expected->setRewriteMethod(wq->getRewriteMethod()); expected->setBoost(wq->getBoost()); BOOST_CHECK(searcher->rewrite(expected)->equals(searcher->rewrite(wq))); } BOOST_AUTO_TEST_CASE(testAsterisk) { RAMDirectoryPtr indexStore = getIndexStore(L"body", newCollection(L"metal", L"metals")); IndexSearcherPtr searcher = newLucene(indexStore, true); QueryPtr query1 = newLucene(newLucene(L"body", L"metal")); QueryPtr query2 = newLucene(newLucene(L"body", L"metal*")); QueryPtr query3 = newLucene(newLucene(L"body", L"m*tal")); QueryPtr query4 = newLucene(newLucene(L"body", L"m*tal*")); QueryPtr query5 = newLucene(newLucene(L"body", L"m*tals")); BooleanQueryPtr query6 = newLucene(); query6->add(query5, BooleanClause::SHOULD); BooleanQueryPtr query7 = newLucene(); query7->add(query3, 
BooleanClause::SHOULD); query7->add(query5, BooleanClause::SHOULD); // Queries do not automatically lower-case search terms: QueryPtr query8 = newLucene(newLucene(L"body", L"M*tal*")); checkMatches(searcher, query1, 1); checkMatches(searcher, query2, 2); checkMatches(searcher, query3, 1); checkMatches(searcher, query4, 2); checkMatches(searcher, query5, 1); checkMatches(searcher, query6, 1); checkMatches(searcher, query7, 2); checkMatches(searcher, query8, 0); checkMatches(searcher, newLucene(newLucene(L"body", L"*tall")), 0); checkMatches(searcher, newLucene(newLucene(L"body", L"*tal")), 1); checkMatches(searcher, newLucene(newLucene(L"body", L"*tal*")), 2); } BOOST_AUTO_TEST_CASE(testLotsOfAsterisks) { RAMDirectoryPtr indexStore = getIndexStore(L"body", newCollection(L"metal", L"metals")); IndexSearcherPtr searcher = newLucene(indexStore, true); StringStream term; term << L"m"; for (int32_t i = 0; i < 512; ++i) term << L"*"; term << L"tal"; QueryPtr query3 = newLucene(newLucene(L"body", term.str())); checkMatches(searcher, query3, 1); searcher->close(); indexStore->close(); } BOOST_AUTO_TEST_CASE(testQuestionmark) { RAMDirectoryPtr indexStore = getIndexStore(L"body", newCollection(L"metal", L"metals", L"mXtals", L"mXtXls")); IndexSearcherPtr searcher = newLucene(indexStore, true); QueryPtr query1 = newLucene(newLucene(L"body", L"m?tal")); QueryPtr query2 = newLucene(newLucene(L"body", L"metal?")); QueryPtr query3 = newLucene(newLucene(L"body", L"metals?")); QueryPtr query4 = newLucene(newLucene(L"body", L"m?t?ls")); QueryPtr query5 = newLucene(newLucene(L"body", L"M?t?ls")); QueryPtr query6 = newLucene(newLucene(L"body", L"meta??")); checkMatches(searcher, query1, 1); checkMatches(searcher, query2, 1); checkMatches(searcher, query3, 0); checkMatches(searcher, query4, 3); checkMatches(searcher, query5, 0); checkMatches(searcher, query6, 1); // Query: 'meta??' 
matches 'metals' not 'metal' } /// Test that wild card queries are parsed to the correct type and are searched correctly. /// This test looks at both parsing and execution of wildcard queries. Although placed /// here, it also tests prefix queries, verifying that prefix queries are not parsed into /// wild card queries, and vice-versa. BOOST_AUTO_TEST_CASE(testParsingAndSearching) { String field = L"content"; QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, field, newLucene()); qp->setAllowLeadingWildcard(true); Collection docs = newCollection(L"\\ abcdefg1", L"\\79 hijklmn1", L"\\\\ opqrstu1"); // queries that should find all docs Collection matchAll = newCollection(L"*", L"*1", L"**1", L"*?", L"*?1", L"?*1", L"**", L"***", L"\\\\*"); // queries that should find no docs Collection matchNone = newCollection(L"a*h", L"a?h", L"*a*h", L"?a", L"a?"); // queries that should be parsed to prefix queries Collection< Collection > matchOneDocPrefix = newCollection< Collection >( newCollection(L"a*", L"ab*", L"abc*"), // these should find only doc 0 newCollection(L"h*", L"hi*", L"hij*", L"\\\\7*"), // these should find only doc 1 newCollection(L"o*", L"op*", L"opq*", L"\\\\\\\\*") // these should find only doc 2 ); // queries that should be parsed to wildcard queries Collection< Collection > matchOneDocWild = newCollection< Collection >( newCollection(L"*a*", L"*ab*", L"*abc**", L"ab*e*", L"*g?", L"*f?1", L"abc**"), // these should find only doc 0 newCollection(L"*h*", L"*hi*", L"*hij**", L"hi*k*", L"*n?", L"*m?1", L"hij**"), // these should find only doc 1 newCollection(L"*o*", L"*op*", L"*opq**", L"op*q*", L"*u?", L"*t?1", L"opq**") // these should find only doc 2 ); // prepare the index RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < docs.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(field, docs[i], Field::STORE_NO, Field::INDEX_ANALYZED)); 
writer->addDocument(doc); } writer->close(); IndexSearcherPtr searcher = newLucene(dir, true); // test queries that must find all for (int32_t i = 0; i < matchAll.size(); ++i) { String qtxt = matchAll[i]; QueryPtr q = qp->parse(qtxt); Collection hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(docs.size(), hits.size()); } // test queries that must find none for (int32_t i = 0; i < matchNone.size(); ++i) { String qtxt = matchNone[i]; QueryPtr q = qp->parse(qtxt); Collection hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(0, hits.size()); } // test queries that must be prefix queries and must find only one doc for (int32_t i = 0; i < matchOneDocPrefix.size(); ++i) { for (int32_t j = 0; j < matchOneDocPrefix[i].size(); ++j) { String qtxt = matchOneDocPrefix[i][j]; QueryPtr q = qp->parse(qtxt); BOOST_CHECK(MiscUtils::typeOf(q)); Collection hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(i, hits[0]->doc); } } // test queries that must be wildcard queries and must find only one doc for (int32_t i = 0; i < matchOneDocPrefix.size(); ++i) { for (int32_t j = 0; j < matchOneDocWild[i].size(); ++j) { String qtxt = matchOneDocWild[i][j]; QueryPtr q = qp->parse(qtxt); BOOST_CHECK(MiscUtils::typeOf(q)); Collection hits = searcher->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(1, hits.size()); BOOST_CHECK_EQUAL(i, hits[0]->doc); } } searcher->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/function/000077500000000000000000000000001217574114600221535ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/search/function/CustomScoreQueryTest.cpp000066400000000000000000000264151217574114600270230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "FunctionFixture.h" #include "FieldScoreQuery.h" #include "IndexSearcher.h" #include "QueryParser.h" #include "Query.h" #include "CustomScoreQuery.h" #include "CustomScoreProvider.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "ValueSourceQuery.h" #include "Explanation.h" #include "IndexReader.h" #include "Document.h" #include "FieldCache.h" using namespace Lucene; class CustomAddScoreProvider : public CustomScoreProvider { public: CustomAddScoreProvider(IndexReaderPtr reader) : CustomScoreProvider(reader) { } virtual ~CustomAddScoreProvider() { } public: virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore) { return subQueryScore + valSrcScore; } virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl) { double valSrcScore = valSrcExpl ? 
valSrcExpl->getValue() : 0.0; ExplanationPtr exp = newLucene(valSrcScore + subQueryExpl->getValue(), L"custom score: sum of:"); exp->addDetail(subQueryExpl); if (valSrcExpl) exp->addDetail(valSrcExpl); return exp; } }; class CustomAddQuery : public CustomScoreQuery { public: CustomAddQuery(QueryPtr q, ValueSourceQueryPtr qValSrc) : CustomScoreQuery(q, qValSrc) { } virtual ~CustomAddQuery() { } public: virtual String name() { return L"customAdd"; } protected: virtual CustomScoreProviderPtr getCustomScoreProvider(IndexReaderPtr reader) { return newLucene(reader); } }; class CustomMulAddScoreProvider : public CustomScoreProvider { public: CustomMulAddScoreProvider(IndexReaderPtr reader) : CustomScoreProvider(reader) { } virtual ~CustomMulAddScoreProvider() { } public: virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { if (valSrcScores.empty()) return subQueryScore; if (valSrcScores.size() == 1) return subQueryScore + valSrcScores[0]; // confirm that skipping beyond the last doc, on the previous reader, hits NO_MORE_DOCS return (subQueryScore + valSrcScores[0]) * valSrcScores[1]; // we know there are two } virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls) { if (valSrcExpls.empty()) return subQueryExpl; ExplanationPtr exp = newLucene(valSrcExpls[0]->getValue() + subQueryExpl->getValue(), L"sum of:"); exp->addDetail(subQueryExpl); exp->addDetail(valSrcExpls[0]); if (valSrcExpls.size() == 1) { exp->setDescription(L"CustomMulAdd, sum of:"); return exp; } ExplanationPtr exp2 = newLucene(valSrcExpls[1]->getValue() * exp->getValue(), L"custom score: product of:"); exp2->addDetail(valSrcExpls[1]); exp2->addDetail(exp); return exp2; } }; class CustomMulAddQuery : public CustomScoreQuery { public: CustomMulAddQuery(QueryPtr q, ValueSourceQueryPtr qValSrc1, ValueSourceQueryPtr qValSrc2) : CustomScoreQuery(q, newCollection(qValSrc1, qValSrc2)) { } virtual ~CustomMulAddQuery() { } public: 
virtual String name() { return L"customMulAdd"; } protected: virtual CustomScoreProviderPtr getCustomScoreProvider(IndexReaderPtr reader) { return newLucene(reader); } }; class CustomExternalScoreProvider : public CustomScoreProvider { public: CustomExternalScoreProvider(IndexReaderPtr reader, Collection values) : CustomScoreProvider(reader) { this->values = values; } virtual ~CustomExternalScoreProvider() { } protected: Collection values; public: virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore) { BOOST_CHECK(doc <= reader->maxDoc()); return (double)values[doc]; } }; class CustomExternalQuery : public CustomScoreQuery { public: CustomExternalQuery(QueryPtr q) : CustomScoreQuery(q) { } virtual ~CustomExternalQuery() { } protected: virtual CustomScoreProviderPtr getCustomScoreProvider(IndexReaderPtr reader) { Collection values = FieldCache::DEFAULT()->getInts(reader, FunctionFixture::INT_FIELD); return newLucene(reader, values); } }; class CustomScoreQueryFixture : public FunctionFixture { public: CustomScoreQueryFixture() : FunctionFixture(true) { } virtual ~CustomScoreQueryFixture() { } public: /// since custom scoring modifies the order of docs, map results by doc ids so that we can later compare/verify them MapIntDouble topDocsToMap(TopDocsPtr td) { MapIntDouble h = MapIntDouble::newInstance(); for (int32_t i = 0; i < td->totalHits; ++i) h.put(td->scoreDocs[i]->doc, td->scoreDocs[i]->score); return h; } void verifyResults(double boost, IndexSearcherPtr s, MapIntDouble h1, MapIntDouble h2customNeutral, MapIntDouble h3CustomMul, MapIntDouble h4CustomAdd, MapIntDouble h5CustomMulAdd, QueryPtr q1, QueryPtr q2, QueryPtr q3, QueryPtr q4, QueryPtr q5) { // verify numbers of matches BOOST_CHECK_EQUAL(h1.size(), h2customNeutral.size()); BOOST_CHECK_EQUAL(h1.size(), h3CustomMul.size()); BOOST_CHECK_EQUAL(h1.size(), h4CustomAdd.size()); BOOST_CHECK_EQUAL(h1.size(), h5CustomMulAdd.size()); // verify scores ratios for (MapIntDouble::iterator 
it = h1.begin(); it != h1.end(); ++it) { int32_t doc = it->first; double fieldScore = expectedFieldScore(s->getIndexReader()->document(doc)->get(ID_FIELD)); BOOST_CHECK(fieldScore > 0); double score1 = it->second; double score2 = h2customNeutral.get(doc); BOOST_CHECK_CLOSE_FRACTION(boost * score1, score2, TEST_SCORE_TOLERANCE_DELTA); double score3 = h3CustomMul.get(doc); BOOST_CHECK_CLOSE_FRACTION(boost * fieldScore * score1, score3, TEST_SCORE_TOLERANCE_DELTA); double score4 = h4CustomAdd.get(doc); BOOST_CHECK_CLOSE_FRACTION(boost * (fieldScore + score1), score4, TEST_SCORE_TOLERANCE_DELTA); double score5 = h5CustomMulAdd.get(doc); BOOST_CHECK_CLOSE_FRACTION(boost * fieldScore * (score1 + fieldScore), score5, TEST_SCORE_TOLERANCE_DELTA); } } /// Test that FieldScoreQuery returns docs with expected score. void doTestCustomScore(const String& field, FieldScoreQuery::Type tp, double boost) { IndexSearcherPtr s = newLucene(dir, true); FieldScoreQueryPtr qValSrc = newLucene(field, tp); // a query that would score by the field QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, TEXT_FIELD, anlzr); String qtxt = L"first aid text"; // regular (boolean) query. QueryPtr q1 = qp->parse(qtxt); // custom query, that should score the same as q1. 
CustomScoreQueryPtr q2CustomNeutral = newLucene(q1); q2CustomNeutral->setBoost(boost); // custom query, that should (by default) multiply the scores of q1 by that of the field CustomScoreQueryPtr q3CustomMul = newLucene(q1, qValSrc); q3CustomMul->setStrict(true); q3CustomMul->setBoost(boost); // custom query, that should add the scores of q1 to that of the field CustomScoreQueryPtr q4CustomAdd = newLucene(q1,qValSrc); q4CustomAdd->setStrict(true); q4CustomAdd->setBoost(boost); // custom query, that multiplies and adds the field score to that of q1 CustomScoreQueryPtr q5CustomMulAdd = newLucene(q1, qValSrc, qValSrc); q5CustomMulAdd->setStrict(true); q5CustomMulAdd->setBoost(boost); // do al the searches TopDocsPtr td1 = s->search(q1, FilterPtr(), 1000); TopDocsPtr td2CustomNeutral = s->search(q2CustomNeutral, FilterPtr(), 1000); TopDocsPtr td3CustomMul = s->search(q3CustomMul, FilterPtr(), 1000); TopDocsPtr td4CustomAdd = s->search(q4CustomAdd, FilterPtr(), 1000); TopDocsPtr td5CustomMulAdd = s->search(q5CustomMulAdd, FilterPtr(), 1000); // put results in map so we can verify the scores although they have changed MapIntDouble h1 = topDocsToMap(td1); MapIntDouble h2CustomNeutral = topDocsToMap(td2CustomNeutral); MapIntDouble h3CustomMul = topDocsToMap(td3CustomMul); MapIntDouble h4CustomAdd = topDocsToMap(td4CustomAdd); MapIntDouble h5CustomMulAdd = topDocsToMap(td5CustomMulAdd); verifyResults(boost, s, h1, h2CustomNeutral, h3CustomMul, h4CustomAdd, h5CustomMulAdd, q1, q2CustomNeutral, q3CustomMul, q4CustomAdd, q5CustomMulAdd); } }; BOOST_FIXTURE_TEST_SUITE(CustomScoreQueryTest, CustomScoreQueryFixture) BOOST_AUTO_TEST_CASE(testCustomExternalQuery) { QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, TEXT_FIELD, anlzr); String qtxt = L"first aid text"; // from the doc texts in FunctionFixture. 
QueryPtr q1 = qp->parse(qtxt); QueryPtr q = newLucene(q1); IndexSearcherPtr s = newLucene(dir); TopDocsPtr hits = s->search(q, 1000); BOOST_CHECK_EQUAL(N_DOCS, hits->totalHits); for (int32_t i = 0; i < N_DOCS; ++i) { int32_t doc = hits->scoreDocs[i]->doc; double score = hits->scoreDocs[i]->score; BOOST_CHECK_CLOSE_FRACTION((double)(1 + (4 * doc) % N_DOCS), score, 0.0001); } s->close(); } /// Test that CustomScoreQuery of Type.BYTE returns the expected scores. BOOST_AUTO_TEST_CASE(testCustomScoreByte) { // INT field values are small enough to be parsed as byte doTestCustomScore(INT_FIELD, FieldScoreQuery::BYTE, 1.0); doTestCustomScore(INT_FIELD, FieldScoreQuery::BYTE, 2.0); } /// Test that CustomScoreQuery of Type.INT returns the expected scores. BOOST_AUTO_TEST_CASE(testCustomScoreInt) { // INT field values are small enough to be parsed as int doTestCustomScore(INT_FIELD, FieldScoreQuery::INT, 1.0); doTestCustomScore(INT_FIELD, FieldScoreQuery::INT, 2.0); } /// Test that CustomScoreQuery of Type.DOUBLE returns the expected scores. BOOST_AUTO_TEST_CASE(testCustomScoreDouble) { // INT field can be parsed as double doTestCustomScore(INT_FIELD, FieldScoreQuery::DOUBLE, 1.0); doTestCustomScore(INT_FIELD, FieldScoreQuery::DOUBLE, 5.0); // same values, but in double format doTestCustomScore(DOUBLE_FIELD, FieldScoreQuery::DOUBLE, 1.0); doTestCustomScore(DOUBLE_FIELD, FieldScoreQuery::DOUBLE, 6.0); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/function/DocValuesTest.cpp000066400000000000000000000064251217574114600254130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "DocValues.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(DocValuesTest, LuceneTestFixture) DECLARE_SHARED_PTR(TestableDocValues) class TestableDocValues : public DocValues { public: TestableDocValues(Collection innerArray) { this->innerArray = innerArray; } virtual ~TestableDocValues() { } public: Collection innerArray; public: virtual double doubleVal(int32_t doc) { if (doc < 0 || doc >= innerArray.size()) boost::throw_exception(IndexOutOfBoundsException()); return innerArray[doc]; } virtual String toString(int32_t doc) { return StringUtils::toString(doc); } }; BOOST_AUTO_TEST_CASE(testGetMinValue) { Collection innerArray = newCollection(1.0, 2.0, -1.0, 100.0); TestableDocValuesPtr docValues = newLucene(innerArray); BOOST_CHECK_EQUAL(-1.0, docValues->getMinValue()); // test with without values - NaN innerArray = Collection::newInstance(); docValues = newLucene(innerArray); BOOST_CHECK(MiscUtils::isNaN(docValues->getMinValue())); } BOOST_AUTO_TEST_CASE(testGetMaxValue) { Collection innerArray = newCollection(1.0, 2.0, -1.0, 10.0); TestableDocValuesPtr docValues = newLucene(innerArray); BOOST_CHECK_EQUAL(10.0, docValues->getMaxValue()); innerArray = newCollection(-3.0, -1.0, -100.0); docValues = newLucene(innerArray); BOOST_CHECK_EQUAL(-1.0, docValues->getMaxValue()); innerArray = newCollection(-3.0, -1.0, -100.0, DBL_MAX, DBL_MAX - 1); docValues = newLucene(innerArray); BOOST_CHECK_EQUAL(DBL_MAX, docValues->getMaxValue()); // test with without values - NaN innerArray = Collection::newInstance(); docValues = newLucene(innerArray); BOOST_CHECK(MiscUtils::isNaN(docValues->getMaxValue())); } BOOST_AUTO_TEST_CASE(testGetAverageValue) { Collection innerArray = newCollection(1.0, 1.0, 1.0, 1.0); TestableDocValuesPtr docValues = newLucene(innerArray); BOOST_CHECK_EQUAL(1.0, 
docValues->getAverageValue()); innerArray = newCollection(1.0, 2.0, 3.0, 4.0, 5.0, 6.0); docValues = newLucene(innerArray); BOOST_CHECK_EQUAL(3.5, docValues->getAverageValue()); // test with negative values innerArray = newCollection(-1.0, 2.0); docValues = newLucene(innerArray); BOOST_CHECK_EQUAL(0.5, docValues->getAverageValue()); // test with without values - NaN innerArray = Collection::newInstance(); docValues = newLucene(innerArray); BOOST_CHECK(MiscUtils::isNaN(docValues->getAverageValue())); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/function/FieldScoreQueryTest.cpp000066400000000000000000000204771217574114600265760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "FunctionFixture.h" #include "IndexSearcher.h" #include "FieldScoreQuery.h" #include "QueryUtils.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "Document.h" #include "IndexReader.h" #include "ValueSource.h" #include "DocValues.h" #include "VariantUtils.h" using namespace Lucene; /// Tests here create an index with a few documents, each having an int value indexed /// field and a double value indexed field. The values of these fields are later used /// for scoring. /// /// The rank tests use Hits to verify that docs are ordered (by score) as expected. /// /// The exact score tests use TopDocs top to verify the exact score. class FieldScoreQueryFixture : public FunctionFixture { public: FieldScoreQueryFixture() : FunctionFixture(true) { } virtual ~FieldScoreQueryFixture() { } public: /// Test that FieldScoreQuery returns docs in expected order. 
void doTestRank(const String& field, FieldScoreQuery::Type tp) { IndexSearcherPtr s = newLucene(dir, true); QueryPtr q = newLucene(field,tp); QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(N_DOCS, h.size()); String prevID = L"ID" + StringUtils::toString(N_DOCS + 1); // greater than all ids of docs in this test for (int32_t i = 0; i < h.size(); ++i) { String resID = s->doc(h[i]->doc)->get(ID_FIELD); BOOST_CHECK(resID.compare(prevID) < 0); prevID = resID; } } /// Test that FieldScoreQuery returns docs with expected score. void doTestExactScore(const String& field, FieldScoreQuery::Type tp) { IndexSearcherPtr s = newLucene(dir, true); QueryPtr q = newLucene(field, tp); TopDocsPtr td = s->search(q, FilterPtr(), 1000); BOOST_CHECK_EQUAL(N_DOCS, td->totalHits); Collection sd = td->scoreDocs; for (int32_t i = 0; i < sd.size(); ++i) { double score = sd[i]->score; String id = s->getIndexReader()->document(sd[i]->doc)->get(ID_FIELD); double expectedScore = expectedFieldScore(id); // "ID7" --> 7.0 BOOST_CHECK_CLOSE_FRACTION(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA); } } /// Test that values loaded for FieldScoreQuery are cached properly and consumes /// the proper RAM resources. void doTestCaching(const String& field, FieldScoreQuery::Type tp) { // prepare expected array types for comparison HashMap expectedArrayTypes = HashMap::newInstance(); expectedArrayTypes.put(FieldScoreQuery::BYTE, Collection::newInstance()); expectedArrayTypes.put(FieldScoreQuery::INT, Collection::newInstance()); expectedArrayTypes.put(FieldScoreQuery::DOUBLE, Collection::newInstance()); IndexSearcherPtr s = newLucene(dir, true); Collection innerArray = Collection::newInstance(s->getIndexReader()->getSequentialSubReaders().size()); bool warned = false; // print warning once. 
for (int32_t i = 0; i < 10; ++i) { FieldScoreQueryPtr q = newLucene(field, tp); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(N_DOCS, h.size()); Collection readers = s->getIndexReader()->getSequentialSubReaders(); for (int32_t j = 0; j < readers.size(); ++j) { IndexReaderPtr reader = readers[j]; try { if (i == 0) { innerArray[j] = q->valSrc->getValues(reader)->getInnerArray(); BOOST_CHECK(VariantUtils::equalsType(innerArray[j], expectedArrayTypes.get(tp))); } else BOOST_CHECK(VariantUtils::equals(innerArray[j], q->valSrc->getValues(reader)->getInnerArray())); } catch (UnsupportedOperationException&) { if (!warned) { BOOST_TEST_MESSAGE("WARNING: Cannot fully test values of " << StringUtils::toUTF8(q->toString())); warned = true; } } } } // verify new values are reloaded (not reused) for a new reader s = newLucene(dir, true); FieldScoreQueryPtr q = newLucene(field, tp); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(N_DOCS, h.size()); Collection readers = s->getIndexReader()->getSequentialSubReaders(); for (int32_t j = 0; j < readers.size(); ++j) { IndexReaderPtr reader = readers[j]; try { BOOST_CHECK(!equalCollectionValues(innerArray[j], q->valSrc->getValues(reader)->getInnerArray())); } catch (UnsupportedOperationException&) { if (!warned) { BOOST_TEST_MESSAGE("WARNING: Cannot fully test values of " << StringUtils::toUTF8(q->toString())); warned = true; } } } } }; BOOST_FIXTURE_TEST_SUITE(FieldScoreQueryTest, FieldScoreQueryFixture) /// Test that FieldScoreQuery of Type.BYTE returns docs in expected order. BOOST_AUTO_TEST_CASE(testRankByte) { // INT field values are small enough to be parsed as byte doTestRank(INT_FIELD, FieldScoreQuery::BYTE); } /// Test that FieldScoreQuery of Type.INT returns docs in expected order. BOOST_AUTO_TEST_CASE(testRankInt) { doTestRank(INT_FIELD, FieldScoreQuery::INT); } /// Test that FieldScoreQuery of Type.DOUBLE returns docs in expected order. 
BOOST_AUTO_TEST_CASE(testRankDouble) { // INT field can be parsed as double doTestRank(INT_FIELD, FieldScoreQuery::DOUBLE); // same values, but in double format doTestRank(DOUBLE_FIELD, FieldScoreQuery::DOUBLE); } /// Test that FieldScoreQuery of Type.BYTE returns the expected scores. BOOST_AUTO_TEST_CASE(testExactScoreByte) { // INT field values are small enough to be parsed as byte doTestExactScore(INT_FIELD, FieldScoreQuery::BYTE); } /// Test that FieldScoreQuery of Type.INT returns the expected scores. BOOST_AUTO_TEST_CASE(testExactScoreInt) { // INT field values are small enough to be parsed as byte doTestExactScore(INT_FIELD, FieldScoreQuery::INT); } /// Test that FieldScoreQuery of Type.DOUBLE returns the expected scores. BOOST_AUTO_TEST_CASE(testExactScoreDouble) { // INT field can be parsed as double doTestExactScore(INT_FIELD, FieldScoreQuery::DOUBLE); // same values, but in double format doTestExactScore(DOUBLE_FIELD, FieldScoreQuery::DOUBLE); } /// Test that FieldScoreQuery of Type.BYTE caches/reuses loaded values and consumes /// the proper RAM resources. BOOST_AUTO_TEST_CASE(testCachingByte) { // INT field values are small enough to be parsed as byte doTestCaching(INT_FIELD, FieldScoreQuery::BYTE); } /// Test that FieldScoreQuery of Type.INT caches/reuses loaded values and consumes /// the proper RAM resources. BOOST_AUTO_TEST_CASE(testCachingInt) { // INT field values are small enough to be parsed as byte doTestCaching(INT_FIELD, FieldScoreQuery::INT); } /// Test that FieldScoreQuery of Type.DOUBLE caches/reuses loaded values and consumes /// the proper RAM resources. 
BOOST_AUTO_TEST_CASE(testCachingDouble) { // INT field values can be parsed as float doTestCaching(INT_FIELD, FieldScoreQuery::DOUBLE); // same values, but in double format doTestCaching(DOUBLE_FIELD, FieldScoreQuery::DOUBLE); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/function/FunctionFixture.cpp000066400000000000000000000144071217574114600260210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "FunctionFixture.h" #include "RAMDirectory.h" #include "StandardAnalyzer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "VariantUtils.h" namespace Lucene { /// Actual score computation order is slightly different than assumptions this allows for a small amount of variation const double FunctionFixture::TEST_SCORE_TOLERANCE_DELTA = 0.001; const int32_t FunctionFixture::N_DOCS = 17; const String FunctionFixture::ID_FIELD = L"id"; const String FunctionFixture::TEXT_FIELD = L"text"; const String FunctionFixture::INT_FIELD = L"iii"; const String FunctionFixture::DOUBLE_FIELD = L"fff"; FunctionFixture::FunctionFixture(bool doMultiSegment) { this->doMultiSegment = doMultiSegment; // prepare a small index with just a few documents. 
dir = newLucene(); anlzr = newLucene(LuceneVersion::LUCENE_CURRENT); IndexWriterPtr iw = newLucene(dir, anlzr, IndexWriter::MaxFieldLengthLIMITED); // add docs not exactly in natural ID order, to verify we do check the order of docs by scores int32_t remaining = N_DOCS; Collection done = Collection::newInstance(N_DOCS); int32_t i = 0; while (remaining > 0) { if (done[i]) boost::throw_exception(RuntimeException(L"to set this test correctly N_DOCS=" + StringUtils::toString(N_DOCS) + L" must be primary and greater than 2!")); addDoc(iw, i); done[i] = true; i = (i + 4) % N_DOCS; if (doMultiSegment && remaining % 3 == 0) iw->commit(); --remaining; } iw->close(); } FunctionFixture::~FunctionFixture() { } const Collection FunctionFixture::DOC_TEXT_LINES() { static Collection _DOC_TEXT_LINES; if (!_DOC_TEXT_LINES) { _DOC_TEXT_LINES = Collection::newInstance(); _DOC_TEXT_LINES.add(L"Well, this is just some plain text we use for creating the "); _DOC_TEXT_LINES.add(L"test documents. It used to be a text from an online collection "); _DOC_TEXT_LINES.add(L"devoted to first aid, but if there was there an (online) lawyers "); _DOC_TEXT_LINES.add(L"first aid collection with legal advices, \"it\" might have quite "); _DOC_TEXT_LINES.add(L"probably advised one not to include \"it\"'s text or the text of "); _DOC_TEXT_LINES.add(L"any other online collection in one's code, unless one has money "); _DOC_TEXT_LINES.add(L"that one don't need and one is happy to donate for lawyers "); _DOC_TEXT_LINES.add(L"charity. Anyhow at some point, rechecking the usage of this text, "); _DOC_TEXT_LINES.add(L"it became uncertain that this text is free to use, because "); _DOC_TEXT_LINES.add(L"the web site in the disclaimer of he eBook containing that text "); _DOC_TEXT_LINES.add(L"was not responding anymore, and at the same time, in projGut, "); _DOC_TEXT_LINES.add(L"searching for first aid no longer found that eBook as well. 
"); _DOC_TEXT_LINES.add(L"So here we are, with a perhaps much less interesting "); _DOC_TEXT_LINES.add(L"text for the test, but oh much much safer. "); } return _DOC_TEXT_LINES; } void FunctionFixture::addDoc(IndexWriterPtr iw, int32_t i) { DocumentPtr d = newLucene(); int32_t scoreAndID = i + 1; FieldPtr f = newLucene(ID_FIELD, id2String(scoreAndID), Field::STORE_YES, Field::INDEX_NOT_ANALYZED); // for debug purposes f->setOmitNorms(true); d->add(f); f = newLucene(TEXT_FIELD, L"text of doc" + StringUtils::toString(scoreAndID) + textLine(i), Field::STORE_NO, Field::INDEX_ANALYZED); // for regular search f->setOmitNorms(true); d->add(f); f = newLucene(INT_FIELD, StringUtils::toString(scoreAndID), Field::STORE_YES, Field::INDEX_NOT_ANALYZED); // for function scoring f->setOmitNorms(true); d->add(f); f = newLucene(DOUBLE_FIELD, StringUtils::toString(scoreAndID) + L".000", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); // for function scoring f->setOmitNorms(true); d->add(f); iw->addDocument(d); } String FunctionFixture::id2String(int32_t scoreAndID) { String s = L"000000000" + StringUtils::toString(scoreAndID); // 17 --> ID00017 int32_t n = StringUtils::toString(N_DOCS).length() + 3; int32_t k = s.length() - n; return L"ID" + s.substr(k); } String FunctionFixture::textLine(int32_t docNum) { // some text line for regular search return DOC_TEXT_LINES()[docNum % DOC_TEXT_LINES().size()]; } double FunctionFixture::expectedFieldScore(const String& docIDFieldVal) { // extract expected doc score from its ID Field: "ID7" --> 7.0 return StringUtils::toDouble(docIDFieldVal.substr(2)); } bool FunctionFixture::equalCollectionValues(CollectionValue first, CollectionValue second) { if (!VariantUtils::equalsType(first, second)) return false; if (VariantUtils::typeOf< Collection >(first)) return (VariantUtils::get< Collection >(first).hashCode() == VariantUtils::get< Collection >(second).hashCode()); if (VariantUtils::typeOf< Collection >(first)) return (VariantUtils::get< Collection 
>(first).hashCode() == VariantUtils::get< Collection >(second).hashCode()); if (VariantUtils::typeOf< Collection >(first)) return (VariantUtils::get< Collection >(first).hashCode() == VariantUtils::get< Collection >(second).hashCode()); return false; } } LucenePlusPlus-rel_3.0.4/src/test/search/function/OrdValuesTest.cpp000066400000000000000000000172311217574114600254270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "FunctionFixture.h" #include "IndexSearcher.h" #include "ValueSource.h" #include "OrdFieldSource.h" #include "ReverseOrdFieldSource.h" #include "ValueSourceQuery.h" #include "QueryUtils.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "Document.h" #include "IndexReader.h" #include "DocValues.h" #include "VariantUtils.h" using namespace Lucene; /// Test search based on OrdFieldSource and ReverseOrdFieldSource. /// /// Tests here create an index with a few documents, each having an indexed "id" field. /// The ord values of this field are later used for scoring. /// /// The order tests use Hits to verify that docs are ordered as expected. /// /// The exact score tests use TopDocs top to verify the exact score. class OrdValuesFixture : public FunctionFixture { public: OrdValuesFixture() : FunctionFixture(false) { } virtual ~OrdValuesFixture() { } public: void doTestRank(const String& field, bool inOrder) { IndexSearcherPtr s = newLucene(dir, true); ValueSourcePtr vs; if (inOrder) vs = newLucene(field); else vs = newLucene(field); QueryPtr q = newLucene(vs); QueryUtils::check(q, s); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(N_DOCS, h.size()); String prevID = inOrder ? 
L"IE" : // greater than all ids of docs in this test ("ID0001", etc.) L"IC"; // smaller than all ids of docs in this test ("ID0001", etc.) for (int32_t i = 0; i < h.size(); ++i) { String resID = s->doc(h[i]->doc)->get(ID_FIELD); if (inOrder) BOOST_CHECK(resID.compare(prevID) < 0); else BOOST_CHECK(resID.compare(prevID) > 0); prevID = resID; } } void doTestExactScore(const String& field, bool inOrder) { IndexSearcherPtr s = newLucene(dir, true); ValueSourcePtr vs; if (inOrder) vs = newLucene(field); else vs = newLucene(field); QueryPtr q = newLucene(vs); TopDocsPtr td = s->search(q, FilterPtr(),1000); BOOST_CHECK_EQUAL(N_DOCS, td->totalHits); Collection sd = td->scoreDocs; for (int32_t i = 0; i < sd.size(); ++i) { double score = sd[i]->score; String id = s->getIndexReader()->document(sd[i]->doc)->get(ID_FIELD); double expectedScore = N_DOCS - i; BOOST_CHECK_CLOSE_FRACTION(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA); String expectedId = inOrder ? id2String(N_DOCS - i) : // in-order ==> larger values first id2String(i + 1); // reverse ==> smaller values first BOOST_CHECK_EQUAL(expectedId, id); } } void doTestCaching(const String& field, bool inOrder) { IndexSearcherPtr s = newLucene(dir, true); CollectionValue innerArray = VariantUtils::null(); bool warned = false; // print warning once. 
for (int32_t i = 0; i < 10; ++i) { ValueSourcePtr vs; if (inOrder) vs = newLucene(field); else vs = newLucene(field); ValueSourceQueryPtr q = newLucene(vs); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; try { BOOST_CHECK_EQUAL(N_DOCS, h.size()); Collection readers = s->getIndexReader()->getSequentialSubReaders(); for (int32_t j = 0; j < readers.size(); ++j) { IndexReaderPtr reader = readers[j]; if (i == 0) innerArray = q->valSrc->getValues(reader)->getInnerArray(); else BOOST_CHECK(equalCollectionValues(innerArray, q->valSrc->getValues(reader)->getInnerArray())); } } catch (UnsupportedOperationException&) { if (!warned) { BOOST_TEST_MESSAGE("WARNING: Cannot fully test values of " << StringUtils::toUTF8(q->toString())); warned = true; } } } // verify that different values are loaded for a different field String field2 = INT_FIELD; BOOST_CHECK_NE(field, field2); // otherwise this test is meaningless. ValueSourcePtr vs; if (inOrder) vs = newLucene(field2); else vs = newLucene(field2); ValueSourceQueryPtr q = newLucene(vs); Collection h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(N_DOCS, h.size()); Collection readers = s->getIndexReader()->getSequentialSubReaders(); for (int32_t j = 0; j < readers.size(); ++j) { IndexReaderPtr reader = readers[j]; try { BOOST_CHECK(!equalCollectionValues(innerArray, q->valSrc->getValues(reader)->getInnerArray())); } catch (UnsupportedOperationException&) { if (!warned) { BOOST_TEST_MESSAGE("WARNING: Cannot fully test values of " << StringUtils::toUTF8(q->toString())); warned = true; } } } // verify new values are reloaded (not reused) for a new reader s = newLucene(dir, true); if (inOrder) vs = newLucene(field); else vs = newLucene(field); q = newLucene(vs); h = s->search(q, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(N_DOCS, h.size()); readers = s->getIndexReader()->getSequentialSubReaders(); for (int32_t j = 0; j < readers.size(); ++j) { IndexReaderPtr reader = readers[j]; try { 
BOOST_CHECK(!equalCollectionValues(innerArray, q->valSrc->getValues(reader)->getInnerArray())); } catch (UnsupportedOperationException&) { if (!warned) { BOOST_TEST_MESSAGE("WARNING: Cannot fully test values of " << StringUtils::toUTF8(q->toString())); warned = true; } } } } }; BOOST_FIXTURE_TEST_SUITE(OrdValuesTest, OrdValuesFixture) BOOST_AUTO_TEST_CASE(testOrdFieldRank) { doTestRank(ID_FIELD, true); } BOOST_AUTO_TEST_CASE(testReverseOrdFieldRank) { doTestRank(ID_FIELD, false); } BOOST_AUTO_TEST_CASE(testOrdFieldExactScore) { doTestExactScore(ID_FIELD, true); } BOOST_AUTO_TEST_CASE(testReverseOrdFieldExactScore) { doTestExactScore(ID_FIELD, false); } BOOST_AUTO_TEST_CASE(testCachingOrd) { doTestCaching(ID_FIELD, true); } BOOST_AUTO_TEST_CASE(testCachingReverseOrd) { doTestCaching(ID_FIELD, false); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/payloads/000077500000000000000000000000001217574114600221425ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/search/payloads/PayloadHelper.cpp000066400000000000000000000112301217574114600253740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "PayloadHelper.h" #include "TestUtils.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "PayloadAttribute.h" #include "TokenFilter.h" #include "Payload.h" #include "LowerCaseTokenizer.h" #include "Analyzer.h" namespace Lucene { const String PayloadHelper::NO_PAYLOAD_FIELD = L"noPayloadField"; const String PayloadHelper::MULTI_FIELD = L"multiField"; const String PayloadHelper::FIELD = L"field"; DECLARE_SHARED_PTR(PayloadHelperAnalyzer) class PayloadHelperFilter : public TokenFilter { public: PayloadHelperFilter(TokenStreamPtr input, const String& fieldName) : TokenFilter(input) { this->numSeen = 0; this->fieldName = fieldName; this->payloadAtt = addAttribute(); } virtual ~PayloadHelperFilter() { } LUCENE_CLASS(PayloadHelperFilter); public: String fieldName; int32_t numSeen; PayloadAttributePtr payloadAtt; public: virtual bool incrementToken() { if (input->incrementToken()) { if (fieldName == PayloadHelper::FIELD) payloadAtt->setPayload(newLucene(PayloadHelper::payloadField())); else if (fieldName == PayloadHelper::MULTI_FIELD) { if (numSeen % 2 == 0) payloadAtt->setPayload(newLucene(PayloadHelper::payloadMultiField1())); else payloadAtt->setPayload(newLucene(PayloadHelper::payloadMultiField2())); ++numSeen; } return true; } return false; } }; class PayloadHelperAnalyzer : public Analyzer { public: virtual ~PayloadHelperAnalyzer() { } LUCENE_CLASS(PayloadHelperAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result, fieldName); return result; } }; PayloadHelper::~PayloadHelper() { } const ByteArray PayloadHelper::payloadField() { static ByteArray _payloadField; if (!_payloadField) { _payloadField = ByteArray::newInstance(1); _payloadField[0] = 1; } return _payloadField; } 
const ByteArray PayloadHelper::payloadMultiField1() { static ByteArray _payloadMultiField1; if (!_payloadMultiField1) { _payloadMultiField1 = ByteArray::newInstance(1); _payloadMultiField1[0] = 2; } return _payloadMultiField1; } const ByteArray PayloadHelper::payloadMultiField2() { static ByteArray _payloadMultiField2; if (!_payloadMultiField2) { _payloadMultiField2 = ByteArray::newInstance(1); _payloadMultiField2[0] = 4; } return _payloadMultiField2; } IndexSearcherPtr PayloadHelper::setUp(SimilarityPtr similarity, int32_t numDocs) { RAMDirectoryPtr directory = newLucene(); PayloadHelperAnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setSimilarity(similarity); for (int32_t i = 0; i < numDocs; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(MULTI_FIELD, intToEnglish(i) + L" " + intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(NO_PAYLOAD_FIELD, intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); searcher->setSimilarity(similarity); return searcher; } } LucenePlusPlus-rel_3.0.4/src/test/search/payloads/PayloadNearQueryTest.cpp000066400000000000000000000233601217574114600267370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "DefaultSimilarity.h" #include "RAMDirectory.h" #include "PayloadAttribute.h" #include "TokenFilter.h" #include "Payload.h" #include "LowerCaseTokenizer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "IndexSearcher.h" #include "PayloadNearQuery.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "QueryUtils.h" #include "Analyzer.h" #include "SpanQuery.h" #include "PayloadTermQuery.h" #include "Term.h" #include "AveragePayloadFunction.h" using namespace Lucene; DECLARE_SHARED_PTR(BoostingNearSimilarity) DECLARE_SHARED_PTR(PayloadNearAnalyzer) class BoostingNearIDFExplanation : public IDFExplanation { public: virtual ~BoostingNearIDFExplanation() { } public: virtual double getIdf() { return 1.0; } virtual String explain() { return L"Inexplicable"; } }; class BoostingNearSimilarity : public DefaultSimilarity { public: virtual ~BoostingNearSimilarity() { } public: virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { // we know it is size 4 here, so ignore the offset/length return (double)payload[0]; } virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } virtual double queryNorm(double sumOfSquaredWeights) { return 1.0; } virtual double sloppyFreq(int32_t distance) { return 1.0; } virtual double coord(int32_t overlap, int32_t maxOverlap) { return 1.0; } virtual double tf(double freq) { return 1.0; } virtual IDFExplanationPtr idfExplain(Collection terms, SearcherPtr searcher) { return newLucene(); } }; class PayloadNearFilter : public TokenFilter { public: PayloadNearFilter(ByteArray payload2, ByteArray payload4, TokenStreamPtr input, const String& fieldName) : TokenFilter(input) { this->payload2 = payload2; this->payload4 = payload4; 
this->numSeen = 0; this->fieldName = fieldName; this->payAtt = addAttribute(); } virtual ~PayloadNearFilter() { } LUCENE_CLASS(PayloadNearFilter); public: ByteArray payload2; ByteArray payload4; String fieldName; int32_t numSeen; PayloadAttributePtr payAtt; public: virtual bool incrementToken() { bool result = false; if (input->incrementToken()) { if (numSeen % 2 == 0) payAtt->setPayload(newLucene(payload2)); else payAtt->setPayload(newLucene(payload4)); ++numSeen; result = true; } return result; } }; class PayloadNearAnalyzer : public Analyzer { public: PayloadNearAnalyzer(ByteArray payload2, ByteArray payload4) { this->payload2 = payload2; this->payload4 = payload4; } virtual ~PayloadNearAnalyzer() { } LUCENE_CLASS(PayloadNearAnalyzer); protected: ByteArray payload2; ByteArray payload4; public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(payload2, payload4, result, fieldName); return result; } }; class PayloadNearQueryFixture : public LuceneTestFixture { public: PayloadNearQueryFixture() { similarity = newLucene(); payload2 = ByteArray::newInstance(1); payload2[0] = 2; payload4 = ByteArray::newInstance(1); payload4[0] = 4; RAMDirectoryPtr directory = newLucene(); PayloadNearAnalyzerPtr analyzer = newLucene(payload2, payload4); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setSimilarity(similarity); for (int32_t i = 0; i < 1000; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED)); String txt = intToEnglish(i) + L" " + intToEnglish(i + 1); doc->add(newLucene(L"field2", txt, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->optimize(); writer->close(); searcher = newLucene(directory, true); searcher->setSimilarity(similarity); } virtual ~PayloadNearQueryFixture() { } protected: IndexSearcherPtr searcher; 
BoostingNearSimilarityPtr similarity; ByteArray payload2; ByteArray payload4; public: PayloadNearQueryPtr newPhraseQuery(const String& fieldName, const String& phrase, bool inOrder) { std::wstring phraseClauses(phrase.c_str()); Collection clauses = Collection::newInstance(); boost::wsregex_token_iterator tokenIterator(phraseClauses.begin(), phraseClauses.end(), boost::wregex(L"[\\s]+"), -1); boost::wsregex_token_iterator endToken; while (tokenIterator != endToken) { clauses.add(newLucene(newLucene(fieldName, *tokenIterator), newLucene())); ++tokenIterator; } return newLucene(clauses, 0, inOrder); } SpanNearQueryPtr spanNearQuery(const String& fieldName, const String& words) { std::wstring phraseClauses(words.c_str()); Collection clauses = Collection::newInstance(); boost::wsregex_token_iterator tokenIterator(phraseClauses.begin(), phraseClauses.end(), boost::wregex(L"[\\s]+"), -1); boost::wsregex_token_iterator endToken; while (tokenIterator != endToken) { clauses.add(newLucene(newLucene(fieldName, *tokenIterator), newLucene())); ++tokenIterator; } return newLucene(clauses, 10000, false); } }; BOOST_FIXTURE_TEST_SUITE(PayloadNearQueryTest, PayloadNearQueryFixture) BOOST_AUTO_TEST_CASE(testSetup) { PayloadNearQueryPtr query = newPhraseQuery(L"field", L"twenty two", true); QueryUtils::check(query); // all 10 hits should have score = 3 because adjacent terms have payloads of 2, 4 and all the similarity factors are set to 1 TopDocsPtr hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); BOOST_CHECK_EQUAL(hits->totalHits, 10); for (int32_t j = 0; j < hits->scoreDocs.size(); ++j) { ScoreDocPtr doc = hits->scoreDocs[j]; BOOST_CHECK_EQUAL(doc->score, 3); } for (int32_t i = 1; i < 10; ++i) { query = newPhraseQuery(L"field", intToEnglish(i) + L" hundred", true); // all should have score = 3 because adjacent terms have payloads of 2, 4 and all the similarity factors are set to 1 hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); 
BOOST_CHECK_EQUAL(hits->totalHits, 100); for (int32_t j = 0; j < hits->scoreDocs.size(); ++j) { ScoreDocPtr doc = hits->scoreDocs[j]; BOOST_CHECK_EQUAL(doc->score, 3); } } } BOOST_AUTO_TEST_CASE(testPayloadNear) { SpanNearQueryPtr q1 = spanNearQuery(L"field2", L"twenty two"); SpanNearQueryPtr q2 = spanNearQuery(L"field2", L"twenty three"); Collection clauses = newCollection(q1, q2); PayloadNearQueryPtr query = newLucene(clauses, 10, false); BOOST_CHECK_EQUAL(12, searcher->search(query, FilterPtr(), 100)->totalHits); } BOOST_AUTO_TEST_CASE(testLongerSpan) { SpanNearQueryPtr query = newPhraseQuery(L"field", L"nine hundred ninety nine", true); TopDocsPtr hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); ScoreDocPtr doc = hits->scoreDocs[0]; BOOST_CHECK_EQUAL(hits->totalHits, 1); // should have score = 3 because adjacent terms have payloads of 2,4 BOOST_CHECK_EQUAL(doc->score, 3); } BOOST_AUTO_TEST_CASE(testComplexNested) { // combine ordered and unordered spans with some nesting to make sure all payloads are counted SpanQueryPtr q1 = newPhraseQuery(L"field", L"nine hundred", true); SpanQueryPtr q2 = newPhraseQuery(L"field", L"ninety nine", true); SpanQueryPtr q3 = newPhraseQuery(L"field", L"nine ninety", false); SpanQueryPtr q4 = newPhraseQuery(L"field", L"hundred nine", false); Collection clauses = newCollection( newLucene(newCollection(q1, q2), 0, true), newLucene(newCollection(q3, q4), 0, false) ); PayloadNearQueryPtr query = newLucene(clauses, 0, false); TopDocsPtr hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); // should be only 1 hit - doc 999 BOOST_CHECK_EQUAL(hits->scoreDocs.size(), 1); // the score should be 3 - the average of all the underlying payloads ScoreDocPtr doc = hits->scoreDocs[0]; BOOST_CHECK_EQUAL(doc->score, 3); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/search/payloads/PayloadTermQueryTest.cpp000066400000000000000000000304341217574114600267610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "IndexSearcher.h" #include "DefaultSimilarity.h" #include "RAMDirectory.h" #include "Analyzer.h" #include "TokenFilter.h" #include "LowerCaseTokenizer.h" #include "PayloadAttribute.h" #include "Payload.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "PayloadTermQuery.h" #include "Term.h" #include "MaxPayloadFunction.h" #include "AveragePayloadFunction.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "CheckHits.h" #include "TermSpans.h" #include "SpanTermQuery.h" #include "QueryUtils.h" #include "BooleanClause.h" #include "BooleanQuery.h" #include "PayloadHelper.h" #include "MiscUtils.h" using namespace Lucene; DECLARE_SHARED_PTR(BoostingTermSimilarity) DECLARE_SHARED_PTR(PayloadTermAnalyzer) class BoostingTermSimilarity : public DefaultSimilarity { public: virtual ~BoostingTermSimilarity() { } public: virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { // we know it is size 4 here, so ignore the offset/length return (double)payload[0]; } virtual double lengthNorm(const String& fieldName, int32_t numTokens) { return 1.0; } virtual double queryNorm(double sumOfSquaredWeights) { return 1.0; } virtual double sloppyFreq(int32_t distance) { return 1.0; } virtual double coord(int32_t overlap, int32_t maxOverlap) { return 1.0; } virtual double idf(int32_t docFreq, int32_t 
numDocs) { return 1.0; } virtual double tf(double freq) { return freq == 0.0 ? 0.0 : 1.0; } }; class FullSimilarity : public DefaultSimilarity { public: virtual ~FullSimilarity() { } public: virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { // we know it is size 4 here, so ignore the offset/length return payload[0]; } }; class PayloadTermFilter : public TokenFilter { public: PayloadTermFilter(ByteArray payloadField, ByteArray payloadMultiField1, ByteArray payloadMultiField2, TokenStreamPtr input, const String& fieldName) : TokenFilter(input) { this->payloadField = payloadField; this->payloadMultiField1 = payloadMultiField1; this->payloadMultiField2 = payloadMultiField2; this->numSeen = 0; this->fieldName = fieldName; this->payloadAtt = addAttribute(); } virtual ~PayloadTermFilter() { } LUCENE_CLASS(PayloadTermFilter); public: ByteArray payloadField; ByteArray payloadMultiField1; ByteArray payloadMultiField2; String fieldName; int32_t numSeen; PayloadAttributePtr payloadAtt; public: virtual bool incrementToken() { bool hasNext = input->incrementToken(); if (hasNext) { if (fieldName == L"field") payloadAtt->setPayload(newLucene(payloadField)); else if (fieldName == L"multiField") { if (numSeen % 2 == 0) payloadAtt->setPayload(newLucene(payloadMultiField1)); else payloadAtt->setPayload(newLucene(payloadMultiField2)); ++numSeen; } return true; } else return false; } }; class PayloadTermAnalyzer : public Analyzer { public: PayloadTermAnalyzer(ByteArray payloadField, ByteArray payloadMultiField1, ByteArray payloadMultiField2) { this->payloadField = payloadField; this->payloadMultiField1 = payloadMultiField1; this->payloadMultiField2 = payloadMultiField2; } virtual ~PayloadTermAnalyzer() { } LUCENE_CLASS(PayloadTermAnalyzer); protected: ByteArray payloadField; ByteArray payloadMultiField1; ByteArray payloadMultiField2; public: virtual TokenStreamPtr tokenStream(const String& 
fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(payloadField, payloadMultiField1, payloadMultiField2, result, fieldName); return result; } }; class PayloadTermQueryFixture : public LuceneTestFixture { public: PayloadTermQueryFixture() { similarity = newLucene(); payloadField = ByteArray::newInstance(1); payloadField[0] = 1; payloadMultiField1 = ByteArray::newInstance(1); payloadMultiField1[0] = 2; payloadMultiField2 = ByteArray::newInstance(1); payloadMultiField2[0] = 4; directory = newLucene(); PayloadTermAnalyzerPtr analyzer = newLucene(payloadField, payloadMultiField1, payloadMultiField2); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); writer->setSimilarity(similarity); for (int32_t i = 0; i < 1000; ++i) { DocumentPtr doc = newLucene(); FieldPtr noPayloadField = newLucene(PayloadHelper::NO_PAYLOAD_FIELD, intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED); doc->add(noPayloadField); doc->add(newLucene(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"multiField", intToEnglish(i) + L" " + intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->optimize(); writer->close(); searcher = newLucene(directory, true); searcher->setSimilarity(similarity); } virtual ~PayloadTermQueryFixture() { } protected: IndexSearcherPtr searcher; BoostingTermSimilarityPtr similarity; ByteArray payloadField; ByteArray payloadMultiField1; ByteArray payloadMultiField2; RAMDirectoryPtr directory; }; BOOST_FIXTURE_TEST_SUITE(PayloadTermQueryTest, PayloadTermQueryFixture) BOOST_AUTO_TEST_CASE(testSetup) { PayloadTermQueryPtr query = newLucene(newLucene(L"field", L"seventy"), newLucene()); TopDocsPtr hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); BOOST_CHECK_EQUAL(hits->totalHits, 100); // they should all have the exact same score, because they all contain seventy once, and we set all the 
other similarity factors to be 1 BOOST_CHECK_EQUAL(hits->getMaxScore(), 1); for (int32_t i = 0; i < hits->scoreDocs.size(); ++i) { ScoreDocPtr doc = hits->scoreDocs[i]; BOOST_CHECK_EQUAL(doc->score, 1); } CheckHits::checkExplanations(query, PayloadHelper::FIELD, searcher, true); SpansPtr spans = query->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans); BOOST_CHECK(MiscUtils::typeOf(spans)); } BOOST_AUTO_TEST_CASE(testQuery) { PayloadTermQueryPtr BoostingTermFuncTermQuery = newLucene(newLucene(PayloadHelper::MULTI_FIELD, L"seventy"), newLucene()); QueryUtils::check(BoostingTermFuncTermQuery); SpanTermQueryPtr spanTermQuery = newLucene(newLucene(PayloadHelper::MULTI_FIELD, L"seventy")); BOOST_CHECK(BoostingTermFuncTermQuery->equals(spanTermQuery) == spanTermQuery->equals(BoostingTermFuncTermQuery)); PayloadTermQueryPtr BoostingTermFuncTermQuery2 = newLucene(newLucene(PayloadHelper::MULTI_FIELD, L"seventy"), newLucene()); QueryUtils::checkUnequal(BoostingTermFuncTermQuery, BoostingTermFuncTermQuery2); } BOOST_AUTO_TEST_CASE(testMultipleMatchesPerDoc) { PayloadTermQueryPtr query = newLucene(newLucene(PayloadHelper::MULTI_FIELD, L"seventy"), newLucene()); TopDocsPtr hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); BOOST_CHECK_EQUAL(hits->totalHits, 100); // they should all have the exact same score, because they all contain seventy once, and we set all the other similarity factors to be 1 BOOST_CHECK_EQUAL(hits->getMaxScore(), 4.0); // there should be exactly 10 items that score a 4, all the rest should score a 2 // The 10 items are: 70 + i*100 where i in [0-9] int32_t numTens = 0; for (int32_t i = 0; i < hits->scoreDocs.size(); ++i) { ScoreDocPtr doc = hits->scoreDocs[i]; if (doc->doc % 10 == 0) { ++numTens; BOOST_CHECK_EQUAL(doc->score, 4.0); } else BOOST_CHECK_EQUAL(doc->score, 2.0); } BOOST_CHECK_EQUAL(numTens, 10); CheckHits::checkExplanations(query, L"field", searcher, true); SpansPtr spans = query->getSpans(searcher->getIndexReader()); 
BOOST_CHECK(spans); BOOST_CHECK(MiscUtils::typeOf(spans)); // should be two matches per document int32_t count = 0; // 100 hits times 2 matches per hit, we should have 200 in count while (spans->next()) ++count; BOOST_CHECK_EQUAL(count, 200); } BOOST_AUTO_TEST_CASE(testIgnoreSpanScorer) { PayloadTermQueryPtr query = newLucene(newLucene(PayloadHelper::MULTI_FIELD, L"seventy"), newLucene(), false); IndexSearcherPtr theSearcher = newLucene(directory, true); theSearcher->setSimilarity(newLucene()); TopDocsPtr hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); BOOST_CHECK_EQUAL(hits->totalHits, 100); // they should all have the exact same score, because they all contain seventy once, and we set all the other similarity factors to be 1 BOOST_CHECK_EQUAL(hits->getMaxScore(), 4.0); // there should be exactly 10 items that score a 4, all the rest should score a 2 // The 10 items are: 70 + i*100 where i in [0-9] int32_t numTens = 0; for (int32_t i = 0; i < hits->scoreDocs.size(); ++i) { ScoreDocPtr doc = hits->scoreDocs[i]; if (doc->doc % 10 == 0) { ++numTens; BOOST_CHECK_EQUAL(doc->score, 4.0); } else BOOST_CHECK_EQUAL(doc->score, 2.0); } BOOST_CHECK_EQUAL(numTens, 10); CheckHits::checkExplanations(query, L"field", searcher, true); SpansPtr spans = query->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans); BOOST_CHECK(MiscUtils::typeOf(spans)); // should be two matches per document int32_t count = 0; // 100 hits times 2 matches per hit, we should have 200 in count while (spans->next()) ++count; BOOST_CHECK_EQUAL(count, 200); } BOOST_AUTO_TEST_CASE(testNoMatch) { PayloadTermQueryPtr query = newLucene(newLucene(PayloadHelper::FIELD, L"junk"), newLucene()); TopDocsPtr hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); BOOST_CHECK_EQUAL(hits->totalHits, 0); } BOOST_AUTO_TEST_CASE(testNoPayload) { PayloadTermQueryPtr q1 = newLucene(newLucene(PayloadHelper::NO_PAYLOAD_FIELD, L"zero"), newLucene()); PayloadTermQueryPtr q2 = 
newLucene(newLucene(PayloadHelper::NO_PAYLOAD_FIELD, L"foo"), newLucene()); BooleanClausePtr c1 = newLucene(q1, BooleanClause::MUST); BooleanClausePtr c2 = newLucene(q2, BooleanClause::MUST_NOT); BooleanQueryPtr query = newLucene(); query->add(c1); query->add(c2); TopDocsPtr hits = searcher->search(query, FilterPtr(), 100); BOOST_CHECK(hits); BOOST_CHECK_EQUAL(hits->totalHits, 1); Collection results = newCollection(0); CheckHits::checkHitCollector(query, PayloadHelper::NO_PAYLOAD_FIELD, searcher, results); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/spans/000077500000000000000000000000001217574114600214525ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/search/spans/BasicSpansTest.cpp000066400000000000000000000430011217574114600250420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Explanation.h" #include "TestUtils.h" #include "IndexSearcher.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "TermQuery.h" #include "Term.h" #include "CheckHits.h" #include "PhraseQuery.h" #include "BooleanQuery.h" #include "SpanTermQuery.h" #include "SpanNearQuery.h" #include "QueryUtils.h" #include "SpanNotQuery.h" #include "SpanOrQuery.h" #include "SpanFirstQuery.h" using namespace Lucene; /// Tests basic search capabilities. /// /// Uses a collection of 1000 documents, each the english rendition of their document number. /// For example, the document numbered 333 has text "three hundred thirty three". 
/// /// Tests are each a single query, and its hits are checked to ensure that all and only the /// correct documents are returned, thus providing end-to-end testing of the indexing and /// search code. class BasicSpansFixture : public LuceneTestFixture { public: BasicSpansFixture() { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < 1000; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); searcher = newLucene(directory, true); } virtual ~BasicSpansFixture() { } protected: IndexSearcherPtr searcher; public: void checkHits(QueryPtr query, Collection results) { CheckHits::checkHits(query, L"field", searcher, results); } bool skipTo(SpansPtr s, int32_t target) { do { if (!s->next()) return false; } while (target > s->doc()); return true; } }; BOOST_FIXTURE_TEST_SUITE(BasicSpansTest, BasicSpansFixture) BOOST_AUTO_TEST_CASE(testTerm) { QueryPtr query = newLucene(newLucene(L"field", L"seventy")); static const int32_t results[] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979 }; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); } BOOST_AUTO_TEST_CASE(testTerm2) { QueryPtr query = newLucene(newLucene(L"field", L"seventish")); checkHits(query, Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testPhrase) { PhraseQueryPtr query = newLucene(); 
query->add(newLucene(L"field", L"seventy")); query->add(newLucene(L"field", L"seven")); static const int32_t results[] = {77, 177, 277, 377, 477, 577, 677, 777, 877, 977}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); } BOOST_AUTO_TEST_CASE(testPhrase2) { PhraseQueryPtr query = newLucene(); query->add(newLucene(L"field", L"seventish")); query->add(newLucene(L"field", L"sevenon")); checkHits(query, Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testBoolean) { BooleanQueryPtr query = newLucene(); query->add(newLucene(newLucene(L"field", L"seventy")), BooleanClause::MUST); query->add(newLucene(newLucene(L"field", L"seven")), BooleanClause::MUST); static const int32_t results[] = { 77, 777, 177, 277, 377, 477, 577, 677, 770, 771, 772, 773, 774, 775, 776, 778, 779, 877, 977 }; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); } BOOST_AUTO_TEST_CASE(testBoolean2) { BooleanQueryPtr query = newLucene(); query->add(newLucene(newLucene(L"field", L"sevento")), BooleanClause::MUST); query->add(newLucene(newLucene(L"field", L"sevenly")), BooleanClause::MUST); checkHits(query, Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testSpanNearExact) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"seventy")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"seven")); SpanNearQueryPtr query = newLucene(newCollection(term1, term2), 0, true); static const int32_t results[] = {77, 177, 277, 377, 477, 577, 677, 777, 877, 977}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); BOOST_CHECK(searcher->explain(query, 77)->getValue() > 0.0); BOOST_CHECK(searcher->explain(query, 977)->getValue() > 0.0); QueryUtils::check(term1); QueryUtils::check(term2); QueryUtils::checkUnequal(term1, term2); } BOOST_AUTO_TEST_CASE(testSpanNearUnordered) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"nine")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", 
L"six")); SpanNearQueryPtr query = newLucene(newCollection(term1, term2), 4, false); static const int32_t results[] = {609, 629, 639, 649, 659, 669, 679, 689, 699, 906, 926, 936, 946, 956, 966, 976, 986, 996}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); } BOOST_AUTO_TEST_CASE(testSpanNearOrdered) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"nine")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"six")); SpanNearQueryPtr query = newLucene(newCollection(term1, term2), 4, true); static const int32_t results[] = {906, 926, 936, 946, 956, 966, 976, 986, 996}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); } BOOST_AUTO_TEST_CASE(testSpanNot) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"eight")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"one")); SpanNearQueryPtr near1 = newLucene(newCollection(term1, term2), 4, true); SpanTermQueryPtr term3 = newLucene(newLucene(L"field", L"forty")); SpanNotQueryPtr query = newLucene(near1, term3); static const int32_t results[] = {801, 821, 831, 851, 861, 871, 881, 891}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); BOOST_CHECK(searcher->explain(query, 801)->getValue() > 0.0); BOOST_CHECK(searcher->explain(query, 891)->getValue() > 0.0); } BOOST_AUTO_TEST_CASE(testSpanWithMultipleNotSingle) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"eight")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"one")); SpanNearQueryPtr near1 = newLucene(newCollection(term1, term2), 4, true); SpanTermQueryPtr term3 = newLucene(newLucene(L"field", L"forty")); SpanOrQueryPtr or1 = newLucene(newCollection(term3)); SpanNotQueryPtr query = newLucene(near1, or1); static const int32_t results[] = {801, 821, 831, 851, 861, 871, 881, 891}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); BOOST_CHECK(searcher->explain(query, 801)->getValue() 
> 0.0); BOOST_CHECK(searcher->explain(query, 891)->getValue() > 0.0); } BOOST_AUTO_TEST_CASE(testSpanWithMultipleNotMany) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"eight")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"one")); SpanNearQueryPtr near1 = newLucene(newCollection(term1, term2), 4, true); SpanTermQueryPtr term3 = newLucene(newLucene(L"field", L"forty")); SpanTermQueryPtr term4 = newLucene(newLucene(L"field", L"sixty")); SpanTermQueryPtr term5 = newLucene(newLucene(L"field", L"eighty")); SpanOrQueryPtr or1 = newLucene(newCollection(term3, term4, term5)); SpanNotQueryPtr query = newLucene(near1, or1); static const int32_t results[] = {801, 821, 831, 851, 871, 891}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); BOOST_CHECK(searcher->explain(query, 801)->getValue() > 0.0); BOOST_CHECK(searcher->explain(query, 891)->getValue() > 0.0); } BOOST_AUTO_TEST_CASE(testNpeInSpanNearWithSpanNot) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"eight")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"one")); SpanNearQueryPtr near1 = newLucene(newCollection(term1, term2), 4, true); SpanTermQueryPtr hun = newLucene(newLucene(L"field", L"hundred")); SpanTermQueryPtr term3 = newLucene(newLucene(L"field", L"forty")); SpanNearQueryPtr exclude1 = newLucene(newCollection(hun, term3), 1, true); SpanNotQueryPtr query = newLucene(near1, exclude1); static const int32_t results[] = {801, 821, 831, 851, 861, 871, 881, 891}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); BOOST_CHECK(searcher->explain(query, 801)->getValue() > 0.0); BOOST_CHECK(searcher->explain(query, 891)->getValue() > 0.0); } BOOST_AUTO_TEST_CASE(testNpeInSpanNearInSpanFirstInSpanNot) { int32_t n = 5; SpanTermQueryPtr hun = newLucene(newLucene(L"field", L"hundred")); SpanTermQueryPtr term40 = newLucene(newLucene(L"field", L"forty")); SpanTermQueryPtr term40c = 
boost::dynamic_pointer_cast(term40->clone()); SpanFirstQueryPtr include = newLucene(term40, n); SpanNearQueryPtr near1 = newLucene(newCollection(hun, term40c), n - 1, true); SpanFirstQueryPtr exclude = newLucene(near1, n - 1); SpanNotQueryPtr query = newLucene(include, exclude); static const int32_t results[] = {40, 41, 42, 43, 44, 45, 46, 47, 48, 49}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); } BOOST_AUTO_TEST_CASE(testSpanFirst) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"five")); SpanFirstQueryPtr query = newLucene(term1, 1); static const int32_t results[] = { 5, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599 }; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); BOOST_CHECK(searcher->explain(query, 5)->getValue() > 0.0); BOOST_CHECK(searcher->explain(query, 599)->getValue() > 0.0); } BOOST_AUTO_TEST_CASE(testSpanOr) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"thirty")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"three")); SpanNearQueryPtr near1 = newLucene(newCollection(term1, term2), 0, true); SpanTermQueryPtr term3 = newLucene(newLucene(L"field", L"forty")); SpanTermQueryPtr term4 = newLucene(newLucene(L"field", L"seven")); SpanNearQueryPtr near2 = newLucene(newCollection(term3, term4), 0, true); SpanOrQueryPtr query = newLucene(newCollection(near1, near2)); static const int32_t results[] = { 33, 47, 133, 147, 233, 247, 333, 347, 433, 447, 533, 547, 633, 647, 733, 747, 833, 847, 933, 947 }; 
checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); BOOST_CHECK(searcher->explain(query, 33)->getValue() > 0.0); BOOST_CHECK(searcher->explain(query, 947)->getValue() > 0.0); } BOOST_AUTO_TEST_CASE(testSpanExactNested) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"three")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"hundred")); SpanNearQueryPtr near1 = newLucene(newCollection(term1, term2), 0, true); SpanTermQueryPtr term3 = newLucene(newLucene(L"field", L"thirty")); SpanTermQueryPtr term4 = newLucene(newLucene(L"field", L"three")); SpanNearQueryPtr near2 = newLucene(newCollection(term3, term4), 0, true); SpanNearQueryPtr query = newLucene(newCollection(near1, near2), 0, true); static const int32_t results[] = {333}; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); BOOST_CHECK(searcher->explain(query, 333)->getValue() > 0.0); } BOOST_AUTO_TEST_CASE(testSpanNearOr) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"six")); SpanTermQueryPtr term3 = newLucene(newLucene(L"field", L"seven")); SpanTermQueryPtr term5 = newLucene(newLucene(L"field", L"seven")); SpanTermQueryPtr term6 = newLucene(newLucene(L"field", L"six")); SpanOrQueryPtr to1 = newLucene(newCollection(term1, term3)); SpanOrQueryPtr to2 = newLucene(newCollection(term5, term6)); SpanNearQueryPtr query = newLucene(newCollection(to1, to2), 10, true); static const int32_t results[] = { 606, 607, 626, 627, 636, 637, 646, 647, 656, 657, 666, 667, 676, 677, 686, 687, 696, 697, 706, 707, 726, 727, 736, 737, 746, 747, 756, 757, 766, 767, 776, 777, 786, 787, 796, 797 }; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); } BOOST_AUTO_TEST_CASE(testSpanComplex1) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"six")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"hundred")); SpanNearQueryPtr near1 = newLucene(newCollection(term1, term2), 0, true); 
SpanTermQueryPtr term3 = newLucene(newLucene(L"field", L"seven")); SpanTermQueryPtr term4 = newLucene(newLucene(L"field", L"hundred")); SpanNearQueryPtr near2 = newLucene(newCollection(term3, term4), 0, true); SpanTermQueryPtr term5 = newLucene(newLucene(L"field", L"seven")); SpanTermQueryPtr term6 = newLucene(newLucene(L"field", L"six")); SpanOrQueryPtr to1 = newLucene(newCollection(near1, near2)); SpanOrQueryPtr to2 = newLucene(newCollection(term5, term6)); SpanNearQueryPtr query = newLucene(newCollection(to1, to2), 100, true); static const int32_t results[] = { 606, 607, 626, 627, 636, 637, 646, 647, 656, 657, 666, 667, 676, 677, 686, 687, 696, 697, 706, 707, 726, 727, 736, 737, 746, 747, 756, 757, 766, 767, 776, 777, 786, 787, 796, 797 }; checkHits(query, Collection::newInstance(results, results + SIZEOF_ARRAY(results))); } BOOST_AUTO_TEST_CASE(testSpansSkipTo) { SpanTermQueryPtr term1 = newLucene(newLucene(L"field", L"seventy")); SpanTermQueryPtr term2 = newLucene(newLucene(L"field", L"seventy")); SpansPtr spans1 = term1->getSpans(searcher->getIndexReader()); SpansPtr spans2 = term2->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans1->next()); BOOST_CHECK(spans2->next()); bool hasMore = true; do { hasMore = skipTo(spans1, spans2->doc()); BOOST_CHECK_EQUAL(hasMore, spans2->skipTo(spans2->doc())); BOOST_CHECK_EQUAL(spans1->doc(), spans2->doc()); } while (hasMore); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/spans/FieldMaskingSpanQueryTest.cpp000066400000000000000000000311031217574114600272210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "IndexSearcher.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "FieldMaskingSpanQuery.h" #include "SpanTermQuery.h" #include "SpanOrQuery.h" #include "SpanNearQuery.h" #include "Term.h" #include "QueryUtils.h" #include "CheckHits.h" #include "Spans.h" using namespace Lucene; class FieldMaskingSpanQueryFixture : public LuceneTestFixture { public: FieldMaskingSpanQueryFixture() { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer= newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc(newCollection( field(L"id", L"0"), field(L"gender", L"male"), field(L"first", L"james"), field(L"last", L"jones"))) ); writer->addDocument(doc(newCollection( field(L"id", L"1"), field(L"gender", L"male"), field(L"first", L"james"), field(L"last", L"smith"), field(L"gender", L"female"), field(L"first", L"sally"), field(L"last", L"jones"))) ); writer->addDocument(doc(newCollection( field(L"id", L"2"), field(L"gender", L"female"), field(L"first", L"greta"), field(L"last", L"jones"), field(L"gender", L"female"), field(L"first", L"sally"), field(L"last", L"smith"), field(L"gender", L"male"), field(L"first", L"james"), field(L"last", L"jones"))) ); writer->addDocument(doc(newCollection( field(L"id", L"3"), field(L"gender", L"female"), field(L"first", L"lisa"), field(L"last", L"jones"), field(L"gender", L"male"), field(L"first", L"bob"), field(L"last", L"costas"))) ); writer->addDocument(doc(newCollection( field(L"id", L"4"), field(L"gender", L"female"), field(L"first", L"sally"), field(L"last", L"smith"), field(L"gender", L"female"), field(L"first", L"linda"), field(L"last", L"dixit"), field(L"gender", L"male"), field(L"first", L"bubba"), field(L"last", L"jones"))) ); writer->close(); searcher = 
newLucene(directory, true); } virtual ~FieldMaskingSpanQueryFixture() { searcher->close(); } protected: IndexSearcherPtr searcher; public: DocumentPtr doc(Collection fields) { DocumentPtr doc = newLucene(); for (int32_t i = 0; i < fields.size(); ++i) doc->add(fields[i]); return doc; } FieldPtr field(const String& name, const String& value) { return newLucene(name, value, Field::STORE_NO, Field::INDEX_ANALYZED); } void check(SpanQueryPtr q, Collection docs) { CheckHits::checkHitCollector(q, L"", searcher, docs); } String str(SpansPtr span) { return str(span->doc(), span->start(), span->end()); } String str(int32_t doc, int32_t start, int32_t end) { return L"s(" + StringUtils::toString(doc) + L"," + StringUtils::toString(start) + L"," + StringUtils::toString(end) + L")"; } }; BOOST_FIXTURE_TEST_SUITE(FieldMaskingSpanQueryTest, FieldMaskingSpanQueryFixture) BOOST_AUTO_TEST_CASE(testRewrite0) { SpanQueryPtr q = newLucene(newLucene(newLucene(L"last", L"sally")), L"first"); q->setBoost(8.7654321); SpanQueryPtr qr = boost::dynamic_pointer_cast(searcher->rewrite(q)); QueryUtils::checkEqual(q, qr); SetTerm terms = SetTerm::newInstance(); qr->extractTerms(terms); BOOST_CHECK_EQUAL(1, terms.size()); } namespace TestRewrite { class TestableFieldMaskingSpanQuery : public FieldMaskingSpanQuery { public: TestableFieldMaskingSpanQuery(SpanQueryPtr maskedQuery, const String& maskedField) : FieldMaskingSpanQuery(maskedQuery, maskedField) { } virtual ~TestableFieldMaskingSpanQuery() { } public: virtual QueryPtr rewrite(IndexReaderPtr reader) { return newLucene(newCollection( newLucene(newLucene(L"first", L"sally")), newLucene(newLucene(L"first", L"james"))) ); } }; } BOOST_AUTO_TEST_CASE(testRewrite1) { // mask an anon SpanQuery class that rewrites to something else. 
SpanQueryPtr q = newLucene(newLucene(newLucene(L"last", L"sally")), L"first"); SpanQueryPtr qr = boost::dynamic_pointer_cast(searcher->rewrite(q)); QueryUtils::checkUnequal(q, qr); SetTerm terms = SetTerm::newInstance(); qr->extractTerms(terms); BOOST_CHECK_EQUAL(2, terms.size()); } BOOST_AUTO_TEST_CASE(testRewrite2) { SpanQueryPtr q1 = newLucene(newLucene(L"last", L"smith")); SpanQueryPtr q2 = newLucene(newLucene(L"last", L"jones")); SpanQueryPtr q = newLucene(newCollection(q1, newLucene(q2, L"last")), 1, true); QueryPtr qr = searcher->rewrite(q); QueryUtils::checkEqual(q, qr); SetTerm terms = SetTerm::newInstance(); qr->extractTerms(terms); BOOST_CHECK_EQUAL(2, terms.size()); } BOOST_AUTO_TEST_CASE(testEquality1) { SpanQueryPtr q1 = newLucene(newLucene(newLucene(L"last", L"sally")), L"first"); SpanQueryPtr q2 = newLucene(newLucene(newLucene(L"last", L"sally")), L"first"); SpanQueryPtr q3 = newLucene(newLucene(newLucene(L"last", L"sally")), L"XXXXX"); SpanQueryPtr q4 = newLucene(newLucene(newLucene(L"last", L"XXXXX")), L"first"); SpanQueryPtr q5 = newLucene(newLucene(newLucene(L"xXXX", L"sally")), L"first"); QueryUtils::checkEqual(q1, q2); QueryUtils::checkUnequal(q1, q3); QueryUtils::checkUnequal(q1, q4); QueryUtils::checkUnequal(q1, q5); SpanQueryPtr qA = newLucene(newLucene(newLucene(L"last", L"sally")), L"first"); qA->setBoost(9.0); SpanQueryPtr qB = newLucene(newLucene(newLucene(L"last", L"sally")), L"first"); QueryUtils::checkUnequal(qA, qB); qB->setBoost(9.0); QueryUtils::checkEqual(qA, qB); } BOOST_AUTO_TEST_CASE(testNoop0) { SpanQueryPtr q1 = newLucene(newLucene(L"last", L"sally")); SpanQueryPtr q = newLucene(q1, L"first"); check(q, Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testNoop1) { SpanQueryPtr q1 = newLucene(newLucene(L"last", L"smith")); SpanQueryPtr q2 = newLucene(newLucene(L"last", L"jones")); SpanQueryPtr q = newLucene(newCollection(q1, newLucene(q2, L"last")), 0, true); check(q, newCollection(1, 2)); q = 
newLucene(newCollection(newLucene(q1, L"last"), newLucene(q2, L"last")), 0, true); check(q, newCollection(1, 2)); } BOOST_AUTO_TEST_CASE(testSimple1) { SpanQueryPtr q1 = newLucene(newLucene(L"first", L"james")); SpanQueryPtr q2 = newLucene(newLucene(L"last", L"jones")); SpanQueryPtr q = newLucene(newCollection(q1, newLucene(q2, L"first")), -1, false); check(q, newCollection(0, 2)); q = newLucene(newCollection(newLucene(q2, L"first"), q1), -1, false); check(q, newCollection(0, 2)); q = newLucene(newCollection(q2, newLucene(q1, L"last")), -1, false); check(q, newCollection(0, 2)); q = newLucene(newCollection(newLucene(q1, L"last"), q2), -1, false); check(q, newCollection(0, 2)); } BOOST_AUTO_TEST_CASE(testSimple2) { SpanQueryPtr q1 = newLucene(newLucene(L"gender", L"female")); SpanQueryPtr q2 = newLucene(newLucene(L"last", L"smith")); SpanQueryPtr q = newLucene(newCollection(q1, newLucene(q2, L"gender")), -1, false); check(q, newCollection(2, 4)); q = newLucene(newCollection(newLucene(q1, L"id"), newLucene(q2, L"id")), -1, false); check(q, newCollection(2, 4)); } BOOST_AUTO_TEST_CASE(testSpans0) { SpanQueryPtr q1 = newLucene(newLucene(L"gender", L"female")); SpanQueryPtr q2 = newLucene(newLucene(L"first", L"james")); SpanQueryPtr q = newLucene(newCollection(q1, newLucene(q2, L"gender"))); check(q, newCollection(0, 1, 2, 3, 4)); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(0, 0, 1), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(1, 0, 1), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(1, 1, 2), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(2, 0, 1), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(2, 1, 2), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(2, 2, 3), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(3, 0, 1), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(4, 0, 1), str(span)); BOOST_CHECK(span->next()); 
BOOST_CHECK_EQUAL(str(4, 1, 2), str(span)); BOOST_CHECK(!span->next()); } BOOST_AUTO_TEST_CASE(testSpans1) { SpanQueryPtr q1 = newLucene(newLucene(L"first", L"sally")); SpanQueryPtr q2 = newLucene(newLucene(L"first", L"james")); SpanQueryPtr qA = newLucene(newCollection(q1, q2)); SpanQueryPtr qB = newLucene(qA, L"id"); check(qA, newCollection(0, 1, 2, 4)); check(qB, newCollection(0, 1, 2, 4)); SpansPtr spanA = qA->getSpans(searcher->getIndexReader()); SpansPtr spanB = qB->getSpans(searcher->getIndexReader()); while (spanA->next()) { BOOST_CHECK(spanB->next()); BOOST_CHECK_EQUAL(str(spanA), str(spanB)); } BOOST_CHECK(!(spanB->next())); } BOOST_AUTO_TEST_CASE(testSpans2) { SpanQueryPtr qA1 = newLucene(newLucene(L"gender", L"female")); SpanQueryPtr qA2 = newLucene(newLucene(L"first", L"james")); SpanQueryPtr qA = newLucene(newCollection(qA1, newLucene(qA2, L"gender"))); SpanQueryPtr qB = newLucene(newLucene(L"last", L"jones")); SpanQueryPtr q = newLucene(newCollection(newLucene(qA, L"id"), newLucene(qB, L"id")), -1, false); check(q, newCollection(0, 1, 2, 3)); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(0, 0, 1), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(1, 1, 2), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(2, 0, 1), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(2, 2, 3), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(3, 0, 1), str(span)); BOOST_CHECK(!span->next()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/spans/NearSpansOrderedTest.cpp000066400000000000000000000130511217574114600262150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "QueryParser.h" #include "IndexSearcher.h" #include "WhitespaceAnalyzer.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "SpanNearQuery.h" #include "CheckHits.h" #include "SpanTermQuery.h" #include "Term.h" #include "Spans.h" #include "Weight.h" #include "Scorer.h" #include "Explanation.h" using namespace Lucene; class NearSpansOrderedFixture : public LuceneTestFixture { public: NearSpansOrderedFixture() { qp = newLucene(LuceneVersion::LUCENE_CURRENT, FIELD, newLucene()); docFields = newCollection(L"w1 w2 w3 w4 w5", L"w1 w3 w2 w3 zz", L"w1 xx w2 yy w3", L"w1 w3 xx w2 yy w3 zz"); RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer= newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < docFields.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(FIELD, docFields[i], Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); searcher = newLucene(directory, true); } virtual ~NearSpansOrderedFixture() { searcher->close(); } protected: IndexSearcherPtr searcher; QueryParserPtr qp; Collection docFields; public: static const String FIELD; public: SpanNearQueryPtr makeQuery(const String& s1, const String& s2, const String& s3, int32_t slop, bool inOrder) { return newLucene(newCollection( newLucene(newLucene(FIELD, s1)), newLucene(newLucene(FIELD, s2)), newLucene(newLucene(FIELD, s3)) ), slop, inOrder); } SpanNearQueryPtr makeQuery() { return makeQuery(L"w1", L"w2", L"w3", 1, true); } String str(SpansPtr span) { return str(span->doc(), span->start(), span->end()); } String str(int32_t doc, int32_t start, int32_t end) { return L"s(" + StringUtils::toString(doc) + L"," + StringUtils::toString(start) + L"," + StringUtils::toString(end) + L")"; } }; const String 
NearSpansOrderedFixture::FIELD = L"field"; BOOST_FIXTURE_TEST_SUITE(NearSpansOrderedTest, NearSpansOrderedFixture) BOOST_AUTO_TEST_CASE(testSpanNearQuery) { SpanNearQueryPtr q = makeQuery(); CheckHits::checkHits(q, FIELD, searcher, newCollection(0, 1)); } BOOST_AUTO_TEST_CASE(testNearSpansNext) { SpanNearQueryPtr q = makeQuery(); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(0, 0, 3), str(span)); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(1, 0, 4), str(span)); BOOST_CHECK(!span->next()); } /// test does not imply that skipTo(doc+1) should work exactly the same as next -- it's only applicable in this case /// since we know doc does not contain more than one span BOOST_AUTO_TEST_CASE(testNearSpansSkipToLikeNext) { SpanNearQueryPtr q = makeQuery(); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(span->skipTo(0)); BOOST_CHECK_EQUAL(str(0, 0, 3), str(span)); BOOST_CHECK(span->skipTo(1)); BOOST_CHECK_EQUAL(str(1, 0, 4), str(span)); BOOST_CHECK(!span->skipTo(2)); } BOOST_AUTO_TEST_CASE(testNearSpansNextThenSkipTo) { SpanNearQueryPtr q = makeQuery(); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(0, 0, 3), str(span)); BOOST_CHECK(span->skipTo(1)); BOOST_CHECK_EQUAL(str(1, 0, 4), str(span)); BOOST_CHECK(!span->next()); } BOOST_AUTO_TEST_CASE(testNearSpansNextThenSkipPast) { SpanNearQueryPtr q = makeQuery(); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(span->next()); BOOST_CHECK_EQUAL(str(0, 0, 3), str(span)); BOOST_CHECK(!span->skipTo(2)); } BOOST_AUTO_TEST_CASE(testNearSpansSkipPast) { SpanNearQueryPtr q = makeQuery(); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(!span->skipTo(2)); } BOOST_AUTO_TEST_CASE(testNearSpansSkipTo0) { SpanNearQueryPtr q = makeQuery(); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(span->skipTo(0)); BOOST_CHECK_EQUAL(str(0, 0, 3), 
str(span)); } BOOST_AUTO_TEST_CASE(testNearSpansSkipTo1) { SpanNearQueryPtr q = makeQuery(); SpansPtr span = q->getSpans(searcher->getIndexReader()); BOOST_CHECK(span->skipTo(1)); BOOST_CHECK_EQUAL(str(1, 0, 4), str(span)); } BOOST_AUTO_TEST_CASE(testSpanNearScorerSkipTo1) { SpanNearQueryPtr q = makeQuery(); WeightPtr w = q->weight(searcher); ScorerPtr s = w->scorer(searcher->getIndexReader(), true, false); BOOST_CHECK_EQUAL(1, s->advance(1)); } BOOST_AUTO_TEST_CASE(testSpanNearScorerExplain) { SpanNearQueryPtr q = makeQuery(); ExplanationPtr e = q->weight(searcher)->explain(searcher->getIndexReader(), 1); BOOST_CHECK(0.0 < e->getValue()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/spans/PayloadSpansTest.cpp000066400000000000000000000467671217574114600254400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "IndexSearcher.h" #include "IndexReader.h" #include "DefaultSimilarity.h" #include "PayloadHelper.h" #include "SpanTermQuery.h" #include "SpanFirstQuery.h" #include "Term.h" #include "Spans.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" #include "RAMDirectory.h" #include "PayloadAttribute.h" #include "TokenFilter.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "Payload.h" #include "LowerCaseTokenizer.h" #include "Analyzer.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "StringReader.h" #include "TopDocs.h" #include "PayloadSpanUtil.h" #include "TermQuery.h" using namespace Lucene; DECLARE_SHARED_PTR(PayloadSpansAnalyzer) class PayloadSpansFilter : public TokenFilter { public: PayloadSpansFilter(TokenStreamPtr input, const String& fieldName) : TokenFilter(input) { this->fieldName = fieldName; this->pos = 0; this->entities = HashSet::newInstance(); this->entities.add(L"xx"); this->entities.add(L"one"); this->nopayload = HashSet::newInstance(); this->nopayload.add(L"nopayload"); this->nopayload.add(L"np"); this->termAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->payloadAtt = addAttribute(); } virtual ~PayloadSpansFilter() { } LUCENE_CLASS(PayloadSpansFilter); public: String fieldName; HashSet entities; HashSet nopayload; int32_t pos; PayloadAttributePtr payloadAtt; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; public: virtual bool incrementToken() { if (input->incrementToken()) { String token(termAtt->termBuffer().get(), termAtt->termLength()); if (!nopayload.contains(token)) { StringStream buf; buf << token; if (entities.contains(token)) buf << L":Entity:"; else buf << L":Noise:"; buf << pos; ByteArray data = ByteArray::newInstance(buf.str().length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)data.get(), buf.str().c_str(), 
buf.str().length()); payloadAtt->setPayload(newLucene(data)); } pos += posIncrAtt->getPositionIncrement(); return true; } else return false; } }; class PayloadSpansAnalyzer : public Analyzer { public: virtual ~PayloadSpansAnalyzer() { } LUCENE_CLASS(PayloadSpansAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result, fieldName); return result; } }; class PayloadSpansFixture : public LuceneTestFixture { public: PayloadSpansFixture() { similarity = newLucene(); searcher = PayloadHelper::setUp(similarity, 1000); indexReader = searcher->getIndexReader(); } virtual ~PayloadSpansFixture() { } protected: IndexSearcherPtr searcher; SimilarityPtr similarity; IndexReaderPtr indexReader; public: void checkSpans(SpansPtr spans, int32_t expectedNumSpans, int32_t expectedNumPayloads, int32_t expectedPayloadLength, int32_t expectedFirstByte) { BOOST_CHECK(spans); int32_t seen = 0; while (spans->next()) { // if we expect payloads, then isPayloadAvailable should be true if (expectedNumPayloads > 0) BOOST_CHECK(spans->isPayloadAvailable()); else BOOST_CHECK(!spans->isPayloadAvailable()); // See payload helper, for the PayloadHelper::FIELD field, there is a single byte payload at every token if (spans->isPayloadAvailable()) { Collection payload = spans->getPayload(); BOOST_CHECK_EQUAL(payload.size(), expectedNumPayloads); for (Collection::iterator thePayload = payload.begin(); thePayload != payload.end(); ++thePayload) { BOOST_CHECK_EQUAL(thePayload->size(), expectedPayloadLength); BOOST_CHECK_EQUAL((*thePayload)[0], expectedFirstByte); } } ++seen; } BOOST_CHECK_EQUAL(seen, expectedNumSpans); } void checkSpans(SpansPtr spans, int32_t numSpans, Collection numPayloads) { int32_t cnt = 0; while (spans->next()) { if (spans->isPayloadAvailable()) { Collection payload = spans->getPayload(); BOOST_CHECK_EQUAL(numPayloads[cnt], payload.size()); } else BOOST_CHECK(numPayloads.size() 
<= 0 || numPayloads[cnt] <= 0); } ++cnt; } IndexSearcherPtr getSpanNotSearcher() { RAMDirectoryPtr directory = newLucene(); PayloadSpansAnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setSimilarity(similarity); DocumentPtr doc = newLucene(); doc->add(newLucene(PayloadHelper::FIELD, L"one two three one four three", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); searcher->setSimilarity(similarity); return searcher; } IndexSearcherPtr getSearcher() { RAMDirectoryPtr directory = newLucene(); PayloadSpansAnalyzerPtr analyzer = newLucene(); Collection docs = newCollection( L"xx rr yy mm pp", L"xx yy mm rr pp", L"nopayload qq ss pp np", L"one two three four five six seven eight nine ten eleven", L"nine one two three four five six seven eight eleven ten" ); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setSimilarity(similarity); for (int32_t i = 0; i < docs.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(PayloadHelper::FIELD, docs[i], Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); return searcher; } }; BOOST_FIXTURE_TEST_SUITE(PayloadSpansTest, PayloadSpansFixture) BOOST_AUTO_TEST_CASE(testSpanTermQuery) { SpanTermQueryPtr stq = newLucene(newLucene(PayloadHelper::FIELD, L"seventy")); SpansPtr spans = stq->getSpans(indexReader); BOOST_CHECK(spans); checkSpans(spans, 100, 1, 1, 1); stq = newLucene(newLucene(PayloadHelper::NO_PAYLOAD_FIELD, L"seventy")); spans = stq->getSpans(indexReader); BOOST_CHECK(spans); checkSpans(spans, 100, 0, 0, 0); } BOOST_AUTO_TEST_CASE(testSpanFirst) { SpanQueryPtr match = newLucene(newLucene(PayloadHelper::FIELD, L"one")); SpanFirstQueryPtr sfq = newLucene(match, 2); SpansPtr spans = 
sfq->getSpans(indexReader); checkSpans(spans, 109, 1, 1, 1); // Test more complicated subclause Collection clauses = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"one")), newLucene(newLucene(PayloadHelper::FIELD, L"hundred")) ); match = newLucene(clauses, 0, true); sfq = newLucene(match, 2); checkSpans(sfq->getSpans(indexReader), 100, 2, 1, 1); match = newLucene(clauses, 0, false); sfq = newLucene(match, 2); checkSpans(sfq->getSpans(indexReader), 100, 2, 1, 1); } BOOST_AUTO_TEST_CASE(testSpanNot) { Collection clauses = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"one")), newLucene(newLucene(PayloadHelper::FIELD, L"three")) ); SpanQueryPtr spq = newLucene(clauses, 5, true); SpanNotQueryPtr snq = newLucene(spq, newLucene(newLucene(PayloadHelper::FIELD, L"two"))); checkSpans(snq->getSpans(getSpanNotSearcher()->getIndexReader()), 1, newCollection(2)); } BOOST_AUTO_TEST_CASE(testNestedSpans) { IndexSearcherPtr searcher = getSearcher(); SpanTermQueryPtr stq = newLucene(newLucene(PayloadHelper::FIELD, L"mark")); SpansPtr spans = stq->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans); checkSpans(spans, 0, Collection()); Collection clauses = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"rr")), newLucene(newLucene(PayloadHelper::FIELD, L"yy")), newLucene(newLucene(PayloadHelper::FIELD, L"xx")) ); SpanNearQueryPtr spanNearQuery = newLucene(clauses, 12, false); spans = spanNearQuery->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans); checkSpans(spans, 2, newCollection(3, 3)); clauses[0] = newLucene(newLucene(PayloadHelper::FIELD, L"xx")); clauses[1] = newLucene(newLucene(PayloadHelper::FIELD, L"rr")); clauses[2] = newLucene(newLucene(PayloadHelper::FIELD, L"yy")); spanNearQuery = newLucene(clauses, 6, true); spans = spanNearQuery->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans); checkSpans(spans, 1, newCollection(3)); clauses = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"xx")), 
newLucene(newLucene(PayloadHelper::FIELD, L"rr")) ); spanNearQuery = newLucene(clauses, 6, true); // xx within 6 of rr Collection clauses2 = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"yy")), spanNearQuery ); SpanNearQueryPtr nestedSpanNearQuery = newLucene(clauses2, 6, false); // yy within 6 of xx within 6 of rr spans = nestedSpanNearQuery->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans); checkSpans(spans, 2, newCollection(3, 3)); } BOOST_AUTO_TEST_CASE(testFirstClauseWithoutPayload) { IndexSearcherPtr searcher = getSearcher(); Collection clauses = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"nopayload")), newLucene(newLucene(PayloadHelper::FIELD, L"qq")), newLucene(newLucene(PayloadHelper::FIELD, L"ss")) ); SpanNearQueryPtr spanNearQuery = newLucene(clauses, 6, true); Collection clauses2 = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"pp")), spanNearQuery ); SpanNearQueryPtr snq = newLucene(clauses2, 6, false); Collection clauses3 = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"np")), snq ); SpanNearQueryPtr nestedSpanNearQuery = newLucene(clauses3, 6, false); SpansPtr spans = nestedSpanNearQuery->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans); checkSpans(spans, 1, newCollection(3)); } BOOST_AUTO_TEST_CASE(testHeavilyNestedSpanQuery) { IndexSearcherPtr searcher = getSearcher(); Collection clauses = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"one")), newLucene(newLucene(PayloadHelper::FIELD, L"two")), newLucene(newLucene(PayloadHelper::FIELD, L"three")) ); SpanNearQueryPtr spanNearQuery = newLucene(clauses, 5, true); clauses[0] = spanNearQuery; clauses[1] = newLucene(newLucene(PayloadHelper::FIELD, L"five")); clauses[2] = newLucene(newLucene(PayloadHelper::FIELD, L"six")); SpanNearQueryPtr spanNearQuery2 = newLucene(clauses, 6, true); Collection clauses2 = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"eleven")), newLucene(newLucene(PayloadHelper::FIELD, L"ten")) 
); SpanNearQueryPtr spanNearQuery3 = newLucene(clauses2, 2, false); Collection clauses3 = newCollection( newLucene(newLucene(PayloadHelper::FIELD, L"nine")), spanNearQuery2, spanNearQuery3 ); SpanNearQueryPtr nestedSpanNearQuery = newLucene(clauses3, 6, false); SpansPtr spans = nestedSpanNearQuery->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans); checkSpans(spans, 2, newCollection(8, 8)); } BOOST_AUTO_TEST_CASE(testShrinkToAfterShortestMatch) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", newLucene(L"a b c d e f g h i j a k"))); writer->addDocument(doc); writer->close(); IndexSearcherPtr is = newLucene(directory, true); SpanTermQueryPtr stq1 = newLucene(newLucene(L"content", L"a")); SpanTermQueryPtr stq2 = newLucene(newLucene(L"content", L"k")); Collection sqs = newCollection(stq1, stq2); SpanNearQueryPtr snq = newLucene(sqs, 1, true); SpansPtr spans = snq->getSpans(is->getIndexReader()); TopDocsPtr topDocs = is->search(snq, 1); HashSet payloadSet = HashSet::newInstance(); for (int32_t i = 0; i < topDocs->scoreDocs.size(); ++i) { while (spans->next()) { Collection payloads = spans->getPayload(); for (Collection::iterator it = payloads.begin(); it != payloads.end(); ++it) payloadSet.add(String((wchar_t*)it->get(), it->size() / sizeof(wchar_t))); } } BOOST_CHECK_EQUAL(2, payloadSet.size()); BOOST_CHECK(payloadSet.contains(L"a:Noise:10")); BOOST_CHECK(payloadSet.contains(L"k:Noise:11")); } BOOST_AUTO_TEST_CASE(testShrinkToAfterShortestMatch2) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", newLucene(L"a b a d k f a h i k a k"))); writer->addDocument(doc); writer->close(); IndexSearcherPtr is = newLucene(directory, true); SpanTermQueryPtr stq1 = 
newLucene(newLucene(L"content", L"a")); SpanTermQueryPtr stq2 = newLucene(newLucene(L"content", L"k")); Collection sqs = newCollection(stq1, stq2); SpanNearQueryPtr snq = newLucene(sqs, 0, true); SpansPtr spans = snq->getSpans(is->getIndexReader()); TopDocsPtr topDocs = is->search(snq, 1); HashSet payloadSet = HashSet::newInstance(); for (int32_t i = 0; i < topDocs->scoreDocs.size(); ++i) { while (spans->next()) { Collection payloads = spans->getPayload(); for (Collection::iterator it = payloads.begin(); it != payloads.end(); ++it) payloadSet.add(String((wchar_t*)it->get(), it->size() / sizeof(wchar_t))); } } BOOST_CHECK_EQUAL(2, payloadSet.size()); BOOST_CHECK(payloadSet.contains(L"a:Noise:10")); BOOST_CHECK(payloadSet.contains(L"k:Noise:11")); } BOOST_AUTO_TEST_CASE(testShrinkToAfterShortestMatch3) { RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"content", newLucene(L"j k a l f k k p a t a k l k t a"))); writer->addDocument(doc); writer->close(); IndexSearcherPtr is = newLucene(directory, true); SpanTermQueryPtr stq1 = newLucene(newLucene(L"content", L"a")); SpanTermQueryPtr stq2 = newLucene(newLucene(L"content", L"k")); Collection sqs = newCollection(stq1, stq2); SpanNearQueryPtr snq = newLucene(sqs, 0, true); SpansPtr spans = snq->getSpans(is->getIndexReader()); TopDocsPtr topDocs = is->search(snq, 1); HashSet payloadSet = HashSet::newInstance(); for (int32_t i = 0; i < topDocs->scoreDocs.size(); ++i) { while (spans->next()) { Collection payloads = spans->getPayload(); for (Collection::iterator it = payloads.begin(); it != payloads.end(); ++it) payloadSet.add(String((wchar_t*)it->get(), it->size() / sizeof(wchar_t))); } } BOOST_CHECK_EQUAL(2, payloadSet.size()); BOOST_CHECK(payloadSet.contains(L"a:Noise:10")); BOOST_CHECK(payloadSet.contains(L"k:Noise:11")); } BOOST_AUTO_TEST_CASE(testPayloadSpanUtil) { RAMDirectoryPtr 
directory = newLucene(); PayloadSpansAnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer = newLucene(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setSimilarity(similarity); DocumentPtr doc = newLucene(); doc->add(newLucene(PayloadHelper::FIELD, L"xx rr yy mm pp", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); IndexSearcherPtr searcher = newLucene(directory, true); IndexReaderPtr reader = searcher->getIndexReader(); PayloadSpanUtilPtr psu = newLucene(reader); Collection payloads = psu->getPayloadsForQuery(newLucene(newLucene(PayloadHelper::FIELD, L"rr"))); BOOST_CHECK_EQUAL(1, payloads.size()); BOOST_CHECK_EQUAL(String((wchar_t*)(payloads[0].get()), payloads[0].size() / sizeof(wchar_t)), L"rr:Noise:1"); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/spans/SpanExplanationsTest.cpp000066400000000000000000000113521217574114600263070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "ExplanationsFixture.h" #include "SpanQuery.h" #include "SpanTermQuery.h" #include "SpanFirstQuery.h" #include "SpanOrQuery.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SpanExplanationsTest, ExplanationsFixture) BOOST_AUTO_TEST_CASE(testST1) { SpanQueryPtr q = st(L"w1"); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testST2) { SpanQueryPtr q = st(L"w1"); q->setBoost(1000); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testST4) { SpanQueryPtr q = st(L"xx"); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testST5) { SpanQueryPtr q = st(L"xx"); q->setBoost(1000); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSF1) { SpanQueryPtr q = sf(L"w1", 1); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSF2) { SpanQueryPtr q = sf(L"w1", 1); q->setBoost(1000); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSF4) { SpanQueryPtr q = sf(L"xx", 2); qtest(q, newCollection(2)); } BOOST_AUTO_TEST_CASE(testSF5) { SpanQueryPtr q = sf(L"yy", 2); qtest(q, Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testSF6) { SpanQueryPtr q = sf(L"yy", 4); q->setBoost(1000); qtest(q, newCollection(2)); } BOOST_AUTO_TEST_CASE(testSO1) { SpanQueryPtr q = sor(L"w1", L"QQ"); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSO2) { SpanQueryPtr q = sor(L"w1", L"w3", L"zz"); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSO3) { SpanQueryPtr q = sor(L"w5", L"QQ", L"yy"); qtest(q, newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testSO4) { SpanQueryPtr q = sor(L"w5", L"QQ", L"yy"); qtest(q, newCollection(0, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNear1) { SpanQueryPtr q = snear(L"w1", L"QQ", 100, true); qtest(q, Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testSNear2) { SpanQueryPtr q = snear(L"w1", L"xx", 100, true); qtest(q, 
newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSNear3) { SpanQueryPtr q = snear(L"w1", L"xx", 0, true); qtest(q, newCollection(2)); } BOOST_AUTO_TEST_CASE(testSNear4) { SpanQueryPtr q = snear(L"w1", L"xx", 1, true); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSNear5) { SpanQueryPtr q = snear(L"xx", L"w1", 0, false); qtest(q, newCollection(2)); } BOOST_AUTO_TEST_CASE(testSNear6) { SpanQueryPtr q = snear(L"w1", L"w2", L"QQ", 100, true); qtest(q, Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testSNear7) { SpanQueryPtr q = snear(L"w1", L"xx", L"w2", 100, true); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSNear8) { SpanQueryPtr q = snear(L"w1", L"xx", L"w2", 0, true); qtest(q, newCollection(2)); } BOOST_AUTO_TEST_CASE(testSNear9) { SpanQueryPtr q = snear(L"w1", L"xx", L"w2", 1, true); qtest(q, newCollection(2, 3)); } BOOST_AUTO_TEST_CASE(testSNear10) { SpanQueryPtr q = snear(L"xx", L"w1", L"w2", 0, false); qtest(q, newCollection(2)); } BOOST_AUTO_TEST_CASE(testSNear11) { SpanQueryPtr q = snear(L"w1", L"w2", L"w3", 1, true); qtest(q, newCollection(0, 1)); } BOOST_AUTO_TEST_CASE(testSNot1) { SpanQueryPtr q = snot(sf(L"w1", 10), st(L"QQ")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNot2) { SpanQueryPtr q = snot(sf(L"w1", 10), st(L"QQ")); q->setBoost(1000); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNot4) { SpanQueryPtr q = snot(sf(L"w1", 10), st(L"xx")); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNot5) { SpanQueryPtr q = snot(sf(L"w1", 10), st(L"xx")); q->setBoost(1000); qtest(q, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSNot7) { SpanQueryPtr f = snear(L"w1", L"w3", 10, true); f->setBoost(1000); SpanQueryPtr q = snot(f, st(L"xx")); qtest(q, newCollection(0, 1, 3)); } BOOST_AUTO_TEST_CASE(testSNot10) { SpanQueryPtr t = st(L"xx"); t->setBoost(10000); SpanQueryPtr q = snot(snear(L"w1", L"w3", 10, true), t); qtest(q, newCollection(0, 1, 3)); } 
BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/spans/SpansAdvanced2Test.cpp000066400000000000000000000132261217574114600256160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Explanation.h" #include "IndexSearcher.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "SpanTermQuery.h" #include "Term.h" #include "BooleanQuery.h" #include "QueryUtils.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "IndexReader.h" using namespace Lucene; class SpansAdvanced2Fixture : public LuceneTestFixture { public: SpansAdvanced2Fixture() { // create test index directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); addDocument(writer, L"1", L"I think it should work."); addDocument(writer, L"2", L"I think it should work."); addDocument(writer, L"3", L"I think it should work."); addDocument(writer, L"4", L"I think it should work."); addDocument(writer, L"A", L"Should we, could we, would we?"); addDocument(writer, L"B", L"It should. 
Should it?"); addDocument(writer, L"C", L"It shouldn't."); addDocument(writer, L"D", L"Should we, should we, should we."); writer->close(); searcher = newLucene(directory, true); // re-open the searcher since we added more docs searcher2 = newLucene(directory, true); } virtual ~SpansAdvanced2Fixture() { searcher->close(); searcher2->close(); directory->close(); } public: static const String FIELD_ID; static const String FIELD_TEXT; protected: DirectoryPtr directory; IndexSearcherPtr searcher; IndexSearcherPtr searcher2; void addDocument(IndexWriterPtr writer, const String& id, const String& text) { DocumentPtr document = newLucene(); document->add(newLucene(FIELD_ID, id, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); document->add(newLucene(FIELD_TEXT, text, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(document); } void checkHits(SearcherPtr s, QueryPtr query, const String& description, Collection expectedIds, Collection expectedScores) { QueryUtils::check(query, s); double tolerance = 1e-5f; // hits normalizes and throws things off if one score is greater than 1.0 TopDocsPtr topdocs = s->search(query, FilterPtr(), 10000); // did we get the hits we expected BOOST_CHECK_EQUAL(expectedIds.size(), topdocs->totalHits); for (int32_t i = 0; i < topdocs->totalHits; ++i) { int32_t id = topdocs->scoreDocs[i]->doc; double score = topdocs->scoreDocs[i]->score; DocumentPtr doc = s->doc(id); BOOST_CHECK_EQUAL(expectedIds[i], doc->get(FIELD_ID)); bool scoreEq = (std::abs(expectedScores[i] - score) < tolerance); if (scoreEq) { BOOST_CHECK_CLOSE_FRACTION(expectedScores[i], score, tolerance); BOOST_CHECK_CLOSE_FRACTION(s->explain(query, id)->getValue(), score, tolerance); } } } }; const String SpansAdvanced2Fixture::FIELD_ID = L"ID"; const String SpansAdvanced2Fixture::FIELD_TEXT = L"TEXT"; BOOST_FIXTURE_TEST_SUITE(SpansAdvanced2Test, SpansAdvanced2Fixture) BOOST_AUTO_TEST_CASE(testVerifyIndex) { IndexReaderPtr reader = IndexReader::open(directory, true); 
BOOST_CHECK_EQUAL(8, reader->numDocs()); reader->close(); } BOOST_AUTO_TEST_CASE(testSingleSpanQuery) { QueryPtr spanQuery = newLucene(newLucene(FIELD_TEXT, L"should")); Collection expectedIds = newCollection(L"B", L"D", L"1", L"2", L"3", L"4", L"A"); Collection expectedScores = newCollection(0.625, 0.45927936, 0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.26516503); checkHits(searcher2, spanQuery, L"single span query", expectedIds, expectedScores); } BOOST_AUTO_TEST_CASE(testMultipleDifferentSpanQueries) { QueryPtr spanQuery1 = newLucene(newLucene(FIELD_TEXT, L"should")); QueryPtr spanQuery2 = newLucene(newLucene(FIELD_TEXT, L"we")); BooleanQueryPtr query = newLucene(); query->add(spanQuery1, BooleanClause::MUST); query->add(spanQuery2, BooleanClause::MUST); Collection expectedIds = newCollection(L"D", L"A"); Collection expectedScores = newCollection(1.0191123, 0.93163157); checkHits(searcher2, query, L"multiple different span queries", expectedIds, expectedScores); } BOOST_AUTO_TEST_CASE(testBooleanQueryWithSpanQueries) { double expectedScore = 0.73500174; QueryPtr spanQuery = newLucene(newLucene(FIELD_TEXT, L"work")); BooleanQueryPtr query = newLucene(); query->add(spanQuery, BooleanClause::MUST); query->add(spanQuery, BooleanClause::MUST); Collection expectedIds = newCollection(L"1", L"2", L"3", L"4"); Collection expectedScores = newCollection(expectedScore, expectedScore, expectedScore, expectedScore); checkHits(searcher2, query, L"two span queries", expectedIds, expectedScores); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/spans/SpansAdvancedTest.cpp000066400000000000000000000076521217574114600255420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Explanation.h" #include "IndexSearcher.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "SpanTermQuery.h" #include "Term.h" #include "BooleanQuery.h" #include "QueryUtils.h" #include "TopDocs.h" #include "ScoreDoc.h" using namespace Lucene; class SpansAdvancedFixture : public LuceneTestFixture { public: SpansAdvancedFixture() { // create test index directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); addDocument(writer, L"1", L"I think it should work."); addDocument(writer, L"2", L"I think it should work."); addDocument(writer, L"3", L"I think it should work."); addDocument(writer, L"4", L"I think it should work."); writer->close(); searcher = newLucene(directory, true); } virtual ~SpansAdvancedFixture() { searcher->close(); directory->close(); } public: static const String FIELD_ID; static const String FIELD_TEXT; protected: DirectoryPtr directory; IndexSearcherPtr searcher; void addDocument(IndexWriterPtr writer, const String& id, const String& text) { DocumentPtr document = newLucene(); document->add(newLucene(FIELD_ID, id, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); document->add(newLucene(FIELD_TEXT, text, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(document); } void checkHits(SearcherPtr s, QueryPtr query, const String& description, Collection expectedIds, Collection expectedScores) { QueryUtils::check(query, s); double tolerance = 1e-5f; // hits normalizes and throws things off if one score is greater than 1.0 TopDocsPtr topdocs = s->search(query, FilterPtr(), 10000); // did we get the hits we expected BOOST_CHECK_EQUAL(expectedIds.size(), topdocs->totalHits); for (int32_t i = 0; i < topdocs->totalHits; ++i) { 
int32_t id = topdocs->scoreDocs[i]->doc; double score = topdocs->scoreDocs[i]->score; DocumentPtr doc = s->doc(id); BOOST_CHECK_EQUAL(expectedIds[i], doc->get(FIELD_ID)); bool scoreEq = (std::abs(expectedScores[i] - score) < tolerance); if (scoreEq) { BOOST_CHECK_CLOSE_FRACTION(expectedScores[i], score, tolerance); BOOST_CHECK_CLOSE_FRACTION(s->explain(query, id)->getValue(), score, tolerance); } } } }; const String SpansAdvancedFixture::FIELD_ID = L"ID"; const String SpansAdvancedFixture::FIELD_TEXT = L"TEXT"; BOOST_FIXTURE_TEST_SUITE(SpansAdvancedTest, SpansAdvancedFixture) BOOST_AUTO_TEST_CASE(testBooleanQueryWithSpanQueries) { double expectedScore = 0.3884282; QueryPtr spanQuery = newLucene(newLucene(FIELD_TEXT, L"work")); BooleanQueryPtr query = newLucene(); query->add(spanQuery, BooleanClause::MUST); query->add(spanQuery, BooleanClause::MUST); Collection expectedIds = newCollection(L"1", L"2", L"3", L"4"); Collection expectedScores = newCollection(expectedScore, expectedScore, expectedScore, expectedScore); checkHits(searcher, query, L"two span queries", expectedIds, expectedScores); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/search/spans/SpansTest.cpp000066400000000000000000000361051217574114600241070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "IndexSearcher.h" #include "WhitespaceAnalyzer.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "Document.h" #include "Field.h" #include "SpanTermQuery.h" #include "Term.h" #include "CheckHits.h" #include "SpanQuery.h" #include "SpanNearQuery.h" #include "Spans.h" #include "SpanOrQuery.h" #include "DefaultSimilarity.h" #include "Scorer.h" #include "DocIdSetIterator.h" #include "Weight.h" #include "StandardAnalyzer.h" #include "TermQuery.h" #include "TopDocs.h" #include "IndexReader.h" using namespace Lucene; class SpansFixture : public LuceneTestFixture { public: SpansFixture() { docFields = Collection::newInstance(); docFields.add(L"w1 w2 w3 w4 w5"); docFields.add(L"w1 w3 w2 w3"); docFields.add(L"w1 xx w2 yy w3"); docFields.add(L"w1 w3 xx w2 yy w3"); docFields.add(L"u2 u2 u1"); docFields.add(L"u2 xx u2 u1"); docFields.add(L"u2 u2 xx u1"); docFields.add(L"u2 xx u2 yy u1"); docFields.add(L"u2 xx u1 u2"); docFields.add(L"u2 u1 xx u2"); docFields.add(L"u1 u2 xx u2"); docFields.add(L"t1 t2 t1 t3 t2 t3"); RAMDirectoryPtr directory = newLucene(); IndexWriterPtr writer= newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); for (int32_t i = 0; i < docFields.size(); ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(field, docFields[i], Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } writer->close(); searcher = newLucene(directory, true); } virtual ~SpansFixture() { searcher->close(); } protected: IndexSearcherPtr searcher; Collection docFields; public: static const String field; public: SpanTermQueryPtr makeSpanTermQuery(const String& text) { return newLucene(newLucene(field, text)); } void checkHits(QueryPtr query, Collection results) { CheckHits::checkHits(query, field, searcher, results); } void orderedSlopTest3SQ(SpanQueryPtr q1, SpanQueryPtr q2, SpanQueryPtr q3, 
int32_t slop, Collection expectedDocs) { bool ordered = true; SpanNearQueryPtr snq = newLucene(newCollection(q1, q2, q3), slop, ordered); checkHits(snq, expectedDocs); } void orderedSlopTest3(int32_t slop, Collection expectedDocs) { orderedSlopTest3SQ(makeSpanTermQuery(L"w1"), makeSpanTermQuery(L"w2"), makeSpanTermQuery(L"w3"), slop, expectedDocs); } void orderedSlopTest3Equal(int32_t slop, Collection expectedDocs) { orderedSlopTest3SQ(makeSpanTermQuery(L"w1"), makeSpanTermQuery(L"w3"), makeSpanTermQuery(L"w3"), slop, expectedDocs); } void orderedSlopTest1Equal(int32_t slop, Collection expectedDocs) { orderedSlopTest3SQ(makeSpanTermQuery(L"u2"), makeSpanTermQuery(L"u2"), makeSpanTermQuery(L"u1"), slop, expectedDocs); } SpansPtr orSpans(Collection terms) { Collection sqa = Collection::newInstance(terms.size()); for (int32_t i = 0; i < terms.size(); ++i) sqa[i] = makeSpanTermQuery(terms[i]); return newLucene(sqa)->getSpans(searcher->getIndexReader()); } void checkNextSpans(SpansPtr spans, int32_t doc, int32_t start, int32_t end) { BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(doc, spans->doc()); BOOST_CHECK_EQUAL(start, spans->start()); BOOST_CHECK_EQUAL(end, spans->end()); } void addDoc(IndexWriterPtr writer, const String& id, const String& text) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"id", id, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"text", text, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } int32_t hitCount(SearcherPtr searcher, const String& word) { return searcher->search(newLucene(newLucene(L"text", word)), 10)->totalHits; } SpanQueryPtr createSpan(const String& value) { return newLucene(newLucene(L"text", value)); } SpanQueryPtr createSpan(int32_t slop, bool ordered, Collection clauses) { return newLucene(clauses, slop, ordered); } SpanQueryPtr createSpan(int32_t slop, bool ordered, const String& term1, const String& term2) { return createSpan(slop, ordered, newCollection(createSpan(term1), 
createSpan(term2))); } }; const String SpansFixture::field = L"field"; BOOST_FIXTURE_TEST_SUITE(SpansTest, SpansFixture) BOOST_AUTO_TEST_CASE(testSpanNearOrdered01) { orderedSlopTest3(0, newCollection(0)); } BOOST_AUTO_TEST_CASE(testSpanNearOrdered02) { orderedSlopTest3(1, newCollection(0, 1)); } BOOST_AUTO_TEST_CASE(testSpanNearOrdered03) { orderedSlopTest3(2, newCollection(0, 1, 2)); } BOOST_AUTO_TEST_CASE(testSpanNearOrdered04) { orderedSlopTest3(3, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSpanNearOrdered05) { orderedSlopTest3(4, newCollection(0, 1, 2, 3)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual01) { orderedSlopTest3Equal(0, Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual02) { orderedSlopTest3Equal(1, newCollection(1)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual03) { orderedSlopTest3Equal(2, newCollection(1)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual04) { orderedSlopTest3Equal(3, newCollection(1, 3)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual11) { orderedSlopTest1Equal(0, newCollection(4)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual12) { orderedSlopTest1Equal(0, newCollection(4)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual13) { orderedSlopTest1Equal(1, newCollection(4, 5, 6)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual14) { orderedSlopTest1Equal(2, newCollection(4, 5, 6, 7)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedEqual15) { orderedSlopTest1Equal(3, newCollection(4, 5, 6, 7)); } BOOST_AUTO_TEST_CASE(testSpanNearOrderedOverlap) { bool ordered = true; int32_t slop = 1; SpanNearQueryPtr snq = newLucene(newCollection(makeSpanTermQuery(L"t1"), makeSpanTermQuery(L"t2"), makeSpanTermQuery(L"t3")), slop, ordered); SpansPtr spans = snq->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(11, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(4, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(11, spans->doc()); 
BOOST_CHECK_EQUAL(2, spans->start()); BOOST_CHECK_EQUAL(6, spans->end()); BOOST_CHECK(!spans->next()); } BOOST_AUTO_TEST_CASE(testSpanNearUnOrdered) { SpanNearQueryPtr snq = newLucene(newCollection(makeSpanTermQuery(L"u1"), makeSpanTermQuery(L"u2")), 0, false); SpansPtr spans = snq->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(4, spans->doc()); BOOST_CHECK_EQUAL(1, spans->start()); BOOST_CHECK_EQUAL(3, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(5, spans->doc()); BOOST_CHECK_EQUAL(2, spans->start()); BOOST_CHECK_EQUAL(4, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(8, spans->doc()); BOOST_CHECK_EQUAL(2, spans->start()); BOOST_CHECK_EQUAL(4, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(9, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(2, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(10, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(2, spans->end()); BOOST_CHECK(!spans->next()); SpanNearQueryPtr u1u2 = newLucene(newCollection(makeSpanTermQuery(L"u1"), makeSpanTermQuery(L"u2")), 0, false); snq = newLucene(newCollection(u1u2, makeSpanTermQuery(L"u2")), 1, false); spans = snq->getSpans(searcher->getIndexReader()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(4, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(3, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(4, spans->doc()); BOOST_CHECK_EQUAL(1, spans->start()); BOOST_CHECK_EQUAL(3, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(5, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(4, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(5, spans->doc()); BOOST_CHECK_EQUAL(2, spans->start()); BOOST_CHECK_EQUAL(4, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(8, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(4, spans->end()); BOOST_CHECK(spans->next()); 
BOOST_CHECK_EQUAL(8, spans->doc()); BOOST_CHECK_EQUAL(2, spans->start()); BOOST_CHECK_EQUAL(4, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(9, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(2, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(9, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(4, spans->end()); BOOST_CHECK(spans->next()); BOOST_CHECK_EQUAL(10, spans->doc()); BOOST_CHECK_EQUAL(0, spans->start()); BOOST_CHECK_EQUAL(2, spans->end()); BOOST_CHECK(!spans->next()); } BOOST_AUTO_TEST_CASE(testSpanOrEmpty) { SpansPtr spans = orSpans(Collection::newInstance()); BOOST_CHECK(!spans->next()); SpanOrQueryPtr a = newLucene(Collection::newInstance()); SpanOrQueryPtr b = newLucene(Collection::newInstance()); BOOST_CHECK(a->equals(b)); } BOOST_AUTO_TEST_CASE(testSpanOrSingle) { SpansPtr spans = orSpans(newCollection(L"w5")); checkNextSpans(spans, 0, 4, 5); BOOST_CHECK(!spans->next()); } BOOST_AUTO_TEST_CASE(testSpanOrMovesForward) { SpansPtr spans = orSpans(newCollection(L"w1", L"xx")); BOOST_CHECK(spans->next()); int32_t doc = spans->doc(); BOOST_CHECK_EQUAL(0, doc); BOOST_CHECK(spans->skipTo(0)); doc = spans->doc(); BOOST_CHECK_EQUAL(1, doc); } BOOST_AUTO_TEST_CASE(testSpanOrDouble) { SpansPtr spans = orSpans(newCollection(L"w5", L"yy")); checkNextSpans(spans, 0, 4, 5); checkNextSpans(spans, 2, 3, 4); checkNextSpans(spans, 3, 4, 5); checkNextSpans(spans, 7, 3, 4); BOOST_CHECK(!spans->next()); } BOOST_AUTO_TEST_CASE(testSpanOrDoubleSkip) { SpansPtr spans = orSpans(newCollection(L"w5", L"yy")); BOOST_CHECK(spans->skipTo(3)); BOOST_CHECK_EQUAL(3, spans->doc()); BOOST_CHECK_EQUAL(4, spans->start()); BOOST_CHECK_EQUAL(5, spans->end()); checkNextSpans(spans, 7, 3, 4); BOOST_CHECK(!spans->next()); } BOOST_AUTO_TEST_CASE(testSpanOrUnused) { SpansPtr spans = orSpans(newCollection(L"w5", L"unusedTerm", L"yy")); checkNextSpans(spans, 0, 4, 5); checkNextSpans(spans, 2, 3, 4); checkNextSpans(spans, 3, 4, 
5); checkNextSpans(spans, 7, 3, 4); BOOST_CHECK(!spans->next()); } BOOST_AUTO_TEST_CASE(testSpanOrTripleSameDoc) { SpansPtr spans = orSpans(newCollection(L"t1", L"t2", L"t3")); checkNextSpans(spans, 11, 0, 1); checkNextSpans(spans, 11, 1, 2); checkNextSpans(spans, 11, 2, 3); checkNextSpans(spans, 11, 3, 4); checkNextSpans(spans, 11, 4, 5); checkNextSpans(spans, 11, 5, 6); BOOST_CHECK(!spans->next()); } namespace TestSpanScorerZeroSloppyFreq { class SloppyFreqSimilarity : public DefaultSimilarity { public: virtual ~SloppyFreqSimilarity() { } public: virtual double sloppyFreq(int32_t distance) { return 0.0; } }; class SloppyFreqSpanNearQuery : public SpanNearQuery { public: SloppyFreqSpanNearQuery(SimilarityPtr sim, Collection clauses, int32_t slop, bool inOrder) : SpanNearQuery(clauses, slop, inOrder) { this->sim = sim; } virtual ~SloppyFreqSpanNearQuery() { } protected: SimilarityPtr sim; public: virtual SimilarityPtr getSimilarity(SearcherPtr searcher) { return sim; } }; } BOOST_AUTO_TEST_CASE(testSpanScorerZeroSloppyFreq) { bool ordered = true; int32_t slop = 1; SimilarityPtr sim = newLucene(); SpanNearQueryPtr snq = newLucene(sim, newCollection(makeSpanTermQuery(L"t1"), makeSpanTermQuery(L"t2")), slop, ordered); ScorerPtr spanScorer = snq->weight(searcher)->scorer(searcher->getIndexReader(), true, false); BOOST_CHECK_NE(spanScorer->nextDoc(), DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK_EQUAL(spanScorer->docID(), 11); double score = spanScorer->score(); BOOST_CHECK_EQUAL(score, 0.0); BOOST_CHECK_EQUAL(spanScorer->nextDoc(), DocIdSetIterator::NO_MORE_DOCS); } BOOST_AUTO_TEST_CASE(testNPESpanQuery) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT, HashSet::newInstance()), IndexWriter::MaxFieldLengthLIMITED); // Add documents addDoc(writer, L"1", L"the big dogs went running to the market"); addDoc(writer, L"2", L"the cat chased the mouse, then the cat ate the mouse quickly"); // Commit writer->close(); 
// Get searcher IndexReaderPtr reader = IndexReader::open(dir, true); IndexSearcherPtr searcher = newLucene(reader); // Control (make sure docs indexed) BOOST_CHECK_EQUAL(2, hitCount(searcher, L"the")); BOOST_CHECK_EQUAL(1, hitCount(searcher, L"cat")); BOOST_CHECK_EQUAL(1, hitCount(searcher, L"dogs")); BOOST_CHECK_EQUAL(0, hitCount(searcher, L"rabbit")); // This throws exception (it shouldn't) BOOST_CHECK_EQUAL(1, searcher->search(createSpan(0, true, newCollection(createSpan(4, false, L"chased", L"cat"), createSpan(L"ate"))), 10)->totalHits); reader->close(); dir->close(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/store/000077500000000000000000000000001217574114600202155ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/store/BufferedIndexInputTest.cpp000066400000000000000000000447601217574114600253260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
/////////////////////////////////////////////////////////////////////////////

#include "TestInc.h"
#include  // NOTE(review): header name lost in archive extraction (likely a Boost.Test header) — restore from upstream
#include "LuceneTestFixture.h"
#include "TestUtils.h"
#include "BufferedIndexInput.h"
#include "_SimpleFSDirectory.h"
#include "MockFSDirectory.h"
#include "FSDirectory.h"
#include "IndexWriter.h"
#include "IndexReader.h"
#include "IndexSearcher.h"
#include "WhitespaceAnalyzer.h"
#include "Document.h"
#include "Field.h"
#include "Term.h"
#include "TermQuery.h"
#include "ScoreDoc.h"
#include "TopDocs.h"
#include "Random.h"
#include "MiscUtils.h"
#include "FileUtils.h"

using namespace Lucene;

BOOST_FIXTURE_TEST_SUITE(BufferedIndexInputTest, LuceneTestFixture)

// Test double for BufferedIndexInput that serves bytes from an in-memory
// array instead of a real file, so the decoding primitives (readInt,
// readVInt, readString, ...) can be exercised deterministically.
class TestableBufferedIndexInputRead : public BufferedIndexInput
{
public:
    // b must outlive this object; length is the number of readable bytes.
    TestableBufferedIndexInputRead(const uint8_t* b, int32_t length) : inputBytes(b), inputLength(length), nextByte(0)
    {
    }

    virtual ~TestableBufferedIndexInputRead()
    {
    }

protected:
    const uint8_t* inputBytes;  // backing byte array (not owned)
    int32_t inputLength;        // total readable bytes
    int32_t nextByte;           // cursor into inputBytes

public:
    // Copies the next 'length' bytes into b starting at 'offset'.
    // NOTE(review): the source index adds 'offset' as well as nextByte, so
    // this is only correct when the caller passes offset == 0 — confirm
    // against how BufferedIndexInput::refill invokes readInternal.
    virtual void readInternal(uint8_t* b, int32_t offset, int32_t length)
    {
        std::copy(inputBytes + nextByte + offset, inputBytes + nextByte + offset + length, b + offset);
        nextByte += length;
    }

    // Seeking is a no-op in this fixture; tests only read forward.
    virtual void seekInternal(int64_t pos)
    {
    }

    virtual int64_t length()
    {
        return inputLength;
    }

    // Cloning is not needed by these tests; returns a null pointer.
    virtual IndexInputPtr clone()
    {
        return IndexInputPtr();
    }
};

// readInt: 4 big-endian bytes 0x01020304 == 16909060.
BOOST_AUTO_TEST_CASE(testReadInt)
{
    ByteArray inputBytes(ByteArray::newInstance(10));
    uint8_t input[4] = { 1, 2, 3, 4 };
    std::memcpy(inputBytes.get(), input, 4);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 4);
    BOOST_CHECK_EQUAL(indexInput.readInt(), 16909060);
}

// readVInt: variable-length int, 7 bits per byte, high bit = continuation.
BOOST_AUTO_TEST_CASE(testReadVInt)
{
    ByteArray inputBytes(ByteArray::newInstance(10));
    uint8_t input[4] = { 200, 201, 150, 96 };
    std::memcpy(inputBytes.get(), input, 4);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 4);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 201696456);
}

// readLong: 8 big-endian bytes.
BOOST_AUTO_TEST_CASE(testReadLong)
{
    ByteArray inputBytes(ByteArray::newInstance(10));
    uint8_t input[8] = { 32, 43, 32, 96, 12, 54, 22, 96 };
    std::memcpy(inputBytes.get(), input, 8);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 8);
    BOOST_CHECK_EQUAL(indexInput.readLong(), 2317982030106072672LL);
}

// readVLong: variable-length 64-bit int.
BOOST_AUTO_TEST_CASE(testReadVLong)
{
    ByteArray inputBytes(ByteArray::newInstance(10));
    uint8_t input[8] = { 213, 143, 132, 196, 172, 154, 129, 96 };
    std::memcpy(inputBytes.get(), input, 8);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 8);
    BOOST_CHECK_EQUAL(indexInput.readVLong(), 54048498881988565LL);
}

// readString: VInt length prefix (11) followed by UTF-8 bytes.
BOOST_AUTO_TEST_CASE(testReadString)
{
    ByteArray inputBytes(ByteArray::newInstance(30));
    uint8_t input[12] = { 11, 't', 'e', 's', 't', ' ', 's', 't', 'r', 'i', 'n', 'g' };
    std::memcpy(inputBytes.get(), input, 12);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 12);
    BOOST_CHECK_EQUAL(indexInput.readString(), L"test string");
}

// readModifiedUTF8String: same wire format for plain ASCII content.
BOOST_AUTO_TEST_CASE(testReadModifiedUTF8String)
{
    ByteArray inputBytes(ByteArray::newInstance(30));
    uint8_t input[12] = { 11, 't', 'e', 's', 't', ' ', 's', 't', 'r', 'i', 'n', 'g' };
    std::memcpy(inputBytes.get(), input, 12);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 12);
    BOOST_CHECK_EQUAL(indexInput.readModifiedUTF8String(), L"test string");
}

// readChars: decodes raw (unprefixed) UTF-8 bytes into a wchar_t buffer.
BOOST_AUTO_TEST_CASE(testReadChars)
{
    ByteArray inputBytes(ByteArray::newInstance(30));
    uint8_t input[11] = { 't', 'e', 's', 't', ' ', 's', 't', 'r', 'i', 'n', 'g' };
    std::memcpy(inputBytes.get(), input, 11);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 11);
    ByteArray outputChars(ByteArray::newInstance(30 * sizeof(wchar_t)));
    indexInput.readChars((wchar_t*)outputChars.get(), 0, 11);
    wchar_t expected[11] = { L't', L'e', L's', L't', L' ', L's', L't', L'r', L'i', L'n', L'g' };
    BOOST_CHECK_EQUAL(std::memcmp(outputChars.get(), expected, 11 * sizeof(wchar_t)), 0);
}

// skipChars over a single 1-byte char advances the file pointer by 1.
BOOST_AUTO_TEST_CASE(testSkipOneChar)
{
    ByteArray inputBytes(ByteArray::newInstance(10));
    uint8_t input[5] = { 1, 2, 3, 4, 5 };
    std::memcpy(inputBytes.get(), input, 5);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 5);
    indexInput.skipChars(1);
    BOOST_CHECK_EQUAL(indexInput.getFilePointer(), 1);
}

// Two 1-byte chars -> pointer advances 2 bytes.
BOOST_AUTO_TEST_CASE(testSkipTwoChars)
{
    ByteArray inputBytes(ByteArray::newInstance(10));
    uint8_t input[5] = { 1, 2, 3, 4, 5 };
    std::memcpy(inputBytes.get(), input, 5);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 5);
    indexInput.skipChars(2);
    BOOST_CHECK_EQUAL(indexInput.getFilePointer(), 2);
}

// 132 (0x84) has the 2-byte lead pattern, so the second "char" spans 2 bytes.
BOOST_AUTO_TEST_CASE(testSkipTwoCharsAdditionalChar)
{
    ByteArray inputBytes(ByteArray::newInstance(10));
    uint8_t input[5] = { 1, 132, 132, 4, 5 };
    std::memcpy(inputBytes.get(), input, 5);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 5);
    indexInput.skipChars(2);
    BOOST_CHECK_EQUAL(indexInput.getFilePointer(), 3);
}

// 232 (0xE8) has the 3-byte lead pattern, so the second "char" spans 3 bytes.
BOOST_AUTO_TEST_CASE(testSkipTwoCharsAdditionalTwoChars)
{
    ByteArray inputBytes(ByteArray::newInstance(10));
    uint8_t input[5] = { 1, 232, 232, 4, 5 };
    std::memcpy(inputBytes.get(), input, 5);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 5);
    indexInput.skipChars(2);
    BOOST_CHECK_EQUAL(indexInput.getFilePointer(), 4);
}

// Reads a mixed stream of VInts and strings (ASCII, 2/3/4-byte UTF-8,
// surrogate pairs, embedded NULs) through a deliberately tiny 10-byte
// buffer so reads straddle refill boundaries.
BOOST_AUTO_TEST_CASE(testReadCollection)
{
    ByteArray inputBytes(ByteArray::newInstance(100));
    uint8_t input[88] = {0x80, 0x01, 0xff, 0x7f, 0x80, 0x80, 0x01, 0x81, 0x80, 0x01, 0x06, 'L', 'u', 'c', 'e', 'n', 'e',
                         // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
                         0x02, 0xc2, 0xbf, 0x0a, 'L', 'u', 0xc2, 0xbf, 'c', 'e', 0xc2, 0xbf, 'n', 'e',
                         // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
                         0x03, 0xe2, 0x98, 0xa0, 0x0c, 'L', 'u', 0xe2, 0x98, 0xa0, 'c', 'e', 0xe2, 0x98, 0xa0, 'n', 'e',
                         // surrogate pairs
                         // (U+1D11E "MUSICAL SYMBOL G CLEF")
                         // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
                         0x04, 0xf0, 0x9d, 0x84, 0x9e, 0x08, 0xf0, 0x9d, 0x84, 0x9e, 0xf0, 0x9d, 0x85, 0xa0, 0x0e, 'L', 'u', 0xf0, 0x9d, 0x84, 0x9e, 'c', 'e', 0xf0, 0x9d, 0x85, 0xa0, 'n', 'e',
                         // null bytes
                         0x01, 0x00, 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e'};
    std::memcpy(inputBytes.get(), input, 88);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 88);
    indexInput.setBufferSize(10); // small buffer forces refills mid-value
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 128);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 16383);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 16384);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 16385);
    BOOST_CHECK_EQUAL(indexInput.readString(), L"Lucene");
    const uint8_t question[] = {0xc2, 0xbf};
    BOOST_CHECK_EQUAL(indexInput.readString(), UTF8_TO_STRING(question));
    // NOTE(review): the names 'skull', 'gclef' and 'eighthnote' do not match
    // their byte contents (e.g. 'skull' holds "Lu¿ce¿ne"); the expected
    // values themselves are consistent with the input stream above.
    const uint8_t skull[] = {0x4c, 0x75, 0xc2, 0xbf, 0x63, 0x65, 0xc2, 0xbf, 0x6e, 0x65};
    BOOST_CHECK_EQUAL(indexInput.readString(), UTF8_TO_STRING(skull));
    const uint8_t gclef[] = {0xe2, 0x98, 0xa0};
    BOOST_CHECK_EQUAL(indexInput.readString(), UTF8_TO_STRING(gclef));
    const uint8_t eighthnote[] = {0x4c, 0x75, 0xe2, 0x98, 0xa0, 0x63, 0x65, 0xe2, 0x98, 0xa0, 0x6e, 0x65};
    BOOST_CHECK_EQUAL(indexInput.readString(), UTF8_TO_STRING(eighthnote));
    // On UTF-16 builds, U+1D11E decodes to the surrogate pair 55348/56606;
    // on UTF-32 builds it is the single code point 119070.
    String readString(indexInput.readString());
#ifdef LPP_UNICODE_CHAR_SIZE_2
    BOOST_CHECK_EQUAL(readString[0], 55348);
    BOOST_CHECK_EQUAL(readString[1], 56606);
#else
    BOOST_CHECK_EQUAL(readString[0], 119070);
#endif
    readString = indexInput.readString();
#ifdef LPP_UNICODE_CHAR_SIZE_2
    BOOST_CHECK_EQUAL(readString[0], 55348);
    BOOST_CHECK_EQUAL(readString[1], 56606);
    BOOST_CHECK_EQUAL(readString[2], 55348);
    BOOST_CHECK_EQUAL(readString[3], 56672);
#else
    BOOST_CHECK_EQUAL(readString[0], 119070);
    BOOST_CHECK_EQUAL(readString[1], 119136);
#endif
    readString = indexInput.readString();
#ifdef LPP_UNICODE_CHAR_SIZE_2
    BOOST_CHECK_EQUAL(readString[0], L'L');
    BOOST_CHECK_EQUAL(readString[1], L'u');
    BOOST_CHECK_EQUAL(readString[2], 55348);
    BOOST_CHECK_EQUAL(readString[3], 56606);
    BOOST_CHECK_EQUAL(readString[4], L'c');
    BOOST_CHECK_EQUAL(readString[5], L'e');
    BOOST_CHECK_EQUAL(readString[6], 55348);
    BOOST_CHECK_EQUAL(readString[7], 56672);
    BOOST_CHECK_EQUAL(readString[8], L'n');
    BOOST_CHECK_EQUAL(readString[9], L'e');
#else
    BOOST_CHECK_EQUAL(readString[0], L'L');
    BOOST_CHECK_EQUAL(readString[1], L'u');
    BOOST_CHECK_EQUAL(readString[2], 119070);
    BOOST_CHECK_EQUAL(readString[3], L'c');
    BOOST_CHECK_EQUAL(readString[4], L'e');
    BOOST_CHECK_EQUAL(readString[5], 119136);
    BOOST_CHECK_EQUAL(readString[6], L'n');
    BOOST_CHECK_EQUAL(readString[7], L'e');
#endif
    // Strings containing NUL bytes must round-trip intact.
    readString = indexInput.readString();
    BOOST_CHECK_EQUAL(readString[0], 0);
    readString = indexInput.readString();
    BOOST_CHECK_EQUAL(readString[0], L'L');
    BOOST_CHECK_EQUAL(readString[1], L'u');
    BOOST_CHECK_EQUAL(readString[2], 0);
    BOOST_CHECK_EQUAL(readString[3], L'c');
    BOOST_CHECK_EQUAL(readString[4], L'e');
    BOOST_CHECK_EQUAL(readString[5], 0);
    BOOST_CHECK_EQUAL(readString[6], L'n');
    BOOST_CHECK_EQUAL(readString[7], L'e');
}

// skipChars after a run of VInts lands on the right byte, and the remaining
// chars decode correctly.
BOOST_AUTO_TEST_CASE(testSkipCollection)
{
    ByteArray inputBytes(ByteArray::newInstance(100));
    uint8_t input[17] = {0x80, 0x01, 0xff, 0x7f, 0x80, 0x80, 0x01, 0x81, 0x80, 0x01, 0x06, 'L', 'u', 'c', 'e', 'n', 'e'};
    std::memcpy(inputBytes.get(), input, 17);
    TestableBufferedIndexInputRead indexInput(inputBytes.get(), 17);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 128);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 16383);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 16384);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 16385);
    BOOST_CHECK_EQUAL(indexInput.readVInt(), 6);
    indexInput.skipChars(3); // skip "Luc"
    ByteArray remainingBytes(ByteArray::newInstance(4 * sizeof(wchar_t)));
    indexInput.readChars((wchar_t*)remainingBytes.get(), 0, 3);
    BOOST_CHECK_EQUAL(String((wchar_t*)remainingBytes.get(), 3), L"ene");
}

// byten emulates a file - byten(n) returns the n'th byte in that file.
uint8_t byten(int64_t n) { return (uint8_t)(n * n % 256); } void writeBytes(std::ofstream& file, int64_t size) { for (int64_t i = 0; i < size; ++i) file << byten(i); file.flush(); } static const int64_t TEST_FILE_LENGTH = 1024 * 1024; class MyBufferedIndexInput : public BufferedIndexInput { public: MyBufferedIndexInput() { this->len = LLONG_MAX; // an infinite file this->pos = 0; } MyBufferedIndexInput(int64_t len) { this->len = len; this->pos = 0; } protected: virtual void readInternal(uint8_t* b, int32_t offset, int32_t length) { for (int32_t i = offset; i < (offset + length); ++i) b[i] = byten(pos++); } virtual void seekInternal(int64_t pos) { this->pos = pos; } public: virtual void close() { } virtual int64_t length() { return len; } protected: int64_t pos; int64_t len; }; DECLARE_SHARED_PTR(MyBufferedIndexInput) // Call readByte() repeatedly, past the buffer boundary, and see that it is working as expected. // Our input comes from a dynamically generated/ "file" - see MyBufferedIndexInput. 
BOOST_AUTO_TEST_CASE(testReadByte)
{
    MyBufferedIndexInputPtr input(newLucene());
    // Read far past one buffer's worth to force repeated refills.
    for (int32_t i = 0; i < BufferedIndexInput::BUFFER_SIZE * 10; ++i)
        BOOST_CHECK_EQUAL(input->readByte(), byten(i));
}

// Read 'size' bytes at file position 'pos' and verify each against byten().
// Clamps the read at TEST_FILE_LENGTH so only in-range bytes are checked.
void checkReadBytes(IndexInputPtr input, int32_t size, int32_t pos)
{
    // Just to see that "offset" is treated properly in readBytes(), we add an
    // arbitrary offset at the beginning of the array
    int32_t offset = size % 10; // arbitrary
    ByteArray buffer(ByteArray::newInstance(10));
    buffer.resize(MiscUtils::getNextSize(offset + size));
    BOOST_CHECK_EQUAL(pos, input->getFilePointer());
    int64_t left = TEST_FILE_LENGTH - input->getFilePointer();
    if (left <= 0)
        return; // already at emulated EOF; nothing to verify
    else if (left < size)
        size = (int32_t)left; // clamp to remaining bytes
    input->readBytes(buffer.get(), offset, size);
    BOOST_CHECK_EQUAL(pos + size, input->getFilePointer());
    for (int32_t i = 0; i < size; ++i)
        BOOST_CHECK_EQUAL(byten(pos + i), buffer[offset + i]);
}

// Drive checkReadBytes with three read-size patterns, wrapping back to the
// start of the emulated file whenever the cursor passes TEST_FILE_LENGTH.
void runReadBytes(IndexInputPtr input, int32_t bufferSize)
{
    int32_t pos = 0;
    RandomPtr random = newLucene();
    // gradually increasing size
    for (int32_t size = 1; size < bufferSize * 10; size = size + size / 200 + 1)
    {
        checkReadBytes(input, size, pos);
        pos += size;
        if (pos >= TEST_FILE_LENGTH)
        {
            // wrap
            pos = 0;
            input->seek(0);
        }
    }
    // wildly fluctuating size
    for (int64_t i = 0; i < 1000; ++i)
    {
        int32_t size = random->nextInt(10000);
        checkReadBytes(input, 1 + size, pos);
        pos += 1 + size;
        if (pos >= TEST_FILE_LENGTH)
        {
            // wrap
            pos = 0;
            input->seek(0);
        }
    }
    // constant small size (7 bytes)
    for (int32_t i = 0; i < bufferSize; ++i)
    {
        checkReadBytes(input, 7, pos);
        pos += 7;
        if (pos >= TEST_FILE_LENGTH)
        {
            // wrap
            pos = 0;
            input->seek(0);
        }
    }
}

// Run runReadBytes and always close the input, re-raising any test failure
// afterwards (manual try/catch stands in for finally).
void runReadBytesAndClose(IndexInputPtr input, int32_t bufferSize)
{
    LuceneException finally;
    try
    {
        runReadBytes(input, bufferSize);
    }
    catch (LuceneException& e)
    {
        finally = e;
    }
    input->close();
    finally.throwException();
}

// Call readBytes() repeatedly, with various chunk sizes (from 1 byte to larger
// than the buffer size), and see that it returns the bytes we expect. Our
// input comes from a dynamically generated "file" - see MyBufferedIndexInput.
BOOST_AUTO_TEST_CASE(testReadBytes)
{
    MyBufferedIndexInputPtr input(newLucene());
    runReadBytes(input, BufferedIndexInput::BUFFER_SIZE);
    int32_t inputBufferSize = 128;
    String tmpInputFile(getTempDir(L"IndexInput"));
    std::ofstream file(StringUtils::toUTF8(tmpInputFile).c_str(), std::ios::binary | std::ios::out);
    writeBytes(file, TEST_FILE_LENGTH);
    // NOTE(review): 'file' is only flushed, never closed, before the data is
    // read back below — confirm this is safe on platforms with exclusive
    // file locking (e.g. Windows).
    // run test with chunk size of 10 bytes
    runReadBytesAndClose(newLucene(tmpInputFile, inputBufferSize, 10), inputBufferSize);
    // run test with chunk size of 100 MB - default
    runReadBytesAndClose(newLucene(tmpInputFile, inputBufferSize, FSDirectory::DEFAULT_READ_CHUNK_SIZE), inputBufferSize);
    FileUtils::removeFile(tmpInputFile);
}

// This tests that attempts to readBytes() past an EOF will fail, while reads
// up to the EOF will succeed. The EOF is determined by the
// BufferedIndexInput's arbitrary length() value.
BOOST_AUTO_TEST_CASE(testEOF)
{
    MyBufferedIndexInputPtr input(newLucene(1024));
    // see that we can read all the bytes at one go
    checkReadBytes(input, (int32_t)input->length(), 0);
    // go back and see that we can't read more than that, for small and large overflows
    int32_t pos = (int32_t)input->length() - 10;
    input->seek(pos);
    checkReadBytes(input, 10, pos);
    input->seek(pos);
    BOOST_CHECK_EXCEPTION(checkReadBytes(input, 11, pos), LuceneException, check_exception(LuceneException::IO));
    input->seek(pos);
    BOOST_CHECK_EXCEPTION(checkReadBytes(input, 50, pos), LuceneException, check_exception(LuceneException::IO));
    input->seek(pos);
    BOOST_CHECK_EXCEPTION(checkReadBytes(input, 100000, pos), LuceneException, check_exception(LuceneException::IO));
}

// Index and search through a MockFSDirectory whose tweakBufferSizes() call
// changes buffer sizes on live inputs between operations; results must be
// unaffected by the buffer size in use.
BOOST_AUTO_TEST_CASE(testSetBufferSize)
{
    String indexDir(getTempDir(L"testSetBufferSize"));
    MockFSDirectoryPtr dir = newLucene(indexDir);
    LuceneException finally;
    try
    {
        IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED);
        writer->setUseCompoundFile(false);
        for (int32_t i = 0; i < 37; ++i)
        {
            DocumentPtr doc = newLucene();
            doc->add(newLucene(L"content", L"aaa bbb ccc ddd" + StringUtils::toString(i), Field::STORE_YES, Field::INDEX_ANALYZED));
            doc->add(newLucene(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_ANALYZED));
            writer->addDocument(doc);
        }
        writer->close();
        dir->allIndexInputs.clear();
        IndexReaderPtr reader = IndexReader::open(dir, false);
        TermPtr aaa = newLucene(L"content", L"aaa");
        TermPtr bbb = newLucene(L"content", L"bbb");
        TermPtr ccc = newLucene(L"content", L"ccc");
        BOOST_CHECK_EQUAL(reader->docFreq(ccc), 37);
        reader->deleteDocument(0);
        BOOST_CHECK_EQUAL(reader->docFreq(aaa), 37);
        dir->tweakBufferSizes();
        reader->deleteDocument(4);
        BOOST_CHECK_EQUAL(reader->docFreq(bbb), 37);
        dir->tweakBufferSizes();
        IndexSearcherPtr searcher = newLucene(reader);
        // 37 docs minus the two deleted above = 35 hits
        Collection hits = searcher->search(newLucene(bbb), FilterPtr(), 1000)->scoreDocs;
        dir->tweakBufferSizes();
        BOOST_CHECK_EQUAL(hits.size(), 35);
        dir->tweakBufferSizes();
        hits = searcher->search(newLucene(newLucene(L"id", L"33")), FilterPtr(), 1000)->scoreDocs;
        dir->tweakBufferSizes();
        BOOST_CHECK_EQUAL(hits.size(), 1);
        hits = searcher->search(newLucene(aaa), FilterPtr(), 1000)->scoreDocs;
        dir->tweakBufferSizes();
        BOOST_CHECK_EQUAL(hits.size(), 35);
        searcher->close();
        reader->close();
    }
    catch (LuceneException& e)
    {
        finally = e;
    }
    // Always remove the on-disk index, then re-raise any captured failure.
    FileUtils::removeDirectory(indexDir);
    finally.throwException();
}

BOOST_AUTO_TEST_SUITE_END()
LucenePlusPlus-rel_3.0.4/src/test/store/BufferedIndexOutputTest.cpp000066400000000000000000000120151217574114600255130ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "TestInc.h"
#include "LuceneTestFixture.h"
#include "BufferedIndexOutput.h"
#include "BufferedIndexInput.h"

using namespace Lucene;

BOOST_FIXTURE_TEST_SUITE(BufferedIndexOutputTest, LuceneTestFixture)

// Test double for BufferedIndexOutput that flushes into a caller-supplied
// in-memory byte array, so the encoding primitives (writeInt, writeVInt,
// writeString, ...) can be verified byte-for-byte.
class TestableBufferedIndexOutput : public BufferedIndexOutput
{
public:
    using BufferedIndexOutput::writeBytes;

    // b must outlive this object; length is the capacity of the target array.
    TestableBufferedIndexOutput(uint8_t* b, int32_t length) : outputBytes(b), outputLength(length), nextByte(0)
    {
    }

    // Append the buffered bytes to the backing array.
    // NOTE(review): the destination index adds 'offset' as well as nextByte,
    // so this is only correct when called with offset == 0 — confirm against
    // BufferedIndexOutput's flush behaviour.
    virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length)
    {
        std::copy(b + offset, b + offset + length, outputBytes + nextByte + offset);
        nextByte += length;
    }

    virtual int64_t length()
    {
        return outputLength;
    }

protected:
    uint8_t* outputBytes;  // backing output array (not owned)
    int32_t outputLength;  // capacity of outputBytes
    int32_t nextByte;      // number of bytes flushed so far
};

// writeInt emits 4 big-endian bytes: 1234 == 0x000004D2.
BOOST_AUTO_TEST_CASE(testWriteInt)
{
    ByteArray outputBytes(ByteArray::newInstance(10));
    TestableBufferedIndexOutput indexOutput(outputBytes.get(), 10);
    indexOutput.writeInt(1234);
    indexOutput.flush();
    BOOST_CHECK_EQUAL(indexOutput.getFilePointer(), 4);
    uint8_t expected[4] = { 0, 0, 4, 210 };
    BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 4), 0);
}

// writeVInt encodes 1234 in two bytes (7 bits per byte, high bit = more).
BOOST_AUTO_TEST_CASE(testWriteVInt)
{
    ByteArray outputBytes(ByteArray::newInstance(10));
    TestableBufferedIndexOutput indexOutput(outputBytes.get(), 10);
    indexOutput.writeVInt(1234);
    indexOutput.flush();
    BOOST_CHECK_EQUAL(indexOutput.getFilePointer(), 2);
    uint8_t expected[2] = { 210, 9 };
    BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 2), 0);
}

// writeLong emits 8 big-endian bytes.
BOOST_AUTO_TEST_CASE(testWriteLong)
{
    ByteArray outputBytes(ByteArray::newInstance(10));
    TestableBufferedIndexOutput indexOutput(outputBytes.get(), 10);
    indexOutput.writeLong(1234123412341234LL);
    indexOutput.flush();
    BOOST_CHECK_EQUAL(indexOutput.getFilePointer(), 8);
    uint8_t expected[8] = { 0, 4, 98, 109, 191, 154, 1, 242 };
    BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 8), 0);
}

// writeVLong: variable-length 64-bit encoding of the same value.
BOOST_AUTO_TEST_CASE(testWriteVLong)
{
    ByteArray outputBytes(ByteArray::newInstance(10));
    TestableBufferedIndexOutput indexOutput(outputBytes.get(), 10);
    indexOutput.writeVLong(1234123412341234LL);
    indexOutput.flush();
    BOOST_CHECK_EQUAL(indexOutput.getFilePointer(), 8);
    uint8_t expected[8] = { 242, 131, 232, 252, 219, 205, 152, 2 };
    BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 8), 0);
}

// writeString emits a VInt length prefix (11) followed by UTF-8 bytes.
BOOST_AUTO_TEST_CASE(testWriteString)
{
    ByteArray outputBytes(ByteArray::newInstance(30));
    TestableBufferedIndexOutput indexOutput(outputBytes.get(), 30);
    indexOutput.writeString(L"test string");
    indexOutput.flush();
    BOOST_CHECK_EQUAL(indexOutput.getFilePointer(), 12);
    uint8_t expected[12] = { 11, 116, 101, 115, 116, 32, 115, 116, 114, 105, 110, 103 };
    BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 12), 0);
}

// writeChars writes a substring (start 5, length 6 -> "string") unprefixed.
BOOST_AUTO_TEST_CASE(testWriteChars)
{
    ByteArray outputBytes(ByteArray::newInstance(30));
    TestableBufferedIndexOutput indexOutput(outputBytes.get(), 30);
    indexOutput.writeChars(L"test string", 5, 6);
    indexOutput.flush();
    BOOST_CHECK_EQUAL(indexOutput.getFilePointer(), 6);
    uint8_t expected[6] = { 115, 116, 114, 105, 110, 103 };
    BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 6), 0);
}

namespace TestCopyBytes
{
    // Minimal BufferedIndexInput over an in-memory array, used as the source
    // side of the copyBytes() test below.
    class SourceIndexInput : public BufferedIndexInput
    {
    public:
        SourceIndexInput(const uint8_t* b, int32_t length) : inputBytes(b), inputLength(length), nextByte(0)
        {
        }

        // NOTE(review): same offset-double-add pattern as the other test
        // doubles in this directory; correct only for offset == 0.
        virtual void readInternal(uint8_t* b, int32_t offset, int32_t length)
        {
            std::copy(inputBytes + nextByte + offset, inputBytes + nextByte + offset + length, b + offset);
            nextByte += length;
        }

        virtual void seekInternal(int64_t pos)
        {
        }

        virtual int64_t length()
        {
            return inputLength;
        }

        virtual IndexInputPtr clone()
        {
            return IndexInputPtr();
        }

    protected:
        const uint8_t* inputBytes;  // backing source array (not owned)
        int32_t inputLength;
        int32_t nextByte;
    };
}

// copyBytes transfers the first 20000 of 32768 random source bytes intact.
BOOST_AUTO_TEST_CASE(testCopyBytes)
{
    ByteArray sourceBytes(ByteArray::newInstance(32768));
    std::generate(sourceBytes.get(), sourceBytes.get() + 32768, rand);
    BufferedIndexInputPtr indexSource(newLucene(sourceBytes.get(), 32768));
    ByteArray outputBytes(ByteArray::newInstance(32768));
    TestableBufferedIndexOutput indexOutput(outputBytes.get(), 32768);
    indexOutput.copyBytes(indexSource, 20000);
    indexOutput.flush();
    BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), sourceBytes.get(), 20000), 0);
}

BOOST_AUTO_TEST_SUITE_END()
LucenePlusPlus-rel_3.0.4/src/test/store/DirectoryTest.cpp000066400000000000000000000135721217574114600235350ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "TestInc.h"
#include "LuceneTestFixture.h"
#include "TestUtils.h"
#include "Directory.h"
#include "Lock.h"
#include "FSDirectory.h"
#include "SimpleFSDirectory.h"
#include "MMapDirectory.h"
#include "RAMDirectory.h"
#include "IndexInput.h"
#include "IndexOutput.h"
#include "FileUtils.h"

using namespace Lucene;

BOOST_FIXTURE_TEST_SUITE(DirectoryTest, LuceneTestFixture)

// Using a RAMDirectory after close() must raise AlreadyClosed.
BOOST_AUTO_TEST_CASE(testDetectDirectoryClose)
{
    RAMDirectoryPtr dir(newLucene());
    dir->close();
    BOOST_CHECK_EXCEPTION(dir->createOutput(L"test"), LuceneException, check_exception(LuceneException::AlreadyClosed));
}

// Same check for a filesystem-backed directory.
BOOST_AUTO_TEST_CASE(testDetectFSDirectoryClose)
{
    DirectoryPtr dir = FSDirectory::open(getTempDir());
    dir->close();
    BOOST_CHECK_EXCEPTION(dir->createOutput(L"test"), LuceneException, check_exception(LuceneException::AlreadyClosed));
}

// Verify that two FSDirectory instances over the same path interoperate:
// a file written through 'first' is visible (and deletable) through
// 'second', and a lock held via one blocks the other.
template < class FSDirectory1, class FSDirectory2 >
void TestInstantiationPair(FSDirectory1& first, FSDirectory2& second, const String& fileName, const String& lockName)
{
    BOOST_CHECK_NO_THROW(first.ensureOpen());
    IndexOutputPtr out = first.createOutput(fileName);
    BOOST_CHECK_NO_THROW(out->writeByte(123));
    BOOST_CHECK_NO_THROW(out->close());
    BOOST_CHECK_NO_THROW(first.ensureOpen());
    BOOST_CHECK(first.fileExists(fileName));
    BOOST_CHECK_EQUAL(first.fileLength(fileName), 1);
    // don't test read on MMapDirectory, since it can't really be closed and will cause a failure to delete the file.
    if (!first.isMMapDirectory())
    {
        IndexInputPtr input = first.openInput(fileName);
        BOOST_CHECK_EQUAL(input->readByte(), 123);
        BOOST_CHECK_NO_THROW(input->close());
    }
    BOOST_CHECK_NO_THROW(second.ensureOpen());
    BOOST_CHECK(second.fileExists(fileName));
    BOOST_CHECK_EQUAL(second.fileLength(fileName), 1);
    if (!second.isMMapDirectory())
    {
        IndexInputPtr input = second.openInput(fileName);
        BOOST_CHECK_EQUAL(input->readByte(), 123);
        BOOST_CHECK_NO_THROW(input->close());
    }
    // delete with a different dir
    second.deleteFile(fileName);
    BOOST_CHECK(!first.fileExists(fileName));
    BOOST_CHECK(!second.fileExists(fileName));
    // A lock obtained through one instance must block the other.
    LockPtr lock = first.makeLock(lockName);
    BOOST_CHECK(lock->obtain());
    LockPtr lock2 = first.makeLock(lockName);
    BOOST_CHECK_EXCEPTION(lock2->obtain(1), LuceneException, check_exception(LuceneException::LockObtainFailed));
    lock->release();
    lock = second.makeLock(lockName);
    BOOST_CHECK(lock->obtain());
    lock->release();
}

namespace TestDirectInstantiation
{
    // Expose the protected ensureOpen() and tag the directory kind so
    // TestInstantiationPair can skip reads on memory-mapped directories.
    class TestableSimpleFSDirectory : public SimpleFSDirectory
    {
    public:
        TestableSimpleFSDirectory(const String& path) : SimpleFSDirectory(path) {}
        virtual ~TestableSimpleFSDirectory() {}
        using SimpleFSDirectory::ensureOpen;
        bool isMMapDirectory() { return false; }
    };

    class TestableMMapDirectory : public MMapDirectory
    {
    public:
        TestableMMapDirectory(const String& path) : MMapDirectory(path) {}
        virtual ~TestableMMapDirectory() {}
        using MMapDirectory::ensureOpen;
        bool isMMapDirectory() { return true; }
    };
}

// Test that different instances of FSDirectory can coexist on the same
// path, can read, write, and lock files.
BOOST_AUTO_TEST_CASE(testDirectInstantiation)
{
    TestDirectInstantiation::TestableSimpleFSDirectory fsDir(getTempDir());
    fsDir.ensureOpen();
    TestDirectInstantiation::TestableMMapDirectory mmapDir(getTempDir());
    mmapDir.ensureOpen();
    // Exercise both orderings: write via one implementation, read via the other.
    TestInstantiationPair(fsDir, mmapDir, L"foo.0", L"foo0.lck");
    TestInstantiationPair(mmapDir, fsDir, L"foo.1", L"foo1.lck");
}

// Merely constructing a SimpleFSDirectory must not create the directory on disk.
BOOST_AUTO_TEST_CASE(testDontCreate)
{
    String path(FileUtils::joinPath(getTempDir(), L"doesnotexist"));
    try
    {
        BOOST_CHECK(!FileUtils::fileExists(path));
        SimpleFSDirectoryPtr fsDir(newLucene(path));
        BOOST_CHECK(!FileUtils::fileExists(path));
    }
    catch (...)
    {
        // NOTE(review): swallows all exceptions so the cleanup below always
        // runs; a failure inside the try would go unreported here.
    }
    FileUtils::removeDirectory(path);
}

// A file created in 'dir' must appear in listAll().
void checkDirectoryFilter(DirectoryPtr dir)
{
    String name(L"file");
    dir->createOutput(name)->close();
    BOOST_CHECK(dir->fileExists(name));
    HashSet dirFiles(dir->listAll());
    BOOST_CHECK(dirFiles.contains(name));
}

BOOST_AUTO_TEST_CASE(testRAMDirectoryFilter)
{
    checkDirectoryFilter(newLucene());
}

BOOST_AUTO_TEST_CASE(testFSDirectoryFilter)
{
    checkDirectoryFilter(newLucene(getTempDir()));
}

// Subdirectories must not show up in a directory's file listing.
BOOST_AUTO_TEST_CASE(testCopySubdir)
{
    String path(FileUtils::joinPath(getTempDir(), L"testsubdir"));
    try
    {
        FileUtils::createDirectory(path);
        String subpath(FileUtils::joinPath(path, L"subdir"));
        FileUtils::createDirectory(subpath);
        SimpleFSDirectoryPtr fsDir(newLucene(path));
        BOOST_CHECK(newLucene(fsDir)->listAll().empty());
    }
    catch (...)
    {
        // NOTE(review): best-effort — exceptions suppressed so cleanup runs.
    }
    FileUtils::removeDirectory(path);
}

// Opening a path that names a regular file (not a directory) must raise
// NoSuchDirectory.
BOOST_AUTO_TEST_CASE(testNotDirectory)
{
    String path(FileUtils::joinPath(getTempDir(), L"testnotdir"));
    SimpleFSDirectoryPtr fsDir(newLucene(path));
    try
    {
        IndexOutputPtr out = fsDir->createOutput(L"afile");
        out->close();
        BOOST_CHECK(fsDir->fileExists(L"afile"));
        BOOST_CHECK_EXCEPTION(newLucene(FileUtils::joinPath(path, L"afile")), LuceneException, check_exception(LuceneException::NoSuchDirectory));
    }
    catch (...)
    {
        // NOTE(review): best-effort — exceptions suppressed so cleanup runs.
    }
    FileUtils::removeDirectory(path);
}

BOOST_AUTO_TEST_SUITE_END()
LucenePlusPlus-rel_3.0.4/src/test/store/FileSwitchDirectoryTest.cpp000066400000000000000000000067171217574114600255170ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "TestInc.h"
#include "LuceneTestFixture.h"
#include "FileSwitchDirectory.h"
#include "MockRAMDirectory.h"
#include "IndexWriter.h"
#include "IndexReader.h"
#include "WhitespaceAnalyzer.h"
#include "Document.h"
#include "Field.h"

using namespace Lucene;

BOOST_FIXTURE_TEST_SUITE(FileSwitchDirectoryTest, LuceneTestFixture)

// Build a test document with an id, index name, and numFields analysed body
// fields whose text grows with each field index.
static DocumentPtr createDocument(int32_t n, const String& indexName, int32_t numFields)
{
    StringStream buffer;
    DocumentPtr doc = newLucene();
    doc->add(newLucene(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));
    doc->add(newLucene(L"indexname", indexName, Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));
    buffer << L"a" << n;
    doc->add(newLucene(L"field1", buffer.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));
    buffer << L" b" << n;
    for (int32_t i = 1; i < numFields; ++i)
        doc->add(newLucene(L"field" + StringUtils::toString(i + 1), buffer.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));
    return doc;
}

// Add 100 documents; optimize into a single segment unless multiSegment.
// The writer is intentionally left open for the caller.
static void createIndexNoClose(bool multiSegment, const String& indexName, IndexWriterPtr writer)
{
    for (int32_t i = 0; i < 100; ++i)
        writer->addDocument(createDocument(i, indexName, 4));
    if (!multiSegment)
        writer->optimize();
}

// Test if writing doc stores to disk and everything else to ram works.
BOOST_AUTO_TEST_CASE(testBasic)
{
    // Route stored-field files (.fdt/.fdx) to the primary directory and
    // everything else to the secondary (RAM) directory.
    HashSet fileExtensions(HashSet::newInstance());
    fileExtensions.add(L"fdt");
    fileExtensions.add(L"fdx");
    DirectoryPtr primaryDir(newLucene());
    RAMDirectoryPtr secondaryDir(newLucene());
    FileSwitchDirectoryPtr fsd(newLucene(fileExtensions, primaryDir, secondaryDir, true));
    IndexWriterPtr writer(newLucene(fsd, newLucene(), IndexWriter::MaxFieldLengthLIMITED));
    writer->setUseCompoundFile(false); // keep individual extensions visible
    createIndexNoClose(true, L"ram", writer);
    IndexReaderPtr reader = writer->getReader();
    BOOST_CHECK_EQUAL(reader->maxDoc(), 100);
    writer->commit();
    // we should see only fdx,fdt files here
    HashSet files = primaryDir->listAll();
    BOOST_CHECK(!files.empty());
    for (HashSet::iterator file = files.begin(); file != files.end(); ++file)
    {
        String ext = FileSwitchDirectory::getExtension(*file);
        BOOST_CHECK(fileExtensions.contains(ext));
    }
    files = secondaryDir->listAll();
    BOOST_CHECK(!files.empty());
    // we should not see fdx,fdt files here
    for (HashSet::iterator file = files.begin(); file != files.end(); ++file)
    {
        String ext = FileSwitchDirectory::getExtension(*file);
        BOOST_CHECK(!fileExtensions.contains(ext));
    }
    reader->close();
    writer->close();
    // The combined listing must contain no empty names.
    files = fsd->listAll();
    for (HashSet::iterator file = files.begin(); file != files.end(); ++file)
        BOOST_CHECK(!file->empty());
    fsd->close();
}

BOOST_AUTO_TEST_SUITE_END()
LucenePlusPlus-rel_3.0.4/src/test/store/IndexOutputTest.cpp000066400000000000000000000125031217574114600240520ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "IndexOutput.h" #include "IndexInput.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(IndexOutputTest, LuceneTestFixture) class TestableIndexOutput : public IndexOutput { public: using IndexOutput::writeBytes; TestableIndexOutput(uint8_t* b, int32_t length) : outputBytes(b), outputLength(length), nextByte(0) { } virtual ~TestableIndexOutput() { } virtual void writeByte(uint8_t b) { outputBytes[nextByte++] = b; } virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length) { std::copy(b + offset, b + offset + length, outputBytes + nextByte + offset); nextByte += length; } virtual void flush() { } virtual void close() { } virtual int64_t getFilePointer() { return 0; } virtual void seek(int64_t pos) { } virtual int64_t length() { return 0; } int32_t getNextPosition() { return nextByte; } protected: uint8_t* outputBytes; int32_t outputLength; int32_t nextByte; }; BOOST_AUTO_TEST_CASE(testWriteInt) { ByteArray outputBytes(ByteArray::newInstance(10)); TestableIndexOutput indexOutput(outputBytes.get(), 10); indexOutput.writeInt(1234); BOOST_CHECK_EQUAL(indexOutput.getNextPosition(), 4); uint8_t expected[4] = { 0, 0, 4, 210 }; BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 4), 0); } BOOST_AUTO_TEST_CASE(testWriteVInt) { ByteArray outputBytes(ByteArray::newInstance(10)); TestableIndexOutput indexOutput(outputBytes.get(), 10); indexOutput.writeVInt(1234); BOOST_CHECK_EQUAL(indexOutput.getNextPosition(), 2); uint8_t expected[2] = { 210, 9 }; BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 2), 0); } BOOST_AUTO_TEST_CASE(testWriteLong) { ByteArray outputBytes(ByteArray::newInstance(10)); TestableIndexOutput indexOutput(outputBytes.get(), 10); indexOutput.writeLong(1234123412341234LL); BOOST_CHECK_EQUAL(indexOutput.getNextPosition(), 8); uint8_t expected[8] = { 0, 4, 98, 109, 191, 154, 1, 242 }; 
BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 8), 0); } BOOST_AUTO_TEST_CASE(testWriteVLong) { ByteArray outputBytes(ByteArray::newInstance(10)); TestableIndexOutput indexOutput(outputBytes.get(), 10); indexOutput.writeVLong(1234123412341234LL); BOOST_CHECK_EQUAL(indexOutput.getNextPosition(), 8); uint8_t expected[8] = { 242, 131, 232, 252, 219, 205, 152, 2 }; BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 8), 0); } BOOST_AUTO_TEST_CASE(testWriteString) { ByteArray outputBytes(ByteArray::newInstance(30)); TestableIndexOutput indexOutput(outputBytes.get(), 30); indexOutput.writeString(L"test string"); BOOST_CHECK_EQUAL(indexOutput.getNextPosition(), 12); uint8_t expected[12] = { 11, 116, 101, 115, 116, 32, 115, 116, 114, 105, 110, 103 }; BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 12), 0); } BOOST_AUTO_TEST_CASE(testWriteChars) { ByteArray outputBytes(ByteArray::newInstance(30)); TestableIndexOutput indexOutput(outputBytes.get(), 30); indexOutput.writeChars(L"test string", 5, 6); BOOST_CHECK_EQUAL(indexOutput.getNextPosition(), 6); uint8_t expected[6] = { 115, 116, 114, 105, 110, 103 }; BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), expected, 6), 0); } namespace TestCopyBytes { class SourceIndexInput : public IndexInput { public: SourceIndexInput(const uint8_t* b, int32_t length) : inputBytes(b), inputLength(length), nextByte(0) { } virtual uint8_t readByte() { return 0; } virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) { std::copy(inputBytes + nextByte + offset, inputBytes + nextByte + offset + length, b + offset); nextByte += length; } virtual void close() { } virtual int64_t getFilePointer() { return 0; } virtual void seek(int64_t pos) { } virtual int64_t length() { return 0; } virtual IndexInputPtr clone() { return IndexInputPtr(); } protected: const uint8_t* inputBytes; int32_t inputLength; int32_t nextByte; }; } BOOST_AUTO_TEST_CASE(testCopyBytes) { ByteArray sourceBytes(ByteArray::newInstance(32768)); 
std::generate(sourceBytes.get(), sourceBytes.get() + 32768, rand); IndexInputPtr indexSource(newLucene(sourceBytes.get(), 32768)); ByteArray outputBytes(ByteArray::newInstance(32768)); TestableIndexOutput indexOutput(outputBytes.get(), 32768); indexOutput.copyBytes(indexSource, 20000); BOOST_CHECK_EQUAL(memcmp(outputBytes.get(), sourceBytes.get(), 20000), 0); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/store/LockFactoryTest.cpp000066400000000000000000000326751217574114600240160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MockLock.h" #include "MockLockFactory.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "IndexSearcher.h" #include "WhitespaceAnalyzer.h" #include "Document.h" #include "Field.h" #include "NoLockFactory.h" #include "SimpleFSLockFactory.h" #include "NativeFSLockFactory.h" #include "SingleInstanceLockFactory.h" #include "FSDirectory.h" #include "LuceneThread.h" #include "TermQuery.h" #include "Term.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(LockFactoryTest, LuceneTestFixture) static void addDoc(IndexWriterPtr writer) { DocumentPtr doc(newLucene()); doc->add(newLucene(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED)); writer->addDocument(doc); } // Verify: we can provide our own LockFactory implementation, the right // methods are called at the right time, locks are created, etc. 
BOOST_AUTO_TEST_CASE(testCustomLockFactory) { DirectoryPtr dir(newLucene()); MockLockFactoryPtr lf(newLucene()); dir->setLockFactory(lf); // Lock prefix should have been set BOOST_CHECK(lf->lockPrefixSet); IndexWriterPtr writer(newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED)); // add 100 documents (so that commit lock is used) for (int32_t i = 0; i < 100; ++i) addDoc(writer); // Both write lock and commit lock should have been created BOOST_CHECK_EQUAL(lf->locksCreated.size(), 1); // # of unique locks created (after instantiating IndexWriter) BOOST_CHECK(lf->makeLockCount >= 1); // # calls to makeLock is 0 (after instantiating IndexWriter) for (MapStringLock::iterator lockName = lf->locksCreated.begin(); lockName != lf->locksCreated.end(); ++lockName) { MockLockPtr lock(boost::dynamic_pointer_cast(lockName->second)); BOOST_CHECK(lock->lockAttempts > 0); // # calls to Lock.obtain is 0 (after instantiating IndexWriter) } writer->close(); } // Verify: we can use the NoLockFactory with RAMDirectory with no exceptions raised // Verify: NoLockFactory allows two IndexWriters BOOST_AUTO_TEST_CASE(testRAMDirectoryNoLocking) { DirectoryPtr dir(newLucene()); dir->setLockFactory(NoLockFactory::getNoLockFactory()); BOOST_CHECK(dir->getLockFactory()); IndexWriterPtr writer(newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED)); IndexWriterPtr writer2; // Create a 2nd IndexWriter. 
This is normally not allowed but it should run through since we're not using any locks BOOST_CHECK_NO_THROW(writer2 = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED)); writer->close(); if (writer2) writer2->close(); } // Verify: SingleInstanceLockFactory is the default lock for RAMDirectory // Verify: RAMDirectory does basic locking correctly (can't create two IndexWriters) BOOST_AUTO_TEST_CASE(testDefaultRAMDirectory) { DirectoryPtr dir(newLucene()); LockFactoryPtr lockFactory(dir->getLockFactory()); BOOST_CHECK(boost::dynamic_pointer_cast(lockFactory)); IndexWriterPtr writer(newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED)); IndexWriterPtr writer2; // Create a 2nd IndexWriter - This should fail BOOST_CHECK_EXCEPTION(writer2 = newLucene(dir, newLucene(), false, IndexWriter::MaxFieldLengthLIMITED), LuceneException, check_exception(LuceneException::LockObtainFailed)); writer->close(); if (writer2) writer2->close(); } // test string file instantiation BOOST_AUTO_TEST_CASE(testSimpleFSLockFactory) { BOOST_CHECK_NO_THROW(newLucene(L"test")); } namespace LockFactoryTest { DECLARE_SHARED_PTR(WriterThread) DECLARE_SHARED_PTR(SearcherThread) class WriterThread : public LuceneThread { public: WriterThread(int32_t numIteration, DirectoryPtr dir) { this->numIteration = numIteration; this->dir = dir; this->hitException = false; } virtual ~WriterThread() { } LUCENE_CLASS(WriterThread); public: bool hitException; protected: DirectoryPtr dir; int32_t numIteration; public: virtual void run() { WhitespaceAnalyzerPtr analyzer = newLucene(); IndexWriterPtr writer; for (int32_t i = 0; i < numIteration; ++i) { try { writer = newLucene(dir, analyzer, false, IndexWriter::MaxFieldLengthLIMITED); } catch (IOException& e) { if (e.getError().find(L" timed out:") == String::npos) { hitException = true; BOOST_FAIL("Stress Test Index Writer: creation hit unexpected IO exception: " << e.getError()); break; } else { // lock obtain timed out } } catch 
(LuceneException& e) { hitException = true; BOOST_FAIL("Stress Test Index Writer: creation hit unexpected exception: " << e.getError()); break; } if (writer) { try { addDoc(writer); } catch (LuceneException& e) { hitException = true; BOOST_FAIL("Stress Test Index Writer: addDoc hit unexpected exception: " << e.getError()); break; } try { writer->close(); } catch (LuceneException& e) { hitException = true; BOOST_FAIL("Stress Test Index Writer: close hit unexpected exception: " << e.getError()); break; } } } } }; class SearcherThread : public LuceneThread { public: SearcherThread(int32_t numIteration, DirectoryPtr dir) { this->numIteration = numIteration; this->dir = dir; this->hitException = false; } virtual ~SearcherThread() { } LUCENE_CLASS(SearcherThread); public: bool hitException; protected: DirectoryPtr dir; int32_t numIteration; public: virtual void run() { IndexSearcherPtr searcher; QueryPtr query = newLucene(newLucene(L"content", L"aaa")); for (int32_t i = 0; i < numIteration; ++i) { try { searcher = newLucene(dir, false); } catch (LuceneException& e) { hitException = true; BOOST_FAIL("Stress Test Index Searcher: creation hit unexpected exception: " << e.getError()); break; } if (searcher) { Collection hits; try { hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; } catch (LuceneException& e) { hitException = true; BOOST_FAIL("Stress Test Index Searcher: search hit unexpected exception: " << e.getError()); break; } try { searcher->close(); } catch (LuceneException& e) { hitException = true; BOOST_FAIL("Stress Test Index Searcher: close hit unexpected exception: " << e.getError()); break; } } } } }; } static void _testStressLocks(LockFactoryPtr lockFactory, const String& indexDir) { FSDirectoryPtr fs1 = FSDirectory::open(indexDir, lockFactory); // First create a 1 doc index IndexWriterPtr w = newLucene(fs1, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); addDoc(w); w->close(); LockFactoryTest::WriterThreadPtr writer = newLucene(100, fs1); 
LockFactoryTest::SearcherThreadPtr searcher = newLucene(100, fs1); writer->start(); searcher->start(); writer->join(); searcher->join(); BOOST_CHECK(!writer->hitException); BOOST_CHECK(!searcher->hitException); FileUtils::removeDirectory(indexDir); } // Verify: do stress test, by opening IndexReaders and IndexWriters over and over in 2 threads and making sure // no unexpected exceptions are raised BOOST_AUTO_TEST_CASE(testStressLocks) { _testStressLocks(LockFactoryPtr(), getTempDir(L"index.TestLockFactory6")); } // Verify: do stress test, by opening IndexReaders and IndexWriters over and over in 2 threads and making sure // no unexpected exceptions are raised, but use NativeFSLockFactory BOOST_AUTO_TEST_CASE(testStressLocksNativeFSLockFactory) { String dir(getTempDir(L"index.TestLockFactory7")); _testStressLocks(newLucene(dir), dir); } // Verify: NativeFSLockFactory works correctly BOOST_AUTO_TEST_CASE(testNativeFSLockFactory) { NativeFSLockFactoryPtr f(newLucene(getTempDir())); f->setLockPrefix(L"test"); LockPtr l(f->makeLock(L"commit")); LockPtr l2(f->makeLock(L"commit")); BOOST_CHECK(l->obtain()); BOOST_CHECK(!l2->obtain()); l->release(); BOOST_CHECK(l2->obtain()); l2->release(); // Make sure we can obtain first one again, test isLocked() BOOST_CHECK(l->obtain()); BOOST_CHECK(l->isLocked()); BOOST_CHECK(l2->isLocked()); l->release(); BOOST_CHECK(!l->isLocked()); BOOST_CHECK(!l2->isLocked()); } // Verify: NativeFSLockFactory works correctly if the lock file exists BOOST_AUTO_TEST_CASE(testNativeFSLockFactoryLockExists) { String lockFile = getTempDir(L"test.lock"); std::ofstream lockStream; lockStream.open(StringUtils::toUTF8(lockFile).c_str(), std::ios::binary | std::ios::in | std::ios::out); lockStream.close(); LockPtr l = newLucene(getTempDir())->makeLock(L"test.lock"); BOOST_CHECK(l->obtain()); l->release(); BOOST_CHECK(!l->isLocked()); if (FileUtils::fileExists(lockFile)) FileUtils::removeFile(lockFile); } 
BOOST_AUTO_TEST_CASE(testNativeFSLockReleaseByOtherLock) { NativeFSLockFactoryPtr f = newLucene(getTempDir()); f->setLockPrefix(L"test"); LockPtr l = f->makeLock(L"commit"); LockPtr l2 = f->makeLock(L"commit"); BOOST_CHECK(l->obtain()); BOOST_CHECK(l2->isLocked()); BOOST_CHECK_EXCEPTION(l2->release(), LockReleaseFailedException, check_exception(LuceneException::LockReleaseFailed)); l->release(); } // Verify: NativeFSLockFactory assigns null as lockPrefix if the lockDir is inside directory BOOST_AUTO_TEST_CASE(testNativeFSLockFactoryPrefix) { String fdir1(getTempDir(L"TestLockFactory.8")); String fdir2(getTempDir(L"TestLockFactory.8.Lockdir")); DirectoryPtr dir1(FSDirectory::open(fdir1, newLucene(fdir1))); // same directory, but locks are stored somewhere else. The prefix of the lock factory should != null DirectoryPtr dir2(FSDirectory::open(fdir1, newLucene(fdir2))); String prefix1(dir1->getLockFactory()->getLockPrefix()); BOOST_CHECK(prefix1.empty()); // Lock prefix for lockDir same as directory should be null String prefix2(dir2->getLockFactory()->getLockPrefix()); BOOST_CHECK(!prefix2.empty()); // Lock prefix for lockDir outside of directory should be not null FileUtils::removeDirectory(fdir1); FileUtils::removeDirectory(fdir2); } // Verify: default LockFactory has no prefix (ie write.lock is stored in index) BOOST_AUTO_TEST_CASE(testDefaultFSLockFactoryPrefix) { // Make sure we get null prefix String dirName(getTempDir(L"TestLockFactory.10")); DirectoryPtr dir(FSDirectory::open(dirName)); String prefix(dir->getLockFactory()->getLockPrefix()); BOOST_CHECK(prefix.empty()); // Default lock prefix should be null FileUtils::removeDirectory(dirName); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/store/MMapDirectoryTest.cpp000066400000000000000000000042771217574114600243120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "MMapDirectory.h" #include "FSDirectory.h" #include "StandardAnalyzer.h" #include "IndexWriter.h" #include "IndexSearcher.h" #include "Document.h" #include "Field.h" #include "Random.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(MMapDirectoryTest, LuceneTestFixture) static RandomPtr rndToken = newLucene(); String randomToken() { static const wchar_t* alphabet = L"abcdefghijklmnopqrstuvwxyz"; int32_t tl = 1 + rndToken->nextInt(7); StringStream sb; for (int32_t cx = 0; cx < tl; ++cx) sb << alphabet[rndToken->nextInt(25)]; return sb.str(); } String randomField() { int32_t fl = 1 + rndToken->nextInt(3); StringStream fb; for (int32_t fx = 0; fx < fl; ++fx) fb << randomToken() << L" "; return fb.str(); } BOOST_AUTO_TEST_CASE(testMmapIndex) { String storePathname(FileUtils::joinPath(getTempDir(), L"testLuceneMmap")); FSDirectoryPtr storeDirectory(newLucene(storePathname)); // plan to add a set of useful stopwords, consider changing some of the interior filters. 
StandardAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT, HashSet()); IndexWriterPtr writer = newLucene(storeDirectory, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); IndexSearcherPtr searcher = newLucene(storeDirectory, true); for (int32_t dx = 0; dx < 1000; ++dx) { String f(randomField()); DocumentPtr doc = newLucene(); doc->add(newLucene(L"data", f, Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); } searcher->close(); writer->close(); FileUtils::removeDirectory(storePathname); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/store/MockFSDirectory.cpp000066400000000000000000000050271217574114600237340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "MockFSDirectory.h" #include "NoLockFactory.h" #include "SimpleFSDirectory.h" #include "BufferedIndexInput.h" #include "Random.h" namespace Lucene { MockFSDirectory::MockFSDirectory(const String& path) { allIndexInputs = Collection::newInstance(); lockFactory = newLucene(); dir = newLucene(path); rand = newLucene(); } MockFSDirectory::~MockFSDirectory() { } IndexInputPtr MockFSDirectory::openInput(const String& name) { return openInput(name, BufferedIndexInput::BUFFER_SIZE); } void MockFSDirectory::tweakBufferSizes() { for (Collection::iterator ii = allIndexInputs.begin(); ii != allIndexInputs.end(); ++ii) { BufferedIndexInputPtr bii(boost::dynamic_pointer_cast(*ii)); int32_t bufferSize = 1024 + (int32_t)std::abs(rand->nextInt() % 32768); bii->setBufferSize(bufferSize); } } IndexInputPtr MockFSDirectory::openInput(const String& name, int32_t bufferSize) { // Make random changes to buffer size bufferSize = 1 + 
(int32_t)std::abs(rand->nextInt() % 10); IndexInputPtr f(dir->openInput(name, bufferSize)); allIndexInputs.add(f); return f; } IndexOutputPtr MockFSDirectory::createOutput(const String& name) { return dir->createOutput(name); } void MockFSDirectory::close() { dir->close(); } void MockFSDirectory::deleteFile(const String& name) { dir->deleteFile(name); } void MockFSDirectory::touchFile(const String& name) { dir->touchFile(name); } uint64_t MockFSDirectory::fileModified(const String& name) { return dir->fileModified(name); } bool MockFSDirectory::fileExists(const String& name) { return dir->fileExists(name); } HashSet MockFSDirectory::listAll() { return dir->listAll(); } int64_t MockFSDirectory::fileLength(const String& name) { return dir->fileLength(name); } } LucenePlusPlus-rel_3.0.4/src/test/store/MockLock.cpp000066400000000000000000000014551217574114600224300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "MockLock.h" namespace Lucene { MockLock::MockLock() { lockAttempts = 0; } MockLock::~MockLock() { } bool MockLock::obtain() { ++lockAttempts; return true; } void MockLock::release() { // do nothing } bool MockLock::isLocked() { return false; } String MockLock::toString() { return L"MockLock"; } } LucenePlusPlus-rel_3.0.4/src/test/store/MockLockFactory.cpp000066400000000000000000000021731217574114600237560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "MockLockFactory.h" #include "MockLock.h" namespace Lucene { MockLockFactory::MockLockFactory() { locksCreated = MapStringLock::newInstance(); lockPrefixSet = false; makeLockCount = 0; } MockLockFactory::~MockLockFactory() { } void MockLockFactory::setLockPrefix(const String& lockPrefix) { LockFactory::setLockPrefix(lockPrefix); lockPrefixSet = true; } LockPtr MockLockFactory::makeLock(const String& lockName) { LockPtr lock(newLucene()); SyncLock createdLock(&locksCreated); locksCreated.put(lockName, lock); ++makeLockCount; return lock; } void MockLockFactory::clearLock(const String& lockName) { } } LucenePlusPlus-rel_3.0.4/src/test/store/MockRAMDirectory.cpp000066400000000000000000000233451217574114600240460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "MockRAMDirectory.h" #include "RAMFile.h" #include "MockRAMOutputStream.h" #include "MockRAMInputStream.h" #include "TestPoint.h" #include "Random.h" #include "MiscUtils.h" namespace Lucene { MockRAMDirectory::MockRAMDirectory() { maxSize = 0; maxUsedSize = 0; randomIOExceptionRate = 0; noDeleteOpenFile = true; preventDoubleWrite = true; crashed = false; init(); } MockRAMDirectory::MockRAMDirectory(DirectoryPtr dir) : RAMDirectory(dir) { maxSize = 0; maxUsedSize = 0; randomIOExceptionRate = 0; noDeleteOpenFile = true; preventDoubleWrite = true; crashed = false; init(); } MockRAMDirectory::~MockRAMDirectory() { } void MockRAMDirectory::init() { SyncLock syncLock(this); if (!openFiles) { openFiles = MapStringInt::newInstance(); openFilesDeleted = HashSet::newInstance(); } if (!createdFiles) createdFiles = HashSet::newInstance(); if (!unSyncedFiles) unSyncedFiles = HashSet::newInstance(); } void MockRAMDirectory::setPreventDoubleWrite(bool value) { preventDoubleWrite = value; } void MockRAMDirectory::sync(const String& name) { TestScope testScope(L"MockRAMDirectory", L"sync"); SyncLock syncLock(this); maybeThrowDeterministicException(); if (crashed) boost::throw_exception(IOException(L"cannot sync after crash")); unSyncedFiles.remove(name); } void MockRAMDirectory::crash() { SyncLock syncLock(this); crashed = true; openFiles = MapStringInt::newInstance(); openFilesDeleted = HashSet::newInstance(); HashSet crashFiles(unSyncedFiles); unSyncedFiles.clear(); int32_t count = 0; for (HashSet::iterator it = crashFiles.begin(); it != crashFiles.end(); ++it) { RAMFilePtr file(fileMap.get(*it)); if (count % 3 == 0) deleteFile(*it, true); else if (count % 3 == 1) { // Zero out file entirely int32_t numBuffers = file->numBuffers(); for (int32_t i = 0; i < numBuffers; ++i) MiscUtils::arrayFill(file->getBuffer(i).get(), 0, file->getBuffer(i).size(), 0); } else if (count % 3 
== 2) { // Truncate the file file->setLength(file->getLength() / 2); } ++count; } } void MockRAMDirectory::clearCrash() { SyncLock syncLock(this); crashed = false; } void MockRAMDirectory::setMaxSizeInBytes(int64_t maxSize) { this->maxSize = maxSize; } int64_t MockRAMDirectory::getMaxSizeInBytes() { return maxSize; } int64_t MockRAMDirectory::getMaxUsedSizeInBytes() { return maxUsedSize; } void MockRAMDirectory::resetMaxUsedSizeInBytes() { maxUsedSize = getRecomputedActualSizeInBytes(); } void MockRAMDirectory::setNoDeleteOpenFile(bool value) { noDeleteOpenFile = value; } bool MockRAMDirectory::getNoDeleteOpenFile() { return noDeleteOpenFile; } void MockRAMDirectory::setRandomIOExceptionRate(double rate, int64_t seed) { randomIOExceptionRate = rate; // seed so we have deterministic behaviour randomState = newLucene(seed); } double MockRAMDirectory::getRandomIOExceptionRate() { return randomIOExceptionRate; } void MockRAMDirectory::maybeThrowIOException() { if (randomIOExceptionRate > 0.0) { int32_t number = std::abs(randomState->nextInt() % 1000); if (number < randomIOExceptionRate * 1000) boost::throw_exception(IOException(L"a random IO exception")); } } void MockRAMDirectory::deleteFile(const String& name) { deleteFile(name, false); } void MockRAMDirectory::deleteFile(const String& name, bool forced) { TestScope testScope(L"MockRAMDirectory", L"deleteFile"); SyncLock syncLock(this); maybeThrowDeterministicException(); if (crashed && !forced) boost::throw_exception(IOException(L"cannot delete after crash")); unSyncedFiles.remove(name); if (!forced && noDeleteOpenFile) { if (openFiles.contains(name)) { openFilesDeleted.add(name); boost::throw_exception(IOException(L"MockRAMDirectory: file \"" + name + L"\" is still open: cannot delete")); } else openFilesDeleted.remove(name); } RAMDirectory::deleteFile(name); } HashSet MockRAMDirectory::getOpenDeletedFiles() { SyncLock syncLock(this); HashSet openFilesDeleted = HashSet::newInstance(this->openFilesDeleted.begin(), 
this->openFilesDeleted.end()); return openFilesDeleted; } IndexOutputPtr MockRAMDirectory::createOutput(const String& name) { SyncLock syncLock(this); if (crashed) boost::throw_exception(IOException(L"cannot createOutput after crash")); init(); if (preventDoubleWrite && createdFiles.contains(name) && name != L"segments.gen") boost::throw_exception(IOException(L"file \"" + name + L"\" was already written to")); if (noDeleteOpenFile && openFiles.contains(name)) boost::throw_exception(IOException(L"MockRAMDirectory: file \"" + name + L"\" is still open: cannot overwrite")); RAMFilePtr file(newLucene(shared_from_this())); if (crashed) boost::throw_exception(IOException(L"cannot createOutput after crash")); unSyncedFiles.add(name); createdFiles.add(name); RAMFilePtr existing(fileMap.get(name)); // Enforce write once if (existing && name != L"segments.gen" && preventDoubleWrite) boost::throw_exception(IOException(L"file " + name + L" already exists")); else { if (existing) { _sizeInBytes -= existing->getSizeInBytes(); existing->_directory.reset(); } fileMap.put(name, file); } return newLucene(shared_from_this(), file, name); } IndexInputPtr MockRAMDirectory::openInput(const String& name) { SyncLock syncLock(this); MapStringRAMFile::iterator file = fileMap.find(name); if (file == fileMap.end()) boost::throw_exception(FileNotFoundException(name)); else { MapStringInt::iterator openFile = openFiles.find(name); if (openFile != openFiles.end()) ++openFile->second; else openFiles.put(name, 1); } return newLucene(shared_from_this(), name, file->second); } int64_t MockRAMDirectory::getRecomputedSizeInBytes() { SyncLock syncLock(this); int64_t size = 0; for (MapStringRAMFile::iterator file = fileMap.begin(); file != fileMap.end(); ++file) size += file->second->getSizeInBytes(); return size; } int64_t MockRAMDirectory::getRecomputedActualSizeInBytes() { SyncLock syncLock(this); int64_t size = 0; for (MapStringRAMFile::iterator file = fileMap.begin(); file != fileMap.end(); ++file) 
size += file->second->length; return size; } void MockRAMDirectory::close() { SyncLock syncLock(this); if (!openFiles) { openFiles = MapStringInt::newInstance(); openFilesDeleted = HashSet::newInstance(); } if (noDeleteOpenFile && !openFiles.empty()) { // RuntimeException instead of IOException because RAMDirectory does not throw IOException currently boost::throw_exception(RuntimeException(L"MockRAMDirectory: cannot close: there are still open files")); } } void MockRAMDirectory::failOn(MockDirectoryFailurePtr fail) { SyncLock syncLock(this); if (!failures) failures = Collection::newInstance(); failures.add(fail); } void MockRAMDirectory::maybeThrowDeterministicException() { SyncLock syncLock(this); if (failures) { for (Collection::iterator failure = failures.begin(); failure != failures.end(); ++failure) (*failure)->eval(shared_from_this()); } } MockDirectoryFailure::MockDirectoryFailure() { doFail = false; } MockDirectoryFailure::~MockDirectoryFailure() { } void MockDirectoryFailure::eval(MockRAMDirectoryPtr dir) { } MockDirectoryFailurePtr MockDirectoryFailure::reset() { return shared_from_this(); } void MockDirectoryFailure::setDoFail() { doFail = true; } void MockDirectoryFailure::clearDoFail() { doFail = false; } } LucenePlusPlus-rel_3.0.4/src/test/store/MockRAMInputStream.cpp000066400000000000000000000036221217574114600243510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "MockRAMInputStream.h" #include "MockRAMDirectory.h" namespace Lucene { MockRAMInputStream::MockRAMInputStream() { this->isClone = false; } MockRAMInputStream::MockRAMInputStream(MockRAMDirectoryPtr dir, const String& name, RAMFilePtr f) : RAMInputStream(f) { this->isClone = false; this->name = name; this->_dir = dir; } MockRAMInputStream::~MockRAMInputStream() { } void MockRAMInputStream::close() { RAMInputStream::close(); if (!isClone) { MockRAMDirectoryPtr dir(_dir); SyncLock dirLock(dir); MapStringInt::iterator openFile = dir->openFiles.find(name); // Could be null when MockRAMDirectory.crash() was called if (openFile != dir->openFiles.end()) { if (openFile->second == 1) { dir->openFiles.remove(name); dir->openFilesDeleted.remove(name); } else --openFile->second; } } } LuceneObjectPtr MockRAMInputStream::clone(LuceneObjectPtr other) { LuceneObjectPtr clone = RAMInputStream::clone(other ? other : newLucene()); MockRAMInputStreamPtr cloneInputStream(boost::dynamic_pointer_cast(clone)); cloneInputStream->_dir = _dir; cloneInputStream->name = name; cloneInputStream->isClone = true; return cloneInputStream; } } LucenePlusPlus-rel_3.0.4/src/test/store/MockRAMOutputStream.cpp000066400000000000000000000060531217574114600245530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "MockRAMOutputStream.h" #include "MockRAMDirectory.h" namespace Lucene { MockRAMOutputStream::MockRAMOutputStream(MockRAMDirectoryPtr dir, RAMFilePtr f, const String& name) : RAMOutputStream(f) { this->first = true; this->singleByte = ByteArray::newInstance(1); this->_dir = dir; this->name = name; } MockRAMOutputStream::~MockRAMOutputStream() { } void MockRAMOutputStream::close() { RAMOutputStream::close(); MockRAMDirectoryPtr dir(_dir); // Now compute actual disk usage & track the maxUsedSize in the MockRAMDirectory int64_t size = dir->getRecomputedActualSizeInBytes(); if (size > dir->maxUsedSize) dir->maxUsedSize = size; } void MockRAMOutputStream::flush() { MockRAMDirectoryPtr(_dir)->maybeThrowDeterministicException(); RAMOutputStream::flush(); } void MockRAMOutputStream::writeByte(uint8_t b) { singleByte[0] = b; writeBytes(singleByte.get(), 0, 1); } void MockRAMOutputStream::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { MockRAMDirectoryPtr dir(_dir); int64_t freeSpace = dir->maxSize - dir->sizeInBytes(); int64_t realUsage = 0; // If MockRAMDir crashed since we were opened, then don't write anything if (dir->crashed) boost::throw_exception(IOException(L"MockRAMDirectory was crashed; cannot write to " + name)); // Enforce disk full if (dir->maxSize != 0 && freeSpace <= length) { // Compute the real disk free. 
This will greatly slow down our test but makes it more accurate realUsage = dir->getRecomputedActualSizeInBytes(); freeSpace = dir->maxSize - realUsage; } if (dir->maxSize != 0 && freeSpace <= length) { if (freeSpace > 0 && freeSpace < length) { realUsage += freeSpace; RAMOutputStream::writeBytes(b, offset, (int32_t)freeSpace); } if (realUsage > dir->maxUsedSize) dir->maxUsedSize = realUsage; boost::throw_exception(IOException(L"fake disk full at " + StringUtils::toString(dir->getRecomputedActualSizeInBytes()) + L" bytes when writing " + name)); } else RAMOutputStream::writeBytes(b, offset, length); dir->maybeThrowDeterministicException(); if (first) { // Maybe throw random exception; only do this on first write to a new file first = false; dir->maybeThrowIOException(); } } } LucenePlusPlus-rel_3.0.4/src/test/store/RAMDirectoryTest.cpp000066400000000000000000000176301217574114600240740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "RAMFile.h" #include "RAMOutputStream.h" #include "RAMInputStream.h" #include "FSDirectory.h" #include "IndexInput.h" #include "IndexOutput.h" #include "IndexWriter.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "Document.h" #include "WhitespaceAnalyzer.h" #include "Field.h" #include "MockRAMDirectory.h" #include "LuceneThread.h" #include "FileUtils.h" using namespace Lucene; class RAMDirectoryTestFixture : public LuceneTestFixture { public: RAMDirectoryTestFixture() { indexDir = FileUtils::joinPath(getTempDir(), L"RAMDirIndex"); DirectoryPtr dir(FSDirectory::open(indexDir)); IndexWriterPtr writer(newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED)); // add enough document so that the index will be larger than RAMDirectory::READ_BUFFER_SIZE docsToAdd = 500; // add some documents for (int32_t i = 0; i < docsToAdd; ++i) { DocumentPtr doc(newLucene()); doc->add(newLucene(L"content", intToEnglish(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); writer->addDocument(doc); } BOOST_CHECK_EQUAL(docsToAdd, writer->maxDoc()); writer->close(); dir->close(); } virtual ~RAMDirectoryTestFixture() { FileUtils::removeDirectory(indexDir); } protected: String indexDir; int32_t docsToAdd; }; class TestRAMDirectoryThread : public LuceneThread { public: TestRAMDirectoryThread(IndexWriterPtr writer, int32_t num) { this->writer = writer; this->num = num; } LUCENE_CLASS(TestRAMDirectoryThread); public: static const int32_t numThreads; static const int32_t docsPerThread; protected: IndexWriterPtr writer; int32_t num; public: virtual void run() { for (int32_t j = 1; j < docsPerThread; ++j) { DocumentPtr doc(newLucene()); doc->add(newLucene(L"sizeContent", intToEnglish((num * docsPerThread) + j), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); try { writer->addDocument(doc); } catch 
(LuceneException& e) { boost::throw_exception(RuntimeException(e.getError())); } } } }; const int32_t TestRAMDirectoryThread::numThreads = 10; const int32_t TestRAMDirectoryThread::docsPerThread = 40; typedef boost::shared_ptr TestRAMDirectoryThreadPtr; /// Fake a huge ram file by using the same byte buffer for all buffers under INT_MAX. class DenseRAMFile : public RAMFile { public: DenseRAMFile() { capacity = 0; singleBuffers = MapIntByteArray::newInstance(); } LUCENE_CLASS(DenseRAMFile); public: static const int64_t MAX_VALUE; protected: int64_t capacity; MapIntByteArray singleBuffers; protected: virtual ByteArray newBuffer(int32_t size) { capacity += size; if (capacity <= MAX_VALUE) { // below INT_MAX we reuse buffers ByteArray buf(singleBuffers.get(size)); if (!buf) { buf = ByteArray::newInstance(size); singleBuffers.put(size, buf); } return buf; } return RAMFile::newBuffer(size); } }; const int64_t DenseRAMFile::MAX_VALUE = 2 * (int64_t)INT_MAX; typedef boost::shared_ptr DenseRAMFilePtr; BOOST_FIXTURE_TEST_SUITE(RAMDirectoryTest, RAMDirectoryTestFixture) BOOST_AUTO_TEST_CASE(testRAMDirectory) { DirectoryPtr dir(FSDirectory::open(indexDir)); MockRAMDirectoryPtr ramDir(newLucene(dir)); // close the underlaying directory dir->close(); // Check size BOOST_CHECK_EQUAL(ramDir->sizeInBytes(), ramDir->getRecomputedSizeInBytes()); // open reader to test document count IndexReaderPtr reader(IndexReader::open(ramDir, true)); BOOST_CHECK_EQUAL(docsToAdd, reader->numDocs()); // open search to check if all doc's are there IndexSearcherPtr searcher = newLucene(reader); // search for all documents for (int32_t i = 0; i < docsToAdd; ++i) { DocumentPtr doc = searcher->doc(i); BOOST_CHECK(doc->getField(L"content")); } // cleanup reader->close(); searcher->close(); } BOOST_AUTO_TEST_CASE(testRAMDirectorySize) { DirectoryPtr dir(FSDirectory::open(indexDir)); MockRAMDirectoryPtr ramDir(newLucene(dir)); dir->close(); IndexWriterPtr writer(newLucene(ramDir, newLucene(), false, 
IndexWriter::MaxFieldLengthLIMITED)); writer->optimize(); BOOST_CHECK_EQUAL(ramDir->sizeInBytes(), ramDir->getRecomputedSizeInBytes()); Collection threads(Collection::newInstance(TestRAMDirectoryThread::numThreads)); for (int32_t i = 0; i < TestRAMDirectoryThread::numThreads; ++i) threads[i] = newLucene(writer, i); for (int32_t i = 0; i < TestRAMDirectoryThread::numThreads; ++i) threads[i]->start(); for (int32_t i = 0; i < TestRAMDirectoryThread::numThreads; ++i) threads[i]->join(); writer->optimize(); BOOST_CHECK_EQUAL(ramDir->sizeInBytes(), ramDir->getRecomputedSizeInBytes()); writer->close(); } BOOST_AUTO_TEST_CASE(testIllegalEOF) { RAMDirectoryPtr dir(newLucene()); IndexOutputPtr o(dir->createOutput(L"out")); ByteArray b(ByteArray::newInstance(1024)); o->writeBytes(b.get(), 0, 1024); o->close(); IndexInputPtr i(dir->openInput(L"out")); i->seek(1024); i->close(); dir->close(); } /// Test huge RAMFile with more than INT_MAX bytes. BOOST_AUTO_TEST_CASE(testHugeFile) { DenseRAMFilePtr f(newLucene()); // output part RAMOutputStreamPtr out(newLucene(f)); ByteArray b1(ByteArray::newInstance(RAMOutputStream::BUFFER_SIZE)); ByteArray b2(ByteArray::newInstance(RAMOutputStream::BUFFER_SIZE / 3)); for (int32_t i = 0; i < b1.size(); ++i) b1[i] = (uint8_t)(i & 0x0007f); for (int32_t i = 0; i < b2.size(); ++i) b2[i] = (uint8_t)(i & 0x0003f); int64_t n = 0; BOOST_CHECK_EQUAL(n, out->length()); // output length must match while (n <= DenseRAMFile::MAX_VALUE - b1.size()) { out->writeBytes(b1.get(), 0, b1.size()); out->flush(); n += b1.size(); BOOST_CHECK_EQUAL(n, out->length()); // output length must match } int32_t m = b2.size(); int64_t l = 12; for (int32_t j = 0; j < l; ++j) { for (int32_t i = 0; i < b2.size(); ++i) b2[i]++; out->writeBytes(b2.get(), 0, m); out->flush(); n += m; BOOST_CHECK_EQUAL(n, out->length()); // output length must match } out->close(); // input part RAMInputStreamPtr in(newLucene(f)); BOOST_CHECK_EQUAL(n, in->length()); // input length must match for 
(int32_t j = 0; j < l; ++j) { int64_t loc = n - (l - j) * m; in->seek(loc / 3); in->seek(loc); for (int32_t i = 0; i < m; ++i) { uint8_t bt = in->readByte(); uint8_t expected = (uint8_t)(1 + j + (i & 0x0003f)); BOOST_CHECK_EQUAL(expected, bt); // must read same value that was written } } } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/testfiles/000077500000000000000000000000001217574114600210635ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/000077500000000000000000000000001217574114600233575ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/000077500000000000000000000000001217574114600243625ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/_12.cfs000066400000000000000000000022251217574114600254410ustar00rootroot00000000000000 _12.fnm_12.frq_12.prxE_12.fdxm_12.fdtS_12.tiir_12.tis_12.f0_12.f1_12.f2_12.tvx'_12.tvd=_12.tvfcontent2contentid .\30%here is more content with aaa aaa aaa31%here is more content with aaa aaa aaa32%here is more content with aaa aaa aaa33%here is more content with aaa aaa aaa34%here is more content with aaa aaa aaa aaacontent hereismorewith301234uuuuu|||||||||| Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/_a.cfs000066400000000000000000000036541217574114600254460ustar00rootroot00000000000000 _a.fnm_a.frq3_a.prx_a.fdx_a.fdt_a.tii_a.tisd_a.f0n_a.f1x_a.f2_a.tvx_a.tvd_a.tvfcontent2contentid -Z;h0%here is more content with aaa aaa aaa1%here is more content with aaa aaa aaa2%here is more content with aaa aaa aaa3%here is more content with aaa aaa aaa4%here is more content with aaa aaa aaa5%here is more content with aaa aaa aaa6%here is more content with aaa aaa aaa7%here is more content with aaa aaa aaa8%here is more content with aaa aaa aaa9%here is more content with aaa aaa 
aaaaaa  content here is more with 0 123456789uuuuuuuuuu|||||||||||||||||||| "&Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/_a.del000066400000000000000000000000121217574114600254200ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/_l.cfs000066400000000000000000000036671217574114600254650ustar00rootroot00000000000000 _l.fnm_l.frq3_l.prx_l.fdx_l.fdt_l.tii_l.tiso_l.f0y_l.f1_l.f2_l.tvx_l.tvd _l.tvfcontent2contentid .\Bp10%here is more content with aaa aaa aaa11%here is more content with aaa aaa aaa12%here is more content with aaa aaa aaa13%here is more content with aaa aaa aaa14%here is more content with aaa aaa aaa15%here is more content with aaa aaa aaa16%here is more content with aaa aaa aaa17%here is more content with aaa aaa aaa18%here is more content with aaa aaa aaa19%here is more content with aaa aaa aaaaaa  content here is more with 10 123456789uuuuuuuuuu|||||||||||||||||||| "&Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/_w.cfs000066400000000000000000000036671217574114600255000ustar00rootroot00000000000000 _w.fnm_w.frq3_w.prx_w.fdx_w.fdt_w.tii_w.tiso_w.f0y_w.f1_w.f2_w.tvx_w.tvd _w.tvfcontent2contentid .\Bp20%here is more content with aaa aaa aaa21%here is more content with aaa aaa aaa22%here is more content with aaa aaa aaa23%here is more content with aaa aaa aaa24%here is more content with aaa aaa aaa25%here is more content with aaa aaa aaa26%here is more content with aaa aaa aaa27%here is 
more content with aaa aaa aaa28%here is more content with aaa aaa aaa29%here is more content with aaa aaa aaaaaa  content here is more with 20 123456789uuuuuuuuuu|||||||||||||||||||| "&Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/_w.s1000066400000000000000000000000121217574114600252250ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/deletable000066400000000000000000000000041217574114600262200ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.cfs/segments000066400000000000000000000000611217574114600261270ustar00rootroot00000000000000S['_a _l _w _12LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/000077500000000000000000000000001217574114600247175ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.f0000066400000000000000000000000051217574114600255220ustar00rootroot00000000000000uuuuuLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.f1000066400000000000000000000000051217574114600255230ustar00rootroot00000000000000|||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.f2000066400000000000000000000000051217574114600255240ustar00rootroot00000000000000|||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.fdt000066400000000000000000000003461217574114600260020ustar00rootroot0000000000000030%here is more content with aaa aaa aaa31%here is more content with aaa aaa aaa32%here is more content with aaa aaa aaa33%here is more content with aaa aaa aaa34%here is more content with aaa aaa 
aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.fdx000066400000000000000000000000501217574114600257760ustar00rootroot00000000000000.\LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.fnm000066400000000000000000000000301217574114600257730ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.frq000066400000000000000000000000551217574114600260120ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.prx000066400000000000000000000000621217574114600260310ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.tii000066400000000000000000000000371217574114600260070ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.tis000066400000000000000000000001721217574114600260210ustar00rootroot00000000000000 aaacontent hereismorewith301234LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.tvd000066400000000000000000000000261217574114600260150ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.tvf000066400000000000000000000005301217574114600260170ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_12.tvx000066400000000000000000000000541217574114600260420ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.del000066400000000000000000000000121217574114600257550ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.f0000066400000000000000000000000121217574114600255160ustar00rootroot00000000000000uuuuuuuuuuLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.f1000066400000000000000000000000121217574114600255170ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.f2000066400000000000000000000000121217574114600255200ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.fdt000066400000000000000000000007021217574114600257740ustar00rootroot000000000000000%here is more content with aaa aaa aaa1%here is more content with aaa aaa aaa2%here is more content with aaa aaa aaa3%here is more content with aaa aaa aaa4%here is more content with aaa aaa aaa5%here is more content with aaa aaa aaa6%here is more content with aaa aaa aaa7%here is more content with aaa aaa aaa8%here is more content with aaa aaa aaa9%here is more content with aaa aaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.fdx000066400000000000000000000001201217574114600257720ustar00rootroot00000000000000-Z;hLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.fnm000066400000000000000000000000301217574114600257710ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.frq000066400000000000000000000001321217574114600260040ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.prx000066400000000000000000000001441217574114600260300ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.tii000066400000000000000000000000371217574114600260050ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.tis000066400000000000000000000002341217574114600260160ustar00rootroot00000000000000aaa  content here is more with 0 
123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.tvd000066400000000000000000000000521217574114600260120ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.tvf000066400000000000000000000012541217574114600260210ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_a.tvx000066400000000000000000000001241217574114600260360ustar00rootroot00000000000000 "&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.f0000066400000000000000000000000121217574114600255310ustar00rootroot00000000000000uuuuuuuuuuLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.f1000066400000000000000000000000121217574114600255320ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.f2000066400000000000000000000000121217574114600255330ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.fdt000066400000000000000000000007141217574114600260120ustar00rootroot0000000000000010%here is more content with aaa aaa aaa11%here is more content with aaa aaa aaa12%here is more content with aaa aaa aaa13%here is more content with aaa aaa aaa14%here is more content with aaa aaa aaa15%here is more content with aaa aaa aaa16%here is more content with aaa aaa aaa17%here is more content with aaa aaa aaa18%here is more content with aaa aaa aaa19%here is more content with aaa aaa 
aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.fdx000066400000000000000000000001201217574114600260050ustar00rootroot00000000000000.\BpLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.fnm000066400000000000000000000000301217574114600260040ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.frq000066400000000000000000000001321217574114600260170ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.prx000066400000000000000000000001441217574114600260430ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.tii000066400000000000000000000000371217574114600260200ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.tis000066400000000000000000000002351217574114600260320ustar00rootroot00000000000000aaa  content here is more with 10 123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.tvd000066400000000000000000000000521217574114600260250ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.tvf000066400000000000000000000012541217574114600260340ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_l.tvx000066400000000000000000000001241217574114600260510ustar00rootroot00000000000000 
"&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.f0000066400000000000000000000000121217574114600255440ustar00rootroot00000000000000uuuuuuuuuuLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.f1000066400000000000000000000000121217574114600255450ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.f2000066400000000000000000000000121217574114600255460ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.fdt000066400000000000000000000007141217574114600260250ustar00rootroot0000000000000020%here is more content with aaa aaa aaa21%here is more content with aaa aaa aaa22%here is more content with aaa aaa aaa23%here is more content with aaa aaa aaa24%here is more content with aaa aaa aaa25%here is more content with aaa aaa aaa26%here is more content with aaa aaa aaa27%here is more content with aaa aaa aaa28%here is more content with aaa aaa aaa29%here is more content with aaa aaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.fdx000066400000000000000000000001201217574114600260200ustar00rootroot00000000000000.\BpLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.fnm000066400000000000000000000000301217574114600260170ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.frq000066400000000000000000000001321217574114600260320ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.prx000066400000000000000000000001441217574114600260560ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.tii000066400000000000000000000000371217574114600260330ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.tis000066400000000000000000000002351217574114600260450ustar00rootroot00000000000000aaa  content here is more with 20 
123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.tvd000066400000000000000000000000521217574114600260400ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.tvf000066400000000000000000000012541217574114600260470ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/_w.tvx000066400000000000000000000001241217574114600260640ustar00rootroot00000000000000 "&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/deletable000066400000000000000000000000041217574114600265550ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/19.nocfs/segments000066400000000000000000000000611217574114600264640ustar00rootroot00000000000000S\T'_a _l _w _12LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/000077500000000000000000000000001217574114600243525ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/_12.cfs000066400000000000000000000022251217574114600254310ustar00rootroot00000000000000 _12.fnm_12.frq_12.prxE_12.fdxm_12.fdtS_12.tiir_12.tis_12.f0_12.f1_12.f2_12.tvx'_12.tvd=_12.tvfcontent2contentid .\30%here is more content with aaa aaa aaa31%here is more content with aaa aaa aaa32%here is more content with aaa aaa aaa33%here is more content with aaa aaa aaa34%here is more content with aaa aaa aaa aaacontent hereismorewith301234uuuuu|||||||||| Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/_a.cfs000066400000000000000000000036541217574114600254360ustar00rootroot00000000000000 
_a.fnm_a.frq3_a.prx_a.fdx_a.fdt_a.tii_a.tisd_a.f0n_a.f1x_a.f2_a.tvx_a.tvd_a.tvfcontent2contentid -Z;h0%here is more content with aaa aaa aaa1%here is more content with aaa aaa aaa2%here is more content with aaa aaa aaa3%here is more content with aaa aaa aaa4%here is more content with aaa aaa aaa5%here is more content with aaa aaa aaa6%here is more content with aaa aaa aaa7%here is more content with aaa aaa aaa8%here is more content with aaa aaa aaa9%here is more content with aaa aaa aaaaaa  content here is more with 0 123456789uuuuuuuuuu|||||||||||||||||||| "&Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/_a.del000066400000000000000000000000121217574114600254100ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/_l.cfs000066400000000000000000000036671217574114600254550ustar00rootroot00000000000000 _l.fnm_l.frq3_l.prx_l.fdx_l.fdt_l.tii_l.tiso_l.f0y_l.f1_l.f2_l.tvx_l.tvd _l.tvfcontent2contentid .\Bp10%here is more content with aaa aaa aaa11%here is more content with aaa aaa aaa12%here is more content with aaa aaa aaa13%here is more content with aaa aaa aaa14%here is more content with aaa aaa aaa15%here is more content with aaa aaa aaa16%here is more content with aaa aaa aaa17%here is more content with aaa aaa aaa18%here is more content with aaa aaa aaa19%here is more content with aaa aaa aaaaaa  content here is more with 10 123456789uuuuuuuuuu|||||||||||||||||||| "&Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent 
hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/_w.cfs000066400000000000000000000036671217574114600254700ustar00rootroot00000000000000 _w.fnm_w.frq3_w.prx_w.fdx_w.fdt_w.tii_w.tiso_w.f0y_w.f1_w.f2_w.tvx_w.tvd _w.tvfcontent2contentid .\Bp20%here is more content with aaa aaa aaa21%here is more content with aaa aaa aaa22%here is more content with aaa aaa aaa23%here is more content with aaa aaa aaa24%here is more content with aaa aaa aaa25%here is more content with aaa aaa aaa26%here is more content with aaa aaa aaa27%here is more content with aaa aaa aaa28%here is more content with aaa aaa aaa29%here is more content with aaa aaa aaaaaa  content here is more with 20 123456789uuuuuuuuuu|||||||||||||||||||| "&Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/_w.s1000066400000000000000000000000121217574114600252150ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/deletable000066400000000000000000000000041217574114600262100ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.cfs/segments000066400000000000000000000000611217574114600261170ustar00rootroot00000000000000QL'_a _l _w 
_12LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/000077500000000000000000000000001217574114600247075ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.f0000066400000000000000000000000051217574114600255120ustar00rootroot00000000000000uuuuuLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.f1000066400000000000000000000000051217574114600255130ustar00rootroot00000000000000|||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.f2000066400000000000000000000000051217574114600255140ustar00rootroot00000000000000|||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.fdt000066400000000000000000000003461217574114600257720ustar00rootroot0000000000000030%here is more content with aaa aaa aaa31%here is more content with aaa aaa aaa32%here is more content with aaa aaa aaa33%here is more content with aaa aaa aaa34%here is more content with aaa aaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.fdx000066400000000000000000000000501217574114600257660ustar00rootroot00000000000000.\LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.fnm000066400000000000000000000000301217574114600257630ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.frq000066400000000000000000000000551217574114600260020ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.prx000066400000000000000000000000621217574114600260210ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.tii000066400000000000000000000000371217574114600257770ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.tis000066400000000000000000000001721217574114600260110ustar00rootroot00000000000000 aaacontent 
hereismorewith301234LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.tvd000066400000000000000000000000261217574114600260050ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.tvf000066400000000000000000000005301217574114600260070ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_12.tvx000066400000000000000000000000541217574114600260320ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.del000066400000000000000000000000121217574114600257450ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.f0000066400000000000000000000000121217574114600255060ustar00rootroot00000000000000uuuuuuuuuuLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.f1000066400000000000000000000000121217574114600255070ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.f2000066400000000000000000000000121217574114600255100ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.fdt000066400000000000000000000007021217574114600257640ustar00rootroot000000000000000%here is more content with aaa aaa aaa1%here is more content with aaa aaa aaa2%here is more content with aaa aaa aaa3%here is more content with aaa aaa aaa4%here is more content with aaa aaa aaa5%here is more content with aaa aaa aaa6%here is more content with aaa aaa aaa7%here is more content with aaa aaa aaa8%here is more content with aaa aaa aaa9%here is more content with aaa aaa 
aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.fdx000066400000000000000000000001201217574114600257620ustar00rootroot00000000000000-Z;hLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.fnm000066400000000000000000000000301217574114600257610ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.frq000066400000000000000000000001321217574114600257740ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.prx000066400000000000000000000001441217574114600260200ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.tii000066400000000000000000000000371217574114600257750ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.tis000066400000000000000000000002341217574114600260060ustar00rootroot00000000000000aaa  content here is more with 0 123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.tvd000066400000000000000000000000521217574114600260020ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.tvf000066400000000000000000000012541217574114600260110ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_a.tvx000066400000000000000000000001241217574114600260260ustar00rootroot00000000000000 
"&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.f0000066400000000000000000000000121217574114600255210ustar00rootroot00000000000000uuuuuuuuuuLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.f1000066400000000000000000000000121217574114600255220ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.f2000066400000000000000000000000121217574114600255230ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.fdt000066400000000000000000000007141217574114600260020ustar00rootroot0000000000000010%here is more content with aaa aaa aaa11%here is more content with aaa aaa aaa12%here is more content with aaa aaa aaa13%here is more content with aaa aaa aaa14%here is more content with aaa aaa aaa15%here is more content with aaa aaa aaa16%here is more content with aaa aaa aaa17%here is more content with aaa aaa aaa18%here is more content with aaa aaa aaa19%here is more content with aaa aaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.fdx000066400000000000000000000001201217574114600257750ustar00rootroot00000000000000.\BpLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.fnm000066400000000000000000000000301217574114600257740ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.frq000066400000000000000000000001321217574114600260070ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.prx000066400000000000000000000001441217574114600260330ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.tii000066400000000000000000000000371217574114600260100ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.tis000066400000000000000000000002351217574114600260220ustar00rootroot00000000000000aaa  content here is more with 10 
123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.tvd000066400000000000000000000000521217574114600260150ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.tvf000066400000000000000000000012541217574114600260240ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_l.tvx000066400000000000000000000001241217574114600260410ustar00rootroot00000000000000 "&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.f0000066400000000000000000000000121217574114600255340ustar00rootroot00000000000000uuuuuuuuuuLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.f1000066400000000000000000000000121217574114600255350ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.f2000066400000000000000000000000121217574114600255360ustar00rootroot00000000000000||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.fdt000066400000000000000000000007141217574114600260150ustar00rootroot0000000000000020%here is more content with aaa aaa aaa21%here is more content with aaa aaa aaa22%here is more content with aaa aaa aaa23%here is more content with aaa aaa aaa24%here is more content with aaa aaa aaa25%here is more content with aaa aaa aaa26%here is more content with aaa aaa aaa27%here is more content with aaa aaa aaa28%here is more content with aaa aaa aaa29%here is more content with aaa aaa 
aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.fdx000066400000000000000000000001201217574114600260100ustar00rootroot00000000000000.\BpLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.fnm000066400000000000000000000000301217574114600260070ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.frq000066400000000000000000000001321217574114600260220ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.prx000066400000000000000000000001441217574114600260460ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.tii000066400000000000000000000000371217574114600260230ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.tis000066400000000000000000000002351217574114600260350ustar00rootroot00000000000000aaa  content here is more with 20 123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.tvd000066400000000000000000000000521217574114600260300ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.tvf000066400000000000000000000012541217574114600260370ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/_w.tvx000066400000000000000000000001241217574114600260540ustar00rootroot00000000000000 "&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/deletable000066400000000000000000000000041217574114600265450ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/20.nocfs/segments000066400000000000000000000000611217574114600264540ustar00rootroot00000000000000Q'_a _l _w 
_12LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/000077500000000000000000000000001217574114600243535ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/_0.cfs000066400000000000000000000036251217574114600253540ustar00rootroot00000000000000 _0.fnm_0.frq_0.prx|_0.fdx_0.fdt_0.tii_0.tisI_0.nrmk_0.tvx_0.tvd_0.tvfcontent2contentid -Z;h0%here is more content with aaa aaa aaa1%here is more content with aaa aaa aaa2%here is more content with aaa aaa aaa3%here is more content with aaa aaa aaa4%here is more content with aaa aaa aaa5%here is more content with aaa aaa aaa6%here is more content with aaa aaa aaa7%here is more content with aaa aaa aaa8%here is more content with aaa aaa aaa9%here is more content with aaa aaa aaaaaa  content here is more with 0 123456789NRMuuuuuuuuuu|||||||||||||||||||| "&Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/_0_1.del000066400000000000000000000000121217574114600255500ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/_1.cfs000066400000000000000000000036401217574114600253520ustar00rootroot00000000000000 _1.fnm_1.frq_1.prx|_1.fdx_1.fdt_1.tii_1.tisT_1.nrmv_1.tvx_1.tvd_1.tvfcontent2contentid .\Bp10%here is more content with aaa aaa aaa11%here is more content with aaa aaa aaa12%here is more content with aaa aaa aaa13%here is more content with aaa aaa aaa14%here is more content with aaa aaa aaa15%here is more content with aaa aaa aaa16%here is more content with aaa aaa aaa17%here is more content with aaa aaa aaa18%here is more content with aaa aaa aaa19%here is more content with aaa aaa aaaaaa  content here is more with 10 123456789NRMuuuuuuuuuu|||||||||||||||||||| "&Haaacontent 
hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/_2.cfs000066400000000000000000000036401217574114600253530ustar00rootroot00000000000000 _2.fnm_2.frq_2.prx|_2.fdx_2.fdt_2.tii_2.tisT_2.nrmv_2.tvx_2.tvd_2.tvfcontent2contentid .\Bp20%here is more content with aaa aaa aaa21%here is more content with aaa aaa aaa22%here is more content with aaa aaa aaa23%here is more content with aaa aaa aaa24%here is more content with aaa aaa aaa25%here is more content with aaa aaa aaa26%here is more content with aaa aaa aaa27%here is more content with aaa aaa aaa28%here is more content with aaa aaa aaa29%here is more content with aaa aaa aaaaaa  content here is more with 20 123456789NRMuuuuuuuuuu|||||||||||||||||||| "&Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/_2_1.s1000066400000000000000000000000121217574114600253310ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/_3.cfs000066400000000000000000000021611217574114600253510ustar00rootroot00000000000000 _3.fnm_3.frq_3.prx_3.fdxE_3.fdt+_3.tiiJ_3.tis_3.nrm_3.tvx_3.tvd_3.tvfcontent2contentid .\30%here is more content with aaa aaa aaa31%here is more content with aaa aaa aaa32%here is more content with aaa aaa aaa33%here is more content with aaa aaa aaa34%here is more content with aaa aaa aaa aaacontent hereismorewith301234NRMuuuuu|||||||||| Haaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent 
hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/segments.gen000066400000000000000000000000241217574114600266670ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.cfs/segments_a000066400000000000000000000002001217574114600264130ustar00rootroot00000000000000S4_0 _1 _2 _3LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/000077500000000000000000000000001217574114600247105ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.fdt000066400000000000000000000007021217574114600257040ustar00rootroot000000000000000%here is more content with aaa aaa aaa1%here is more content with aaa aaa aaa2%here is more content with aaa aaa aaa3%here is more content with aaa aaa aaa4%here is more content with aaa aaa aaa5%here is more content with aaa aaa aaa6%here is more content with aaa aaa aaa7%here is more content with aaa aaa aaa8%here is more content with aaa aaa aaa9%here is more content with aaa aaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.fdx000066400000000000000000000001201217574114600257020ustar00rootroot00000000000000-Z;hLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.fnm000066400000000000000000000000301217574114600257010ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.frq000066400000000000000000000001321217574114600257140ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.nrm000066400000000000000000000000421217574114600257200ustar00rootroot00000000000000NRMuuuuuuuuuu||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.prx000066400000000000000000000001441217574114600257400ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.tii000066400000000000000000000000371217574114600257150ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.tis000066400000000000000000000002341217574114600257260ustar00rootroot00000000000000aaa  content here is more with 0 123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.tvd000066400000000000000000000000521217574114600257220ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.tvf000066400000000000000000000012541217574114600257310ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0.tvx000066400000000000000000000001241217574114600257460ustar00rootroot00000000000000 "&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_0_1.del000066400000000000000000000000121217574114600261050ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.fdt000066400000000000000000000007141217574114600257100ustar00rootroot0000000000000010%here is more content with aaa aaa aaa11%here is more content with aaa aaa aaa12%here is more content with aaa aaa aaa13%here is more content with aaa aaa aaa14%here is more content with aaa aaa aaa15%here is more content with aaa aaa aaa16%here is more content with aaa aaa aaa17%here is more content with 
aaa aaa aaa18%here is more content with aaa aaa aaa19%here is more content with aaa aaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.fdx000066400000000000000000000001201217574114600257030ustar00rootroot00000000000000.\BpLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.fnm000066400000000000000000000000301217574114600257020ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.frq000066400000000000000000000001321217574114600257150ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.nrm000066400000000000000000000000421217574114600257210ustar00rootroot00000000000000NRMuuuuuuuuuu||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.prx000066400000000000000000000001441217574114600257410ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.tii000066400000000000000000000000371217574114600257160ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.tis000066400000000000000000000002351217574114600257300ustar00rootroot00000000000000aaa  content here is more with 10 123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.tvd000066400000000000000000000000521217574114600257230ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.tvf000066400000000000000000000012541217574114600257320ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_1.tvx000066400000000000000000000001241217574114600257470ustar00rootroot00000000000000 
"&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.fdt000066400000000000000000000007141217574114600257110ustar00rootroot0000000000000020%here is more content with aaa aaa aaa21%here is more content with aaa aaa aaa22%here is more content with aaa aaa aaa23%here is more content with aaa aaa aaa24%here is more content with aaa aaa aaa25%here is more content with aaa aaa aaa26%here is more content with aaa aaa aaa27%here is more content with aaa aaa aaa28%here is more content with aaa aaa aaa29%here is more content with aaa aaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.fdx000066400000000000000000000001201217574114600257040ustar00rootroot00000000000000.\BpLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.fnm000066400000000000000000000000301217574114600257030ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.frq000066400000000000000000000001321217574114600257160ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.nrm000066400000000000000000000000421217574114600257220ustar00rootroot00000000000000NRMuuuuuuuuuu||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.prx000066400000000000000000000001441217574114600257420ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.tii000066400000000000000000000000371217574114600257170ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.tis000066400000000000000000000002351217574114600257310ustar00rootroot00000000000000aaa  content here is more with 20 
123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.tvd000066400000000000000000000000521217574114600257240ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.tvf000066400000000000000000000012541217574114600257330ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2.tvx000066400000000000000000000001241217574114600257500ustar00rootroot00000000000000 "&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_2_1.s1000066400000000000000000000000121217574114600256660ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.fdt000066400000000000000000000003461217574114600257130ustar00rootroot0000000000000030%here is more content with aaa aaa aaa31%here is more content with aaa aaa aaa32%here is more content with aaa aaa aaa33%here is more content with aaa aaa aaa34%here is more content with aaa aaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.fdx000066400000000000000000000000501217574114600257070ustar00rootroot00000000000000.\LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.fnm000066400000000000000000000000301217574114600257040ustar00rootroot00000000000000content2contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.frq000066400000000000000000000000551217574114600257230ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.nrm000066400000000000000000000000231217574114600257220ustar00rootroot00000000000000NRMuuuuu||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.prx000066400000000000000000000000621217574114600257420ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.tii000066400000000000000000000000371217574114600257200ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.tis000066400000000000000000000001721217574114600257320ustar00rootroot00000000000000 aaacontent hereismorewith301234LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.tvd000066400000000000000000000000261217574114600257260ustar00rootroot00000000000000HLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.tvf000066400000000000000000000005301217574114600257300ustar00rootroot00000000000000aaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithaaacontent hereismorewithLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/_3.tvx000066400000000000000000000000541217574114600257530ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/segments.gen000066400000000000000000000000241217574114600272240ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/21.nocfs/segments_6000066400000000000000000000002001217574114600266750ustar00rootroot00000000000000SҴ_0 _1 _2 _3LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/000077500000000000000000000000001217574114600243545ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/_0.cfs000066400000000000000000000007231217574114600253510ustar00rootroot00000000000000y_0.fnm_0.frq_0.prx_0.fdx_0.fdt1_0.tiiT_0.tis_0.nrmcontentid  #(-0123456789   aaa 0 
123456789NRM||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/_0_1.del000066400000000000000000000000121217574114600255510ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/_1.cfs000066400000000000000000000007361217574114600253560ustar00rootroot00000000000000y_1.fnm_1.frq_1.prx_1.fdx_1.fdt;_1.tii^_1.tis_1.nrmcontentid  $*0610111213141516171819   aaa 10 123456789NRM||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/_2.cfs000066400000000000000000000007361217574114600253570ustar00rootroot00000000000000y_2.fnm_2.frq_2.prx_2.fdx_2.fdt;_2.tii^_2.tis_2.nrmcontentid  $*0620212223242526272829   aaa 20 123456789NRM||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/_2_1.s0000066400000000000000000000000121217574114600253310ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/_3.cfs000066400000000000000000000005271217574114600253560ustar00rootroot00000000000000y_3.fnm_3.frq_3.prx_3.fdx_3.fdt_3.tii_3.tisI_3.nrmcontentid  3031323334  aaa301234NRM||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/segments.gen000066400000000000000000000000241217574114600266700ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.cfs/segments_a000066400000000000000000000001701217574114600264220ustar00rootroot00000000000000Sģ_0 _1 _2 _3LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/000077500000000000000000000000001217574114600247115ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0.fdt000066400000000000000000000000621217574114600257040ustar00rootroot000000000000000123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0.fdx000066400000000000000000000001201217574114600257030ustar00rootroot00000000000000 
#(-LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0.fnm000066400000000000000000000000161217574114600257060ustar00rootroot00000000000000contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0.frq000066400000000000000000000000241217574114600257150ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0.nrm000066400000000000000000000000301217574114600257160ustar00rootroot00000000000000NRM||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0.prx000066400000000000000000000000241217574114600257360ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0.tii000066400000000000000000000000431217574114600257130ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0.tis000066400000000000000000000001471217574114600257320ustar00rootroot00000000000000  aaa 0 123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_0_1.del000066400000000000000000000000121217574114600261060ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_1.fdt000066400000000000000000000000741217574114600257100ustar00rootroot0000000000000010111213141516171819LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_1.fdx000066400000000000000000000001201217574114600257040ustar00rootroot00000000000000 $*06LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_1.fnm000066400000000000000000000000161217574114600257070ustar00rootroot00000000000000contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_1.frq000066400000000000000000000000241217574114600257160ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_1.nrm000066400000000000000000000000301217574114600257170ustar00rootroot00000000000000NRM||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_1.prx000066400000000000000000000000241217574114600257370ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_1.tii000066400000000000000000000000431217574114600257140ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_1.tis000066400000000000000000000001501217574114600257250ustar00rootroot00000000000000  aaa 10 123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2.fdt000066400000000000000000000000741217574114600257110ustar00rootroot0000000000000020212223242526272829LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2.fdx000066400000000000000000000001201217574114600257050ustar00rootroot00000000000000 $*06LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2.fnm000066400000000000000000000000161217574114600257100ustar00rootroot00000000000000contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2.frq000066400000000000000000000000241217574114600257170ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2.nrm000066400000000000000000000000301217574114600257200ustar00rootroot00000000000000NRM||||||||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2.prx000066400000000000000000000000241217574114600257400ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2.tii000066400000000000000000000000431217574114600257150ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2.tis000066400000000000000000000001501217574114600257260ustar00rootroot00000000000000  aaa 20 
123456789LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_2_1.s0000066400000000000000000000000121217574114600256660ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_3.fdt000066400000000000000000000000361217574114600257100ustar00rootroot000000000000003031323334LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_3.fdx000066400000000000000000000000501217574114600257100ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_3.fnm000066400000000000000000000000161217574114600257110ustar00rootroot00000000000000contentidLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_3.frq000066400000000000000000000000121217574114600257150ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_3.nrm000066400000000000000000000000161217574114600257250ustar00rootroot00000000000000NRM||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_3.prx000066400000000000000000000000121217574114600257360ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_3.tii000066400000000000000000000000431217574114600257160ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/_3.tis000066400000000000000000000001051217574114600257270ustar00rootroot00000000000000 aaa301234LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/segments.gen000066400000000000000000000000241217574114600272250ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/22.nocfs/segments_6000066400000000000000000000001701217574114600267040ustar00rootroot00000000000000SŔ_0 _1 _2 
_3LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.cfs/000077500000000000000000000000001217574114600243555ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.cfs/_0.cfs000066400000000000000000000411241217574114600253520ustar00rootroot00000000000000 _0.fdtd_0.fdx|_0.tvx_0.tvf7w_0.tvd8_0.fnm9_0.frq n  6f.^& !+5?IS]gq{%/9CMW  Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene 
abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠ ;D/;D/;D/;D/;D/ ;D/ ;D/ ;D/;D/;D/;D/;D/;D/;D/;D/;D/;D/;D/ ;D/";D/$;D/&;D/(;D/);D/+;D/-;D//;D/1;D/3;D/4;D/6;D/8;D/:;D/<;D/=;D/contentidautf8utf8content2fieⱷld- 0!#%')+-/13579;=?ACE 6 ## Lucene#)##abcd#)##☠#)##aaa#)###)#Fcontent#Li#here#)##is#)##more#)##with#)##field#)##name#)##on-ascii#)##with#)##0)#1012345678920123456789301234456789## Lucene#)##abcd#)##☠#)## 
NRM||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.cfs/_0_1.del000066400000000000000000000000151217574114600255550ustar00rootroot00000000000000#LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.cfs/_0_1.s0000066400000000000000000000000431217574114600253340ustar00rootroot00000000000000|||||||||||||||||||||~|||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.cfs/segments.gen000066400000000000000000000000241217574114600266710ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.cfs/segments_4000066400000000000000000000001351217574114600263470ustar00rootroot00000000000000 ´_0#LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/000077500000000000000000000000001217574114600247125ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.fdt000066400000000000000000000122761217574114600257170ustar00rootroot00000000000000Lucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name0Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name1Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name2Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name3Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name4Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name5Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name6Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name7Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name8Lucene ☠ abcdLucene ☠ abcd%here is more 
content with aaa aaa aaafield with non-ascii name9Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name10Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name11Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name12Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name13Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name14Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name15Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name16Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name17Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name18Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name19Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name20Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name21Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name22Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name23Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name24Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name25Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name26Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name27Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name28Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name29Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name30Lucene ☠ abcdLucene ☠ abcd%here is 
more content with aaa aaa aaafield with non-ascii name31Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name32Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name33Lucene ☠ abcdLucene ☠ abcd%here is more content with aaa aaa aaafield with non-ascii name34Lucene ☠ abcdLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.fdx000066400000000000000000000004301217574114600257100ustar00rootroot00000000000000.\!O~F v  > n  6f.^&LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.fnm000066400000000000000000000000571217574114600257140ustar00rootroot00000000000000contentidautf8utf8content2fieⱷldLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.frq000066400000000000000000000015211217574114600257210ustar00rootroot00000000000000- 0!#%')+-/13579;=?ACE LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.nrm000066400000000000000000000003261217574114600257270ustar00rootroot00000000000000NRM||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.prx000066400000000000000000000014021217574114600257400ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.tii000066400000000000000000000000431217574114600257140ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.tis000066400000000000000000000010011217574114600257210ustar00rootroot000000000000006 ## Lucene#)##abcd#)##☠#)##aaa#)###)#Fcontent#Li#here#)##is#)##more#)##with#)##field#)##name#)##on-ascii#)##with#)##0)#1012345678920123456789301234456789## 
Lucene#)##abcd#)##☠#)##LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.tvd000066400000000000000000000005411217574114600257270ustar00rootroot00000000000000;D/;D/;D/;D/;D/ ;D/ ;D/ ;D/;D/;D/;D/;D/;D/;D/;D/;D/;D/;D/ ;D/";D/$;D/&;D/(;D/);D/+;D/-;D//;D/1;D/3;D/4;D/6;D/8;D/:;D/<;D/=;D/LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.tvf000066400000000000000000000177371217574114600257500ustar00rootroot00000000000000  Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene 
abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠   Lucene abcd☠ aaacontent hereismorewithfieldnameon-ascii with  Lucene abcd☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0.tvx000066400000000000000000000004341217574114600257540ustar00rootroot00000000000000 !+5?IS]gq{%/9CMWLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0_1.del000066400000000000000000000000151217574114600261120ustar00rootroot00000000000000#LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/_0_1.s0000066400000000000000000000000431217574114600256710ustar00rootroot00000000000000|||||||||||||||||||||~|||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/segments.gen000066400000000000000000000000241217574114600272260ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/23.nocfs/segments_3000066400000000000000000000001351217574114600267030ustar00rootroot00000000000000 
_0#LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/000077500000000000000000000000001217574114600243565ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/_0.cfs000066400000000000000000000017141217574114600253540ustar00rootroot00000000000000[_0.nrm_0.tis_0.fnm_0.tii_0.frq_0.prxNRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxx  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ aaa  content here is more with field name on-ascii with 0 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ contentidautf8utf8content2fieⱷld  LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/_0.cfx000066400000000000000000000326051217574114600253640ustar00rootroot00000000000000L_0.tvx_0.tvf u_0.tvd!_0.fdt4i_0.fdx $p,K4&<DLT\ md Hl #t | jE gBd ? Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent 
hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ 4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name0Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content 
with aaa aaa aaafield with non-ascii name1Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name2Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name3Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name4Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name5Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name6Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name7Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name8Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name9Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name10Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name11Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name12Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name13Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name14Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name15Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name16Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name17Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name18Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name19Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name20Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is 
more content with aaa aaa aaafield with non-ascii name21Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name22Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name23Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name24Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name25Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name26Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name27Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name28Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name29Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name30Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name31Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name32Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name33Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name34Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd(:L^r $ 8 L ` t &:NLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/_0_1.del000066400000000000000000000000121217574114600255530ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/_1.cfs000066400000000000000000000017151217574114600253560ustar00rootroot00000000000000[_1.prx7_1.tisi_1.frq;_1.tii^_1.fnm_1.nrm  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ aaa  content here is more with field name on-ascii with 10 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠   
contentidautf8utf8content2fieⱷldNRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxxLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/_2.cfs000066400000000000000000000017151217574114600253570ustar00rootroot00000000000000[_2.tis_2.prxi_2.frq;_2.tii^_2.nrm_2.fnm  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ aaa  content here is more with field name on-ascii with 20 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠   NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxxcontentidautf8utf8content2fieⱷldLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/_2_1.s0000066400000000000000000000000121217574114600253330ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/_3.cfs000066400000000000000000000012651217574114600253600ustar00rootroot00000000000000[_3.nrm}_3.tii_3.frq _3.fnm8_3.prx_3.tisNRM||||||||||xxxxxxxxxxuuuuuxxxxx  contentidautf8utf8content2fieⱷld Lu𝄞ce𝅘𝅥𝅮neab񕰗cd☠aaacontent hereismorewithfieldnameon-asciiwith301234Lu𝄞ce𝅘𝅥𝅮neab񕰗cd☠LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/_4.cfs000066400000000000000000000003461217574114600253600ustar00rootroot00000000000000j_4.fdxv_4.tii_4.frq_4.nrm_4.fnm_4.fdt_4.tis NRM|content3Acontent4@aaaaaa aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/segments.gen000066400000000000000000000000241217574114600266720ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.cfs/segments_4000066400000000000000000000003621217574114600263520ustar00rootroot00000000000000"dXq_0 _0_1 _0_2 _0_3_0_4rLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/000077500000000000000000000000001217574114600247135ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.fdt000066400000000000000000000113301217574114600257060ustar00rootroot00000000000000Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name0Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa 
aaafield with non-ascii name1Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name2Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name3Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name4Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name5Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name6Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name7Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name8Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name9Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name10Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name11Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name12Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name13Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name14Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name15Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name16Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name17Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name18Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name19Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name20Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content 
with aaa aaa aaafield with non-ascii name21Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name22Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name23Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name24Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name25Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name26Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name27Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name28Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name29Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name30Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name31Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name32Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name33Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii name34Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.fdx000066400000000000000000000004341217574114600257150ustar00rootroot00000000000000(:L^r $ 8 L ` t &:NLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.fnm000066400000000000000000000000571217574114600257150ustar00rootroot00000000000000contentidautf8utf8content2fieⱷldLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.frq000066400000000000000000000003221217574114600257200ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.nrm000066400000000000000000000001001217574114600257160ustar00rootroot00000000000000NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxxLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.prx000066400000000000000000000003341217574114600257440ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.tii000066400000000000000000000000431217574114600257150ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.tis000066400000000000000000000004611217574114600257330ustar00rootroot00000000000000  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ aaa  content here is more with field name on-ascii with 0 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.tvd000066400000000000000000000004341217574114600257310ustar00rootroot000000000000004D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.tvf000066400000000000000000000167651217574114600257510ustar00rootroot00000000000000 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent 
hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with 
Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0.tvx000066400000000000000000000010641217574114600257550ustar00rootroot00000000000000 $p,K4&<DLT\ md Hl #t | jE gBd ?LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_0_1.del000066400000000000000000000000121217574114600261100ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_1.fnm000066400000000000000000000000571217574114600257160ustar00rootroot00000000000000contentidautf8utf8content2fieⱷldLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_1.frq000066400000000000000000000003221217574114600257210ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_1.nrm000066400000000000000000000001001217574114600257170ustar00rootroot00000000000000NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxxLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_1.prx000066400000000000000000000003341217574114600257450ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_1.tii000066400000000000000000000000431217574114600257160ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_1.tis000066400000000000000000000004621217574114600257350ustar00rootroot00000000000000  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ aaa  content here is more with field name on-ascii with 10 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_2.fnm000066400000000000000000000000571217574114600257170ustar00rootroot00000000000000contentidautf8utf8content2fieⱷldLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_2.frq000066400000000000000000000003221217574114600257220ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_2.nrm000066400000000000000000000001001217574114600257200ustar00rootroot00000000000000NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxxLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_2.prx000066400000000000000000000003341217574114600257460ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_2.tii000066400000000000000000000000431217574114600257170ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_2.tis000066400000000000000000000004621217574114600257360ustar00rootroot00000000000000  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ aaa  content here is more with field name on-ascii with 20 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_2_1.s0000066400000000000000000000000121217574114600256700ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_3.fnm000066400000000000000000000000571217574114600257200ustar00rootroot00000000000000contentidautf8utf8content2fieⱷldLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_3.frq000066400000000000000000000001511217574114600257230ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_3.nrm000066400000000000000000000000421217574114600257260ustar00rootroot00000000000000NRM||||||||||xxxxxxxxxxuuuuuxxxxxLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_3.prx000066400000000000000000000001561217574114600257510ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_3.tii000066400000000000000000000000431217574114600257200ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_3.tis000066400000000000000000000004171217574114600257370ustar00rootroot00000000000000 Lu𝄞ce𝅘𝅥𝅮neab񕰗cd☠aaacontent 
hereismorewithfieldnameon-asciiwith301234Lu𝄞ce𝅘𝅥𝅮neab񕰗cd☠LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_4.fdt000066400000000000000000000000211217574114600257050ustar00rootroot00000000000000aaaaaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_4.fdx000066400000000000000000000000141217574114600257130ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_4.fnm000066400000000000000000000000251217574114600257140ustar00rootroot00000000000000content3Acontent4@LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_4.frq000066400000000000000000000000011217574114600257160ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_4.nrm000066400000000000000000000000051217574114600257260ustar00rootroot00000000000000NRM|LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_4.tii000066400000000000000000000000431217574114600257210ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/_4.tis000066400000000000000000000000411217574114600257310ustar00rootroot00000000000000 aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/segments.gen000066400000000000000000000000241217574114600272270ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/24.nocfs/segments_4000066400000000000000000000003621217574114600267070ustar00rootroot00000000000000"dY _0 _0_1 _0_2 _0_3_0_4f.LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/000077500000000000000000000000001217574114600243635ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/_0.cfs000066400000000000000000000022151217574114600253560ustar00rootroot00000000000000[_0.frq<_0.nrm_0.tii_0.tisR_0.fnm_0.prx NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxx|||||||||||||||    Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ Xthis is a compressed field and should appear in 3.0 as an uncompressed field 
after merge 2873aaa  content here is more with field name on-ascii with 0 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ contentidautf8utf8content2fieⱷld compressedcompressedSizeLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/_0.cfx000066400000000000000000000366331217574114600253760ustar00rootroot00000000000000L_0.tvx_0.tvf u_0.tvd!_0.fdt<_0.fdx $p,K4&<DLT\ md Hl #t | jE gBd ? Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  
Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ 4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/0Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq731Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec282Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq733Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec284Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is 
more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq735Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec286Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq737Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec288Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq739Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2810Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7311Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2812Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7313Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2814Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7315Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2816Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7317Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2818Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7319Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2820Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa 
aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7321Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2822Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7323Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2824Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7325Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2826Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7327Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2828Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7329Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2830Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7331Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2832Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7333Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2834Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq73es  0 B  T/fAxSeLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/_0_1.del000066400000000000000000000000121217574114600255600ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/_1.cfs000066400000000000000000000022161217574114600253600ustar00rootroot00000000000000[_1.fnm_1.tisK_1.nrm_1.prx_1.tii_1.frqcontentidautf8utf8content2fieⱷld compressedcompressedSize   Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ Xthis is a compressed field and should appear in 3.0 as an uncompressed field after merge 2873aaa  content here is more with field name on-ascii with 10 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxx|||||||||||||||  LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/_2.cfs000066400000000000000000000022161217574114600253610ustar00rootroot00000000000000[_2.frq<_2.prx'_2.tiiJ_2.tis_2.nrm>_2.fnm     Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ Xthis is a compressed field and should appear in 3.0 as an uncompressed field after merge 2873aaa  content here is more with field name on-ascii with 20 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxx|||||||||||||||contentidautf8utf8content2fieⱷld compressedcompressedSizeLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/_2_1.s0000066400000000000000000000000121217574114600253400ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/_3.cfs000066400000000000000000000015361217574114600253660ustar00rootroot00000000000000[_3.prx_3.nrm_3.frqn_3.tis_3.fnm;_3.tiiNRM||||||||||xxxxxxxxxxuuuuuxxxxx||||||||  Lu𝄞ce𝅘𝅥𝅮neab񕰗cd☠Xthis is a compressed field and should appear in 3.0 as an uncompressed field after merge2873aaacontent hereismorewithfieldnameon-asciiwith301234Lu𝄞ce𝅘𝅥𝅮neab񕰗cd☠contentidautf8utf8content2fieⱷld compressedcompressedSize LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/_4.cfs000066400000000000000000000003531217574114600253630ustar00rootroot00000000000000j_4.frqk_4.fdt|_4.tii_4.fnm_4.fdx_4.nrm_4.tisaaaaaa content3Acontent4NRM| 
aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/segments.gen000066400000000000000000000000241217574114600266770ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.cfs/segments_4000066400000000000000000000016751217574114600263670ustar00rootroot00000000000000$},q_0 _0 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush_1 _0 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush_2 _0 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush_3_0 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush_4 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/000077500000000000000000000000001217574114600247205ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.fdt000066400000000000000000000153561217574114600257270ustar00rootroot000000000000000Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq731Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec282Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq733Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec284Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq735Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii 
namexcdbfaec286Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq737Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec288Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq739Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2810Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7311Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2812Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7313Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2814Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7315Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2816Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7317Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2818Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7319Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2820Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7321Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2822Lu𝄞ce𝅘𝅥𝅮ne 
☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7323Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2824Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7325Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2826Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7327Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2828Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7329Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2830Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7331Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2832Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq7333Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii namexcdbfaec2834Lu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cdLu𝄞ce𝅘𝅥𝅮ne ☠ ab񕰗cd%here is more content with aaa aaa aaafield with non-ascii nameIxeA /(_ZC=0Y[`#F7g3J!uO(H ٹb(Nyq73LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.fdx000066400000000000000000000004341217574114600257220ustar00rootroot00000000000000es  0 B  T/fAxSeLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.fnm000066400000000000000000000001201217574114600257110ustar00rootroot00000000000000contentidautf8utf8content2fieⱷld 
compressedcompressedSizeLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.frq000066400000000000000000000003411217574114600257260ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.nrm000066400000000000000000000001241217574114600257310ustar00rootroot00000000000000NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxx|||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.prx000066400000000000000000000003531217574114600257520ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.tii000066400000000000000000000000431217574114600257220ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.tis000066400000000000000000000006371217574114600257450ustar00rootroot00000000000000   Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ Xthis is a compressed field and should appear in 3.0 as an uncompressed field after merge 2873aaa  content here is more with field name on-ascii with 0 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.tvd000066400000000000000000000004341217574114600257360ustar00rootroot000000000000004D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/4D/LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.tvf000066400000000000000000000167651217574114600257560ustar00rootroot00000000000000 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ 
aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii 
with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠  Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ aaacontent hereismorewithfieldnameon-ascii with Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0.tvx000066400000000000000000000010641217574114600257620ustar00rootroot00000000000000 $p,K4&<DLT\ md Hl #t | jE gBd ?LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_0_1.del000066400000000000000000000000121217574114600261150ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_1.fnm000066400000000000000000000001201217574114600257120ustar00rootroot00000000000000contentidautf8utf8content2fieⱷld compressedcompressedSizeLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_1.frq000066400000000000000000000003411217574114600257270ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_1.nrm000066400000000000000000000001241217574114600257320ustar00rootroot00000000000000NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxx|||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_1.prx000066400000000000000000000003531217574114600257530ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_1.tii000066400000000000000000000000431217574114600257230ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_1.tis000066400000000000000000000006401217574114600257400ustar00rootroot00000000000000   Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ Xthis is a compressed field and should appear in 3.0 as an uncompressed field after merge 2873aaa  content here is more with field name on-ascii with 
10 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_2.fnm000066400000000000000000000001201217574114600257130ustar00rootroot00000000000000contentidautf8utf8content2fieⱷld compressedcompressedSizeLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_2.frq000066400000000000000000000003411217574114600257300ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_2.nrm000066400000000000000000000001241217574114600257330ustar00rootroot00000000000000NRM||||||||||||||||||||xxxxxxxxxxxxxxxxxxxxuuuuuuuuuuxxxxxxxxxx|||||||||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_2.prx000066400000000000000000000003531217574114600257540ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_2.tii000066400000000000000000000000431217574114600257240ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_2.tis000066400000000000000000000006401217574114600257410ustar00rootroot00000000000000   Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ Xthis is a compressed field and should appear in 3.0 as an uncompressed field after merge 2873aaa  content here is more with field name on-ascii with 20 123456789 Lu𝄞ce𝅘𝅥𝅮ne ab񕰗cd ☠ LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_2_1.s0000066400000000000000000000000121217574114600256750ustar00rootroot00000000000000|~||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_3.fnm000066400000000000000000000001201217574114600257140ustar00rootroot00000000000000contentidautf8utf8content2fieⱷld compressedcompressedSizeLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_3.frq000066400000000000000000000001611217574114600257310ustar00rootroot00000000000000 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_3.nrm000066400000000000000000000000541217574114600257360ustar00rootroot00000000000000NRM||||||||||xxxxxxxxxxuuuuuxxxxx||||||||LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_3.prx000066400000000000000000000001661217574114600257570ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_3.tii000066400000000000000000000000431217574114600257250ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_3.tis000066400000000000000000000005751217574114600257510ustar00rootroot00000000000000 Lu𝄞ce𝅘𝅥𝅮neab񕰗cd☠Xthis is a compressed field and should appear in 3.0 as an uncompressed field after merge2873aaacontent hereismorewithfieldnameon-asciiwith301234Lu𝄞ce𝅘𝅥𝅮neab񕰗cd☠LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_4.fdt000066400000000000000000000000211217574114600257120ustar00rootroot00000000000000aaaaaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_4.fdx000066400000000000000000000000141217574114600257200ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_4.fnm000066400000000000000000000000321217574114600257170ustar00rootroot00000000000000content3Acontent4LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_4.frq000066400000000000000000000000011217574114600257230ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_4.nrm000066400000000000000000000000051217574114600257330ustar00rootroot00000000000000NRM|LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_4.tii000066400000000000000000000000431217574114600257260ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/_4.tis000066400000000000000000000000411217574114600257360ustar00rootroot00000000000000 
aaaLucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/segments.gen000066400000000000000000000000241217574114600272340ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/legacyindex/29.nocfs/segments_4000066400000000000000000000016751217574114600267240ustar00rootroot00000000000000$}.e_0 _0 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush_1 _0 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush_2 _0 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush_3_0 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush_4 java.version1.5.0_21lucene.version 2.9.1-devos Windows XPos.archx86 java.vendorSun Microsystems Inc. os.version5.1sourceflush+LucenePlusPlus-rel_3.0.4/src/test/testfiles/memory/000077500000000000000000000000001217574114600223735ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/memory/testqueries.txt000066400000000000000000000027501217574114600255150ustar00rootroot00000000000000# # queries extracted from TestQueryParser.java # Apache Apach~ AND Copy* a AND b (a AND b) c OR (a AND b) a AND NOT b a AND -b a AND !b a && b a && ! b a OR b a || b a OR !b a OR ! 
b a OR -b +term -term term foo:term AND field:anotherTerm term AND "phrase phrase" "hello there" germ term^2.0 (term)^2.0 (germ term)^2.0 term^2.0 term^2 "germ term"^2.0 "term germ"^2 (foo OR bar) AND (baz OR boo) ((a OR b) AND NOT c) OR d +(apple "steve jobs") -(foo bar baz) +title:(dog OR cat) -author:"bob dole" a&b a&&b .NET "term germ"~2 "term germ"~2 flork "term"~2 "~2 germ" "term germ"~2^2 3 term 1.0 1 2 term term1 term2 term* term*^2 term~ term~0.7 term~^2 term^2~ term*germ term*germ^3 term* Term* TERM* term* Term* TERM* // Then 'full' wildcard queries: te?m Te?m TE?M Te?m*gerM te?m Te?m TE?M Te?m*gerM term term term term +stop term term -stop term drop AND stop AND roll term phrase term term AND NOT phrase term stop [ a TO c] [ a TO c ] { a TO c} { a TO c } { a TO c }^2.0 [ a TO c] OR bar [ a TO c] AND bar ( bar blar { a TO c}) gack ( bar blar { a TO c}) +weltbank +worlbank +weltbank\n+worlbank weltbank \n+worlbank weltbank \n +worlbank +weltbank\r+worlbank weltbank \r+worlbank weltbank \r +worlbank +weltbank\r\n+worlbank weltbank \r\n+worlbank weltbank \r\n +worlbank weltbank \r \n +worlbank +weltbank\t+worlbank weltbank \t+worlbank weltbank \t +worlbank term term term term +term term term term +term term +term +term -term term term on^1.0 "hello"^2.0 hello^2.0 "on"^1.0 the^3 LucenePlusPlus-rel_3.0.4/src/test/testfiles/memory/testqueries2.txt000066400000000000000000000000511217574114600255670ustar00rootroot00000000000000term term* term~ Apache Apach~ AND Copy* 
LucenePlusPlus-rel_3.0.4/src/test/testfiles/russian/000077500000000000000000000000001217574114600225475ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/russian/resUTF8.htm000066400000000000000000000011111217574114600245130ustar00rootroot00000000000000[вмест][сил][электромагнитн][энерг][имел][представлен][скаж][жрец][древн][египт][знан][хран][тайн][узк][круг][посвящен][всяк][времен][виток][прин][соб][нов][технолог][сам][дел][раскрыва][потаен][знан][прежн][век][говор][нов][информац][станов][доступн][широк][круг][пользовател][тех][случа][сознан][обществ][готов][восприня][воспользова]LucenePlusPlus-rel_3.0.4/src/test/testfiles/russian/stemsUTF8.txt000066400000000000000000024012771217574114600251270ustar00rootroot00000000000000а абиссин абонемент абонирова абонир абонир абрикос август август август авдот авдот авдот авдот авдот авенантненьк аверк аверк авиатор авиатор авиац ав авраам австр австрийск автобиограф автограф автомоб автомобил автор автор авторитет авторитет авторитет автор автор аг агадир агамемнон агаповн агаф агаф агаф агаф агаф агент агон агрон агроном ад ад ад адамант адвокат адвокат адвокатск адвокатск адвокатск административн административн администратор адмирал ад адрес адрес адресн адресова адрес адрес адрианопол адриатическ адск адск ажн азарт азарт азбук азвольт азиатск азиатчин аз аз азра аз а а айвазовск айд ак академ академ акац акац акварел аккомпанемент аккомпанирова аккорд аккорд аккорд аккуратн аккуратн аккуратн аккуратн акмеизм аксельбант аксин аксиом акт акт акт актер актер актер актер активн актрис актрис актрис акулин акулин акулин акулин акулин акурат акушер акцент акциз акц акц акц акц ал ал алгебр алгебр алгебр алебастр алек александр александр александринск александрович александровн александровн александровн александровн александровн александр александр алексевн алексе алексеев алексеевич алексеевич алексеевич алексеев алексеев алексеев алексеич алексеич алекс алексе алел ал ален ален ал аленьк алеш алешк 
алеют але ал алл аллегор алл алл алл алле алле алмаз алмаз алмаз алмаз ал ал алоэ алтар алтар алтар алчущ ал ал ал ал ал ал альбертыч альб альбом альм альтшауер альян амал амал амал амал амал амбар амбар амбар амбар амбиц амбиц америк американск америк америк америк аметист амин амн ан анализ анализ анализирова анализир анализ анализ анало анамн ананас ананас анастас анатом анатом ангажемент ангар ангел ангел ангелин ангел ангел ангелочк ангел ангел ангельск ангельск ангельск англ английск английск английск английск английск английск англича англичан англичан англичанин англ андр андре андреевич андреевн андреевн андреевн андреевн андреевн андреев андре андреич андреич андреич андреич андр андр андре андрюш андрюшанчик андрюш андрюш андрюш андрюшк андрюшк андрюшк андрюш ан ан анекдот анекдот анечк ан анис анис анис аниськ анис анис ан ан ан ан ан антверп антип антип антипк антипк антип антон антон антракт антракт анфилад анфис анфис анчоус ан ан апат апатич апатическ апатическ апатическ апат апат апелляц апельсиннича апельсин аплодир аплодиру аполлон апоплексическ апоплекс аппетит аппетитн аппетит апраксин апрел апрел апрельск апрел аптек аптекар аптек аптек аптек аптечн апухтинск ар аравийск арапк арапк арбуз арбуз аргумент ар арендатор аренд аренд арен арен ар арест арестант арестант арестант арестантск арестантск арестант арестова арестовыва арест арест арест ар аринушк аристократическ аристократическ аристократическ аристократическ аристократическ аристократическ арифметик арифметик арифметик ариш ар аркад аркадин аркадин аркад аркад арк арк армейск арм арм арм арм армяк армяк армяк армячишк аромат аромат ароматн ароматн ароматн аромат аромат артел артельн артельщик артельщик артельщик артельщик артем артем артист артист артистическ артистическ артистк артистк артистк артист артист артур арф арф арф арф архангел архангельск археолог арх архив архимедов архитектор архитектор архитектор архитектур аршин аршин аршин асессор асессорш аскет ассигнац ассигнац 
ассоциац астм астрахан астраханск астр астров астров астров астроном а атак атлас атласн атласн атласн атласн атмосфер атмосфер атом атом атрибут аттестат аттестат а аудитор аукцион аукцион аул аус афанас афанас афанас афанас афанасьевич афанасьевич афанасьевич афиш африк африк африк афросиньюшк афросиньюшк аффектац ах аха ахан аха ахиллес ахиллесовск ахиллес ахилл ахматов ахнул ахнул ахнул ахнут ацтек аэропла аэропла б ба баб баб бабенк бабк баб бабок бабочк бабочк бабочк баб бабушек бабушк бабушк бабушк бабушкин бабушкин бабушкин бабушк бабушк баб баб баб баб баб баб багаж багров багров багров багров багров багрян багрян бад бает базар базар базар базилик баиньк ба байр байрон байрон бакалеев бакалеев бакен бакенбард бакенбард бакенбард бакенбард бакенбард бал бал балага балага балалайк балалайк бал бал бал балерин балет балет балкан балка балк балкон балкон балкон балкон балкон балкон баллад баллотирова балова балован балова баловен баловник баловн баловств балоч бал бал бал бальзак бальтазар банальн бан бан банк банк банк банк банкирск банкирш банк банковск банк баночк бараба барабан барабан барабан барак бара бара баран баранин баранин баранин баранин баран баран барашк барашк барж барин барин барин барин барин баркан барк барк бармгерциг барон барон барон барон барон барон барск барск барск барск барск барск барск барск барск барств бартол бархат бархат бархат бархатн бархатн бархатн бархатн бархат бархатц барчонк барчонк барчонок барщин барщин бар барын барынин барын барын барыш барыш барышен барышн барышн барышн барышнин барышн барышн барышн барыш бас баск баск басма басн баснословн бас баст басурман батальон батальон батар батарейн батарейн батар батаре батистов батюшк батюшк батюшк батюшк батюшков батюшк бах бахром бахром бахром бахр бахром бахус бац баш башк башлык башмак башмак башмачк башмачонк башн башн баюка бден бдительн беат беатрич бег бег бега бега бега бега бега бега бега бега бега бег бега бега бега бег бег бег бегл бегл бегл бегл бег беготн 
беготн беготн беготн бегств бег бегун бегут бегущ бегущ бегущ бед бед бед бед бед бедн бедн бедн бедн бедн бедн беднел бедненьк бедненьк бедн бедн бедн бедн бедн бедн бедност бедност бедност бедн бедн бедн бедн бедн бедн бедн бедняжк бедняжк бедняжк бедняк бедняк бедова бед бедр бедр бедр бедствен бедств бедств бедств бедств бед бед бежа бежа бежа бежа бежа беж беж беж без безбожн безбожник безбожник безбожник безбожн безбожн безбольн безбородкин безбрежн безвер безвестн безвестн безвкусиц безвкусн безвозвратн безвольн безвред безвыездн безвыходн безвыходн безграмотн безграмотн безграничн безграничн безграничн безграничн безграничн безграничн безгрешн бездарн бездарн бездарн бездарн бездейств бездейств бездейств безделиц безделиц безделк бездн бездн бездн бездн бездн бездн бездомн бездомн бездомн бездон бездон бездон бездон бездон бездон бездорож бездорож бездушн бездыхан бездыха бездыхан безжалостн безжалостн беззабот беззаботн беззаботн беззаботн беззаботн беззаботн беззаботн беззаботн беззаветн беззаветн беззакатн беззакатн беззащитн беззащитн беззащитн беззвездн беззвездн безземельн беззлоб беззуб беззуб безличн безличн безличн безлюдн безлюдн безлюд безмерн безмерн безмозгл безмолв безмолвн безмолвн безмолвн безмолвн безмолвн безмолвн безмолвн безмолвств безмужн безмятежн безмятежн безмятежн безмятежн безнадежн безнадежн безнадежн безнадежн безнадежн безнадежн безнадежн безнадежн безнаказа безначальн безначальн безнос безнравствен безнравствен безнравствен безнравствен безнравствен без безобидн безобидн безоблачн безоблачн безоблачн безобраз безобраз безобраз безобраз безобразн безобразн безобразн безобразн безобразн безобразник безобразнича безобразнича безобразн безобразн безобразн безобразн безобразн безобразн безобразн безобразн безобразн безобразн безобразн безобразн безобраз безопасн безответн безответн безответн безотрадн безотрадн безотрадн безотчетн безотчетн безотчетн безотчетн безошибочн безошибочн безполезн безрадостн безрадостн безразличн 
безрассудн безрассудн безрассудн безропотн безукоризнен безукоризнен безукоризнен безум безум безумн безумн безумн безумн безумн безумн безумн безумн безумн безумн безумн безумн безумн безумствова безумств безумц безумц безум безупречн безусловн безутешн безучаст безучастн безучастн безызвестн безымен безысходн безысходн безысходн безысходн безысходн бе бе бейт бекет бекеш бел бел бел бел белеет белел беленьк беленьк беленьк беленьк беленьк белес беле белизн белизн белизн белильн белинск бел беллетрист беллетрист беллетрист беллетрист белобрыс белобрыс беловат беловодов бел бел бел белокур белокуреньк белокуреньк белокур белокур белокур белокур бел бел белоручк белоснежн белоснежн бел бел бел бел бел бел бел бельведерск бельг бел бел бельмес бел бензол берг берг бердичев берег берег берег берег берег берег берегл берегл берегл берег берег берег беред бережет бережет бережеш бережн березк березк березняк березняк березов березов берез бер беремен берет берет берет береч береч береш бер бер берлин берлог берлог бер бер берут берут бер бес бесед бесед бесед беседк беседк беседк беседк беседова беседова беседова бесед бесед бесед бесед бесенок бес бес бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бесконечн бескорыстн бескорыстн бескрайн бескровн бескровн бесноват беспамятн беспамятн беспамятств беспамятств беспереч беспечальн беспечн беспечн беспечн беспечн беспечн беспечн беспечн бесплодн бесплодн бесплодн бесплодн бесповоротн бесподобн беспоко беспоко беспоко беспоко беспоко беспоко беспоко беспоко беспоко беспоко беспоко беспоко беспоко беспок беспокойн беспокойн беспокойн беспокойн беспокойн беспокойн беспокойн беспокойн беспокойн беспокойн беспокойств беспокойств беспокойств беспокойств беспок беспокойт беспокойт беспок беспок беспоко бесполезн бесполезн бесполезн бесполезн бесполезн бесполезн бесполезн бесполезн бесполезн беспол беспомощ беспомощн беспомощн 
беспомощн беспомощн беспомощн беспорядк беспорядк беспорядк беспорядк беспорядок беспорядочн беспорядочн беспорядочн беспорядочн беспошлин беспощадн беспощадн беспредельн беспредельн беспредельн беспредельн беспредельн беспредметн беспрекословн беспремен беспрерывн беспрерывн беспрерывн беспрерывн беспрерывн беспрерывн беспрерывн беспрерывн беспрерывн беспреста беспримерн беспристрастн бесприютн беспутн бессвязн бессвязн бессердеч бессил бессил бессил бессил бессильн бессильн бессильн бессильн бессильн бессильн бессильн бессил бесследн бессловесн бессловн бессмен бессмертн бессмертн бессмертн бессмерт бессмыслен бессмыслен бессмыслен бессмыслен бессмыслен бессмыслен бессмыслен бессмыслен бессмыслиц бессмыслиц бессмыслиц бессмыслиц бесснежн бессовестн бессознательн бессознательн бессознательн бессознательн бессознательн бессониц бессон бессонниц бессон бессон бессон бессон бесспорн бесспорн бесспорн бесстраст бесстрастн бесстрастн бесстрастн бесстрашн бесстрашн бесстыдник бесстыдник бесстыдн бесстыдн бесстыдн бессчетн бестактн бест бестолков бестолков бестолков бестужев бесхарактерн бесцвет бесцветн бесцветн бесцветн бесцветн бесцельн бесцельн бесцельн бесцельн бесцен бесцен бесцеремон бесчеловеч бесчеловечн бесчест бесчест бесчестн бесчестн бесчестн бесчестн бесчест бесчест бесчинств бесчислен бесчислен бесчислен бесчувств бесчувствен бесчувствен бесчувствен бесчувствен бесчувствен бесчувствен бесчувств бесшумн бетхов бешен бешен бешен бешен бешен бешенств бешенств бешенств бешенств бешен бешен библейск библиотек библиотек библиотекар библиотек библ библ бива бива бивш бивш биен биен биен биен бил бил бил билет билет билет билет билетик билетик билетик билет билет билет билет бил биллиард биллиард биллиард бил бил бил бил бильярд бильярд бильярдн бильярдн бильярдн бинокл бинокл биограф бирж бирж бис бисер бисер бисквит бисквит битв битв битв битв битк бит бит бит бит бит бит бифштекс бифштекс бицепс бич бич бичурин биш бла благ благ благ благ благ благовещен 
благовидн благовол благовон благовон благовоспита благовоспита благоговеет благоговейн благоговейн благоговейн благоговел благоговен благоговен благоговет благогов благодар благодар благодар благодар благодар благодар благодар благодарн благодарн благодарн благодарн благодарн благодарн благодарн благодарн благодар благодар благодат благода благодетел благодетел благодетел благодетел благодетельн благодетельн благодетельствова благодетельствова благодетельств благодетел благодеян благодеян благодеян благодушн благолеп благонадежн благонамерен благонамерен благонравн благонравн благополучн благоприобретен благоприобретен благоприятн благоприятн благоприятн благоприятствова благоразум благоразум благоразумн благоразумн благоразумн благоразумн благород благород благородн благородн благородн благородн благородн благородн благородн благородн благородн благородн благородн благородн благородн благородн благородн благородств благородств благородств благородств благосклон благосклон благосклон благословен благословен благословен благословен благословен благословен благослов благослов благослов благослов благослов благослов благослов благословл благословля благословля благословля благословля благословл благослов благослов благосостоян благосостоян благост благост благотворител благотворительн благотворительн благотворн благотворн благоугодн благоусмотрен благоуха благоуха благоуха благоухан благоуха благоуха благочестив блажен блажен блажен блажен блажен блажен блажен блаженств блаженств блаженств блаженствова блаженствова блаженствова блаженств блаженств блаженств блажн блаж блаж бланбек бланбек блед бледн бледн бледневш бледн бледнеет бледнел бледненьк бледнет бледнеют бледне бледн бледн бледн бледн бледн бледност бледночернильн бледн бледн бледн бледн бледн бледн бледн блекл блеск блеск блеск блеск блеснет блесн блеснувш блеснул блеснул блеснул блеснул блеснут блестел блестел блест блестк блест блестя блестя блестя блестя блестя блестя блестя блестя блестя блещет блещеш 
блещут блещущ блещущ блещущ блеющ ближайш ближайш ближайш ближайш ближайш ближ ближн ближн ближн близ близ близк близк близк близк близк близк близк близк близк близк близк близк близлежа близок близорук близост близ блин блин блин блин блиста блиста блиста блиста блиста блистан блистательн блистательн блистательн блиста блиста блиста блиста бло блок блонд блонд блондин блондинк блондинк блондин блонд блох блудниц блужда блужда блужда блужда блужда блужд блуз блуз блю блюд блюд блюд блюд блюдечк блюдечк блюдечк блюд блюд блюдц блюст блюстител бо боб бобик бобик бобик боб бобров боб бог бог богат богат богат богатеет богатеют богат богат богат богат богат богатств богатств богат богат богат богат богат богатыр богатыр богатыр богат богач богач богач богач богданович богданыч бог богем богем богемск бог богин бог богомол богомольн богомольн богомольн богородиц богородиц богохульник бог бодр бодр бодр бодр бодр бодр бодр бодрост бодрост бодрост бодрствова бодрствова бодрств бодр бодр бодр бодяг боев бо боец бож божеств божествен божествен божествен божествен божеств бож бож бож бож бож бож бож бож бож бож бож бож бо бо бо бо бойк бойк бойк бойк бойк бойк бойк бойн бо бойт бок бок бока бока бокал бокал бокал бок бок бок бокл бок боков боков боков боков бок бок болва болван болван бол бол болеет болезн болезн болезнен болезнен болезнен болезнен болезнен болезнен болезнен болезнен болезнен болезнен болезнен болезнен болезн болезн болезн болезн болезн болезн болел болел болел болел бол болет болеют бол бол болос болот болот болотист болотн болотн болотн болот болот болта болта болта болта болта болта болта болта болта болта болта болта болт болтл болтлив болтлив болтлив болтовн болтовн болтовн болтовн болтун болтунишк болтушк бол больн больн больн больн больниц больниц больниц больниц больниц больн больн больн больн больн больн больн больн больн больн больн больн больн больш больш больш большеголов больш больш больш больш больш больш больш большинств большинств большинств 
больш больш больш больш больш больш больш больш бол бол бомб бонжур бон бор борел борет борис борис борисович борис бормота бормота бормота бормотан бормотан бормочет бормочеш бор бород бород бородк бород бород бород борозд борол борол борол борол борот борт борт борт борт борц борьб борьб борьб борьб борьб борьб борют бор бос босеньк босик боскет бос босоног бостон бос бос ботаник ботвин ботвин ботинк ботинк ботинк ботиночк бочк бочк бочк бочк бочонк бочонок бо бо бо боя боя боязлив боязлив боязлив боязн боязн боязн боя боя боя бо бо боя брав брав брак брак брак брак брак брак брал брал брал брал брал бранд бран бран бран бран бран бран бран бран бран бран бранч бран бран бран бран бран браслет браслет браслет браслет брат брат брат братец брат братишк брат братск братств брат братц братц братц братц братц брат брат брат брат брат брат брачн брачн бревн брег бред бред бред бред бред бред бред бред бред бред бред бред бреет брежж бреж брезга брезга брезглив брезгова брезж брел брелок брелок бремен бремен брем брен брен бренча бренч бретер бригад бригад бригад бриллиант бриллиант бриллиантов брил брит брит бритв бритв брит брит брит бров бров бров бров бров бров брод брод брод брод брод брод брод бродяг бродяг бродяг бродяжнича бродяжничеств брод бродяч бронз бронзов броса броса броса броса броса броса броса броса броса броса броса броса броса броса броса броса брос брос брос брос брос брос брос брос брос брос брос брос брос брос брос брос брос брос бро бросьт брос брос брош брош брошен брошен брошен брошен брош брош брошк брошк брош брош брош брошюр брошюр брошюр бррр брудершафт брусничн брут брызг брызга брызг брызжут брызнув брызнул брызнул брызнул брызнут брюжж брюзг брюзглив брюзглив брюзглив брюзглив брюзжа брюзж брюк брюкв брюк брюнет брюнет брюсов брюх брюх брюшк бряка брякн брякнул брякнул брякнут бряца буб бубенчик бубенчик бубенчук бубнов бубн бугорк бугорк буд буд будемт будет будет будеш буд буд буд буд буд будиру буд буд будн будн будн будничн будничн 
будничн будничн будничн будничн будн будн будок будораж будочник будочник будт буд будуар будуар будут будуч будущ будущ будущ будущ будущ будущ будущ будущ будущ будущ будущн будущн будущ буд будьт буд буд буерак бузин буйн буйн буйн буйн буйн буйн буйн буйн буйн буйствен буйств буйств букашк букв буквальн буквальн букв букв букв букв букет букет букет букет букет бук букинист булавк булавк булав булк булк булк булок булочек булочк булочник булочн булт бул бульвар бульвар бульвар бульвар бульдог бульон бумаг бумаг бумаг бумаг бумаг бумаг бумаг бумаг бумаг бумажечк бумажк бумажк бумажк бумажк бумажк бумажник бумажник бумажн бумажн бумажн бумб бунтова бунт бунчук бура бурд бур бур буржуазн бур буркал бурмейстер бурн бурн бурн бурн бурнус бурнусик бурнусик бурн бурн бурн бур бур бурсак бурш бурш бур бур бурья бурьян бурьян бур бур бур бур бутончик бутошник бутылк бутылк бутылк бутылк бутылк бутылк бутылок бутылочк бутылочк бутылочн буфер буфет буфет буфет буфет буфетчик буфетчик буфет буффон бух бухт бушева буш буш буя буян буян бы быв быва быва быва быва быва быва быва быва бывал бывал бывал бывал бывал быва быва быва бывш бывш бывш бывш бывш бывш бывш бывш бывш бывш бывш бывш бывш был был был был былинк был был был был был был был быстр быстр быстр быстрин быстр быстр быстр быстрот быстрот быстрот быстрот быстр быстр быстр быт быт быт быт бытийствен быт бытност быт быт быт быт бьем бьет бьет бьеш бью бьют бьют бьющ бьющ бюджет бюджет бюргер бюргер бюргер бюргерск бюргерск бюр бюст бюст в вавиловк вагнер вагон вагон вагон вагон вагон вагон важн важн важн важн важнича важн важн важн важн важн важн важност важност важност важност важн важн важн важн важн важн ваз ваз вакс вакханк вал валанда валентин валерианов валер валет вал вал вал вал вал вальдшнеп вальс вальс вальс вальсишк вальтер валя валя валя валя валя валя валя вам вам вампир вампирствен ван ван ванечк ванечкин ванечк ван ван ванил ванил ван ваньк ван ванюш ванюш ванюш ванюш ван варвар варвар варвар вар 
варен вареник вареник варен варен варен варенц варен варен варен варен вар вариац вар вар варин вар вар варшав варшав варшав варшавск варшав варьяц варьяц вар вар варяг вар вас васил василис василис васил васильев васильевич васильевн васильевн васильевн васильевск васильевск васильевск васильевск васильев васил васильич васильич васьк васьк васьк ватаг ватерло ватн ват ваточн ваточн ватрушк ватрушк ват ват вафл вахрушин вахрушин ваш ваш ваш ваш ваш ваш ваш ваш ваш ваш ваш ваш ваш вбега вбежа вбеж вбива вбил вблиз вбок ввел ввел вверга вверга вверен ввер ввернет вверн ввернул ввернут вверх вверх вверя ввест ввид ввод ввод ввож ввоз ввы ввяза вглуб вглядевш вглядел вгляд вглядыва вглядыва вглядыва вглядыва вглядыв вдава вдавлен вдал вдалек вдал вдал вдво вдво вдвойн вдева вдесятер вдобавок вдов вдов вдовеет вдовец вдов вдовиц вдовиц вдов вдов вдов вдогонк вдол вдохновен вдохновен вдохновен вдохновен вдохновен вдохновен вдохновен вдохновен вдохновен вдохновен вдохновен вдохнул вдохнут вдребезг вдруг вдругоряд вдума вдума вдумчив вдумчив вдумыва вдумыва вдумыв ве веда веда веда веда веда веда веда веда вед веден веден веден ведет ведеш вед вед ведом ведом ведом ведом ведомств ведр ведр ведр ведр вед ведут ведущ ведущ ведущ ведущ вед ведьм ведьм ведьм вед веер веет вежд вежл вежлив вежлив вежлив вежлив вежлив вежлив везд везет везеш везл везт вез везув везущ век век век век век век век век вековечн вековечн веков веков веков вексел векселек вексел вексел век вел вел велел велел велел велен вел велет вел велик велик велика великан великан велика велик велик велик велик велик велик велик великодуш великодуш великодушн великодушн великодушн великодушн великодушн великодушн великодушн великодушн великодушн велик велик великолеп великолепн великолепн великолепн великолепн великолепн великолепн великолепн великолепн великолепн великолепн великолепн великолепн великолепн велик велик великонек велик вел вел величав величав величав величав величайш величайш величайш величайш 
величайш величайш величайш величайш велича велича величествен величествен величеств велич велич величин величин величин величин велич вел велосипед вел вельмож вельмож вельмож вел вел венгерск вен венерическ венец венецейск венецианск венецианск венец веник веник веничек веничк венк венк венк венк венк венок венц венц венц венча венча венча венча венчальн венча венча вен вер вер верблюд верб вер веревк веревк веревк веревк веревк веревочк веревочн вер верениц верениц верениц верет вер вер вер вер вер вер вер вер верн верн верн верн верн вернет вернет вернеш вернеш верн верн верн верн верн верн верн верн верност верност верност верн вернув вернувш вернул вернул вернул верн вернут вернут вернут верн верн верн верн верн верн верова верова верова верован верова вер вероломн вероломств вероломств вер вероятн вероятн вероятн вероятн вероятн верс верст верст верст верстов верст верст вертел вертел вертеп вертер вертет верт верт верт верт верт вертляв вертун верт верт верт вер вер верует вер веруеш вер вер вер вер вер вер веру верф верх верхлев верхлев верхлев верхлевск верхлевск верхн верхн верхн верхн верхн верхн верхн верхн верхн верхов верхов верховск верх верх верхушк верхушк верша вершин вершин вершинин вершинин вершинин вершинин вершинин вершинин вершин вершк вершок вер вер верьт вер вер вер вес вес весел весел весел весел весел веселе весел весел весел весел весел весел весел весел весел весел весел весел весел весел весел весел весельчак весел весен весен весен весен вес весл весл весл весн весн весн весн весн весталк вест вестник вест вест вест вест вес вес ве весьм ветв ветв ветв ветер ветерк ветерк ветерок ветк ветк ветк ветк ветк ветк ветл веток ветр ветр ветрен ветрен ветрен ветров ветр ветр ветх ветх ветх ветх ветх ветх ветчин ветчин вечер вечер вечер вечер вечер вечереет вечерел вечерк вечерн вечерн вечерн вечерн вечерн вечерн вечерн вечерн вечер вечерок вечер вечер вечн вечн вечн вечн вечн вечн вечност вечност вечн вечн вечн вечн вечн вечн веша веша веша 
веша веша веш веш вешн вешн вешн вешн вещ вещ вещан вещ вещ веществ веществен вещ вещ вещ вещиц вещиц вещ вещ вея вея веян веян взад взаимн взаимн взаимн взаимн взаимн взаимност взаимност взаимност взаимн взаимн взаимн взаимн взайм взам взаперт взаправд взапуск взбега взбежа взбежа взбес взбес взбес взбес взбешен взбира взбира взбир взбит взбит взборозд взбунтова взбунт взведет взведет взвел взвел взвес взвес взвешива взвешива взвешив взвива взвизг взвизгнул взвизгнул взвизг взвизг взвил взвил взвод взводим взвод взволнова взволнова взволнова взволнова взволнова взволнова взволнова взволнова взволнова взволнова взволнова взволнова взволнова взволн взвыл взгляд взгляд взгляд взгляд взгляд взгляд взгляд взгляд взглядыва взглядыва взглядыва взглядыва взглядыван взглядыва взглядыв взглянет взглянеш взглян взглян взглян взглянув взглянул взглянул взглянул взглянут вздернет вздернут вздор вздор вздорн вздорн вздорн вздорожа вздор вздор вздох вздох вздох вздохнет вздохн вздохн вздохнув вздохнул вздохнул вздохнул вздохнут вздох вздрагива вздрагива вздрагива вздрагива вздрагива вздрагиван вздрагива вздрагива вздрагива вздрагив вздремнет вздремнул вздремнул вздрогнет вздрогнув вздрогнул вздрогнул вздрогнул вздрогнут вздул вздул вздума вздума вздума вздума вздума вздума вздума вздыма вздыма вздыха вздыха вздыха вздыхател вздыха вздыха вздыха вздых взир взламыва взлет взлета взлетел взлет взлеч взлом взлохмач взмахнет взмахн взмахнув взмахнул взмах взметут взнос взобра взобьет взовьет взойдет взойд взойд взойт взор взор взор взор взор взор взор взошел взошл взошл взросл взросл взросл взросл взрыв взрыв взрыв взрыв взрыв взъерошен взыва взыва взыва взыва взыв взыгра взыскан взыскан взыскан взыскан взыскан взыскан взыскательн взыскательн взыскательн взыска взыскива взыскива взыщ взяв взявш взял взял взял взял взял взял взял взял взятк взяток взяточник взяточник взяточник взят взят взят взят взят вив вид вид вида вида вида вида вида вида вида вида вида вид вид видевш видевш видел 
видел видел видел видел видел видемш вид вид виден виден виден виден виден виден виден виден виден виден виден видет видет вид видим видим видим видим видим видим видим вид видим видим видим вид вид вид вид вид видн видн виднел виднел виднел видн видн видн видн вид видоизменен видоизменя вид вид вид видыва видыва вид вид вид видя виж виж визав визг визглив визглив визг визжа визжал визжат визжа визж виз визит визит виз визитик визитн визитн визитн визитн визит визит визит вил виланд вил вилк вилк вилк вилк вилл вил вильн виля вил вин вин вин вин вин вин винительн вин вин вин вин вин виноват виноват виноват виноват виноват виноват виноват виноват виноват винов виновник виновник виновниц виновн виновн виноград виноград виноградн виноград вин винт винт винт винтов винтообразн винтообразн винт вин винц винч вин вин вин виртуоз виртуоз висевш висел висел висел висет вис вис виск виск виск виск висл висл висл виснет висок висяч вися вися вися витиеват витиеват вит витрин вифан вифлеем вихор вихраст вихр вихрев вихр вихр вихр вихр вихр виц вицмундир вицмундир вицмундир виш вишен вишер вишнев вишнев вишнев вишнев вишн вишн вишн виш ви вкладыва включ вколачива вколот вконец вкопа вкопа вко вкрадчив вкрадыва вкрал вкрал вкрив вкруг вкус вкус вкус вкус вкус вкусн вкусн вкусн вкусн вкус вкус вкуша вл влага влага влаг влаг владевш владеет владел владел владелец владельц владетельн владет влад владе владимир владимир владимирк владимирк владимирович владимир владычеств владычествова владычествова владычиц владычиц влажн влажн влажн влажн властвова властвова властв власт властелин власт власт властительн властн властн власт влачат влев влез влеза влез влезет влезеш влезл влезт влезут влекл влекл влеком влекущ влекущ влетел влетел влетел влет влечен влечен влечен влечен влечет влечеш влива влива влива влил влия влия влиян влиян влиян влиян влиян вложен влож влож влож влопа влюб влюб влюб влюб влюбл влюбл влюблен влюблен влюблен влюблен влюблен влюблен влюблен влюблен влюблен 
влюблен влюбл влюбл влюбля влюбля влюбля влюбля влюбл вмест вмест вмеша вмеша вмеща вна внакидк внакладочк вначал вне внезапн внезапн внезапн внезапн внезапн внезапн внезапн внезапн внезапн внезапн внезапн внезапн внезапн внезапн внемл внемлют внес внесен внес внесл внесл внест внес вне внешн внешн внешн внешн внешн внешн внешн вниз вниз вника вника вника вник вникн вникнул вникнут внима внима вниман вниман вниман вниман вниман вниман вниман внимател внимательн внимательн внимательн внимательн внима внима вним внов внов внос внос внос внос вно внук внук внутрен внутрен внутрен внутрен внутрен внутрен внутрен внутрен внутрен внутрен внутрен внутрен внутрен внутр внутр внучат внучк внуша внуша внуша внуша внушен внушен внушен внушен внуш внуш внуш внушительн внушительн внушительн внушительн внуш внят внятн внятн внятн во вобл вовек вовлеч воврем вовс вогна вод вод водворен водвор водвор водвор водворя водвор вод водевил вод вод вод вод вод вод вод вод водиц водк водк водк водк водн водовоз водоворот водоем вод водопад водопад водочк водочк вод вод вод водян водянист водянист водян водян водян водян вод воедин воен воен воен воен воен воен воен воен воен воет вожак вожд вожд вожж воз воз возалка воз возблагодар возблагодар возбуд возбуд возбуд возбудим возбуд возбуд возбужда возбужда возбужда возбужда возбужда возбужда возбужд возбужден возбужден возбужден возбужден возбужден возбужден возвед возведен возвел возвел возвест возвещ возврат возврат возврат возврат возврат возврат возврат возврат возврат возврат возврат возвратн возвратн возврат возвраща возвраща возвраща возвраща возвраща возвраща возвраща возвраща возвраща возвраща возвраща возвраща возвраща возвра возвращен возвращен возвращен возвращен возвращ возвыс возвыс возвыс возвыша возвыша возвыша возвыша возвыша возвыш возвыш возвышен возвышен возглаголет возглагол возглас возглас воздвигл воздвигнут воздел возделыва воздержан воздержн воздусех воздух воздух воздух воздух воздух воздушн воздушн воздушн воздушн 
воздыхан возжаждеш воззва воззван воззрен воззрен воззрен воз воз воз воз воз воз воз возл возлелея возлож возлюб возлюб возлюб возлюблен возлюблен возлюблен возлюблен возлюбля возмезд возмезд возможн возможн возможн возможн возможн возможн возможн возмут возмут возмутительн возмутител возмут возмут возмуща возмуща возмущ возмущ возмущ вознаград вознагражда вознагражд вознагражден вознагражден вознамер возненавидел возненавидел возненавид возненавид возненавиж вознесенск вознес вознес возн возник возника возника возника возника возника возникл возникл возникнет возникновен возникш вознос вознос возн возн возн возобнов возобнов возобновля возобновля воз воз возоп возража возража возражен возражен возраз возраз возраз возраст возраст возраста возраста возраста возраста возраста возраст возраст возраст возраст возрод возрожда возрожден возрожден возросл возросл воз возымел возымет возьм возьмет возьмет возьмет возьмет возьмеш возьм возьм возьм возьмут возьмут воз воин воин воин воин воистин во войд войдет войдеш войд войд войд войдут войд войлок войлок войлок войн войн войн войн войницк войницк войницк войницк войницк войницк войн войн войск войск войск войт вокза вокзал вокзал вокруг вол волг волг вол вол вол вол вол волк волк волк волн волн волн волн волнен волнен волнен волнен волнен волнен волнен волнен волнен волнова волнова волнова волнова волнова волнова волнова волнова волн волн волн волну волн волнует волнует волн волнуеш волнуеш волн волн волн волн волн волн волн волн волну волн вол волокит волокит волок вол волос волос волос волос волосат волос волосенк волосик волоск волоск волост волос волоч волчк волчок волч волшебн волшебник волшебниц волшебниц волшебниц волшебн волшебн волшебн волшебств вол вольн вольн вольнодумн вольнодумц вольн вольн вольн вольн вольн вольн вольн вольн вольтер вол вол вон вон вонза вонза вонза вонз вонз вонз вон вон вон вонюч вонюч вообража вообража вообража вообража вообража вообража вообража вообража воображ воображен воображен 
воображен воображен воображен воображен воображен вообраз вообраз вообраз вообраз вообраз вообраз вообраз вообраз вообраз вообраз вообраз вообщ вооружа вооружа вооруж вооружен вооружен вооружен вооруж вооруж воп воп вопиющ вопиющ вопл вопл вопл вопл воплот воплот воплоща воплощен воплощен вопл вопл вопл вопл вопрек вопрос вопрос вопрос вопрос вопрос вопрос вопросик вопросительн вопросительн вопросительн вопросительн вопрос вопрос вопрос вопрос вопроша вопроша вопроша вопроша вопьет вор вор ворва ворва ворва ворвет вороб воробушек воробушк вор воровал ворова воровк воровк воровск ворож вор ворон ворон воронк ворон ворон ворон ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот ворот воротник воротник воротник воротничк воротничк воротничок ворот ворот ворох вороча вороча вороча вороча вороча вороча вороча вороча вороча вороча вороча вороча вороч вороч вороч вор ворует вор ворч ворча ворча ворча ворчан ворчан ворча ворч ворч ворчлив ворч вор восемнадцат восемнадца восем восемьдес восемьсот воск воскликнул воскликнул восклица восклица восклицан восклицан восклицан восклицан восклицан восклицан восклицан восклиц восклон воскрес воскреса воскресен воскресен воскресен воскресен воскресен воскресен воскресен воскрес воскрес воскрес воскресл воскресл воскреснет воскреснув воскресн воскресн воскресн воскресш воскрес воскреша воскреша вослед воспален воспален воспален воспален воспален воспален воспален воспита воспита воспита воспита воспитан воспитан воспитан воспитан воспита воспита воспита воспита воспита воспита воспитыва воспитыва воспитыва воспитыв воспламен воспламен воспламеня воспользова воспользова воспользова воспользова воспольз воспоминан воспоминан воспоминан воспоминан воспоминан воспоминан воспоминан воспоминан воспоминан воспоминан воспослед воспрещ восприимч восприимчив восприимчив восприимчив восприимчив восприня воспринят воспроизвод воспротив воспрянут восскорбел восста восстава восста восста 
восста восстанет восстанов восстанов восстанов восстанов восстановл восстановля восстановл восстанут восстан востер восток восток восторг восторг восторг восторг восторга восторг восторг восторг восторжен восторжен восторжен восторжен восторжен восторжен восторжен восторжен восторжен восторжен восторжен восточн восточн востреньк востреньк востр востроног востр востр востр восхвал восхитительн восхища восхища восхищен восхищен восхищен восход восход восход восходн восхожден восьмерк восьм восьмидесят восьмидесят восьмилетн восьм восьм восьм восьм восьм вот воткнул воткнут вотчин воцар воцар воцаря вочеловеч вошедш вошедш вошедш вошедш вошедш вошедш вошедш вошел вошл вошл вошл вош воюет воющ во вояж вояж вояж вояжир впавш впада впада впада впада впад впадеш впал впал впал впаст вперв вперв вперегонк вперед вперед впер впер впечатлен впечатлен впечатлен впечатлен впечатлен впечатлительн впечатлительн впива впив впил впил вплел вплест вплета вплот вповалк вполглаз вполголос вполза вполза вполн вполовин вполуоборот впопад впопых впор впоследств впотьм вправд вправ вправ впред впридач впрок впроч впряга впряж впрям впуска впуска впуска впуст впуст впятер вр враг враг враг враг врагин враг враг враг вражд враждебн враждебн враждебн враждебн враждебн враждебн враждебн враждебн вражд вражд вражеск вражеск враж враж враж враздроб вразум вразумихин врал вран вран врасплох врат врач врач врач врачебн врач враща враща враща вревск вред вред вред вред вредн вредн вредн вредн вредн вредн вред вреза врезыва врем врем врем времен времен времен времен времен времен времен времен времен времен времечк врем врет врет вреш ври врод врожден врожден врожден вроз вросл вру врубел врубел врубел врут вручен вруч вруч врыва врыва врыва врыв врыт вряд всаж все всеблаг всевдоним всевозможн всевозможн всевышн всевышн всегд всегдашн всегдашн всегдашн всегдашн всегдашн всег вседержител вседневн всеевропейск всезнан всезнан всезна всезна все вселен вселен вселенск всел всел вселитературн всел 
вселя вселя всем всем всемирн всемирн всемирн всем всенаполня всенародн всенижайш всен всенощн всеобщ всеобщ всеобщ всеобщ всеобъемлем всеоруж всепоглоща всесветн всесмета всесторон всеуслышан всех всецел все вскакива вскакива вскакива вскакива вскакив вскач вскинув вскинул вскинул вскинул вскинут вскинут вскип вскипевш вскипел вскипел всклокочен всклочен вско всколос всколыхнул вскольз вскор вскоч вскоч вскоч вскоч вскоч вскоч вскоч вскрикива вскрикива вскрикиван вскрикива вскрикив вскрикнет вскрикн вскрикнул вскрикнул вскрикнул вскрикнут вскрикнут вскрича вскрича вскруж вскрыва вскрыва вскрыл вскрыт всласт вслед вследств вслух вслушива вслушива вслушива вслушива вслушив всматрива всматрива всматрива всматрив всплескив всплеснув всплеснул всплеснул всплеснул всплыва всплывут всплыл вспоен всполохнут всполош всполош вспомина вспомина вспомина вспомина вспомина вспомина вспомина вспомина вспомина вспомина вспомина вспомина вспомин вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспомн вспоможен вспоможен вспоможен вспорхнул вспотевш вспрыгива вспрыгнул вспрянул вспугнул вспугнут вспухл вспухш вспыл вспыл вспыльчив вспыльчив вспыльчив вспыл вспыхива вспыхива вспыхива вспыхива вспыхнет вспыхнув вспыхнул вспыхнул вспыхнул вспыхнул вспыхнут вспышк вспышк вспышк вспят встав встава встава встава встава встава встава встав встав встав вставля вставш вставш вставш встает встает встаеш встал встал встал встанемт встанет встанеш встан встанут встан встаньт встар встат вста встают встревож встревожен встревожен встревожен встревожен встревожен встревожен встревожен встревож встревож встревож встревож встревож встрепа встрепенет встрепенул встрепенул встрепенул встрепенут встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встрет встреч встреч встреча встреча встреча встреча встреча встреча встреча встреча встреча встреча встреча встреча встреча встреча 
встреча встреч встреча встреча встреча встреча встреч встреч встреч встречен встреч встречн встречн встречн встречн встречн встречн встречн встреч встряхива встряхн встряхнул встряхнул встряхнут встряхнут вступа вступа вступа вступа вступ вступ вступ вступ вступ вступ вступ вступ вступ вступ вступлен вступлен всунув всунул всунут всхлипнут всхлипыва всхлипыва всхлипыва всхлипыва всхлипыв всход всход всход всю всюд вся всяк всяк всяк всяк всяк всяк всяк всяк всяк всяк всяк всяк всяк всяк всяк всяческ втайн втащ втащ втесн втира втихомолк втолкова втопта втор вторга вторичн вторичн вторник втор втор втор втор второп второстепен втор втор втор втор втро втун втыка втягива втягиван втягива втягива втянет втянет втянув втянул вуа вуал вулка вулканическ вулканическ вход вход вход вход вход вход вход вход вход вход вход вход вход вход вход входя входя входя входя входя вхож вцеп вцеп вцеп вчер вчерашн вчерашн вчерашн вчерашн вчерашн вчерашн вчерашн вчерашн вчерашн вчерашн вчерашн вчерн вчетвер вчуж вше вши въел въел въеха въеха въеха вы выбега выбега выбежа выбежа выбежа выбежа выбеж выбеж выберет выбер выберут выбива выбив выб выбира выбира выбира выбира выбира выбира выбир выб выбор выборгск выборгск выбор выбор выбра выбра выбра выбра выбра выбра выбра выбрасыва выбрасыва выбра выбра выбр выбр выбрит выбрит выбрит выброс выброс выбр выбросьт выброшен выведа выведа выведен выведет выведеш вывед вывед выведут вывезен вывезен вывезт вывел вывел вывел вывел вывернул вывернул вывернул вывернут вывернут выверт вывеск вывеск вывеск вывест выветрива выветр вывеш вывод вывод вывод вывод вывод вывод вывод вывод вывод вывож выворот выворот выворот выгада выгада выглад выгляд выглядыва выглядыва выглядыва выглядыва выглядыва выглядыва выглядыв выглянет выглянув выглянул выглянул выгна выгна выгна выгна выгнут выговарива выговарива выговарива выговарива выговарива выговарива выговарива выговарив выговор выговор выговор выговор выговор выговор выговор выгод выгод выгод выгодн выгодн 
выгодн выгодн выгодн выгодн выгод выгод выгон выгон выгоня выгор выгребл выдава выдава выдава выдава выдава выдав выдав выдавлива выдад выда выда выда выда выда выда выда выда выдаст выдаст выда выда выда выда выдвинул выдела выдела выделк выделыва выделыв выдергива выдержа выдержа выдержа выдержа выдержа выдержива выдержива выдержива выдержива выдержив выдерж выдерж выдерж выдерж выдернул выдернул выдох выдра выдрессирова выдума выдума выдума выдума выдума выдума выдума выдума выдума выдума выдума выдумк выдумк выдумк выдумок выдумыва выдумыва выдумыва выдумыва выдумыва выд выдьт выед выед выезд выездн выездн выезжа выезжа выезжа выезжа выезжа выезжа выезжа выеха выеха выеха выеха выжда выжда выжеч выжж выжиг выжида выжид выж выж выжима выжмет вызва вызва вызва вызва вызва вызва вызва вызва выздоравлива выздоравлива выздоравлив выздоровевш выздоровееш выздоровел выздоровет выздоровлен выздоровлен выздоровлен выздоровл выз вызов вызовет вызыва вызыва вызыва вызыва вызыва вызыва вызыва вызыва вызыв выигра выигра выигра выигра вы выйд выйдемт выйдет выйдеш выйд выйд выйдут выйд выйт выказа выказа выказыва выказыва выказыва выказыва выказыва выкат выкат выкладк выкладыва выковыва выколачива выколот выколоч выкопа выкопа выкрас выкрикива выкрикива выкрич выкупа выкуп выкуп выкуплен выкуплен выкупл выкурен выкурива выкур выл выл вылазк вылака вылез вылеза вылез вылеплен вылерианов вылета вылетел вылетел вылетел вылет вылеч вылеч вылеч выл вылит выл вылож вылож вылож вылож вылож вылуп выл вымаза выманива выман вымел вымерл вымета вымет вымолв вымолв вымоч вымоют вымпел вым вым вым вым вым вымысл вымысл вымыт вымыт вымыт вымыт вым вым вынес вынес вынесен вынесет вынес вынесл вынесл вынест вынес вынет вынима вынима вынима выним вынос вынос вынос вынос вынос вынос выносим вынос вынос вынос вынош вынув вынуд вынужден вынул вынул вынул вынул вынут вынут вынут выпада выпада выпада выпада выпаден выпадет выпа выпа выпа выпа выпачка выпачка выпачка выпачка выпева выпева вып вып 
выпива выпива выпива выпивательн выпивк вып вып вып вып выпилива выписа выписа выписа выписк выписыва выписыва выписыва вып выпит выпит выпит выпит выпит выпит вып выплака выплака выплака выплакива выплат выплат выплач выплеснет выпл выполн выправ выпрашива выпровод выпрос выпрос выпрос выпрос выпрош выпрыгнул выпрыгнут выпрям выпрям выпрямля выпрямл выпрямл выпукл выпукл выпукл выпукл выпукл выпуска выпуска выпуск выпуск выпуст выпуст выпуст выпуст выпуст выпуст выпуст выпута выпутыва выпуч выпуч выпуч выпущ выпущен выпущ выпущ выпыта выпыта выпытыва выпытыва выпытыв вып выпьет выпьеш вып выпьют выработа выработа выработа выработан выработа выработа выработа выработыва выработыва выработыва выража выража выража выража выража выража выража выража выража выража выража выража выража выраж выражен выражен выражен выражен выражен выражен выраж выражен выражен выраженьиц выраз выраз выраз выраз выраз выраз выраз выразительн выразительн выразительн выразительн выраз выраз выраз выраста выраста вырастет выраст вырва вырва вырва вырва вырва вырва вырва вырва вырва вырва вырва вырва вырва вырва вырвет вырв выреза выреза выреза вырожден вырожден вырожден вырон вырон вырос выросл выросл выросл выросш выруб выруга выруга выруча выруч выручен выруч выруч выруч выруч выручк вырыва вырыва вырыва вырыва вырыва вырыва вырыва вырыв высасыва высасыва высвобод высвобод высвобод высвобожда высел выселк выселк высечет высидел выскаблива выскажет выскажет выскаж выскаж высказа высказа высказа высказа высказа высказа высказа высказыва высказыва высказыва высказыва высказыва высказыва высказыва высказыва высказыва высказыв выскольза выскользнул выскоч выскоч выскоч выскоч выскоч выскочк выскочк выскребыва высла высла выслеж выслуж выслуж выслуша выслуша выслуша выслуша выслуша выслуша выслуша выслушива выслушива высматрива высматрива высматрив высморка высморка высмотрет высовыва высок высок высокеньк высокеньк высок высок высок высок высок высок высок высокоблагород высок высок высок 
высок высокомер высокомер высокомерн высокомерн высокомерн высокомерн высокомерн высок высокопарн высокород высокоуважа высок высот высот высот высох высохл высохл высохл высохш высохш высочайш высочайш выспа высп выспл выспрос выстав выстав выстав выстав выстав выстав выставл выставл выставл выставля выставля выставля выстоя выстрада выстрада выстрел выстрел выстрел выстрел выстрел выстрел выстрижен выстро выстро выступа выступа выступа выступ выступ выступ выступ выступ выступ выступ выступлен высунув высунул высунут высуш высуш высчита высш высш высш высш высш высш высш высыла высыпа высыпа высыпа вы вытаращ вытаращ вытаращ вытаращ вытаращ вытаскива вытаскива вытаскива вытаскив вытащ вытащ вытащ выта вытащ вытащ вытащ вытер вытерл вытерпел вытерпел вытерпел вытерпет вытерп вытерплива вытерпл вытира вытира вытир вытолка выторгова выточ выточ выточен выточ выт вытягива вытягив вытягушкин вытянет вытянув вытянул вытянул выуд выучен выуч выучива выуч выуч выуч выуч выуч выучк выуч выхват выхват выхват выхлопота выхлопота выхлопочет выход выход выход выход выход выход выход выход выход выход выход выход выход выход выход выход выход выходк выходк выходк выходк выходн выход выход выход выход выход выходя выходя выходя выхож выцвел выцветш выцветш вычеркнут вычерт вычет вычет вычин вычисля вычист вычита вычитан вычита вычита вычитыв вычищен вычищен вычищен вычищ вышвырнул выш вышедш вышедш вышедш вышедш вышел вышесказа вышеупомянут вышиб вышива вышиван вышив вышин вышит вышл вышл вышл вышл вышл выщипа выяснен выясн выясн выясня вьет вьюг вьюг вьюг вьюг вьюг вьюг вьюн вьют вьющ вьющ вяжет вяжут вяз вязанк вяза вяза вяземск вязк вязников вязнут вяз вял вял вял вял вял вял вял вял вянут вячеслав г га гава гаван гаванск гаврилович гаврилович гаврилович гаврилыч гаврилыч гаврилыч гаг гада гада гада гадан гада гада гад гаденьк гадк гадк гадк гадк гадк гадк гад гадок гадостн гадост гад га гаев гаев гаев гаж газел газет газет газет газет газет газетк газетн газетн газетн газет 
газет газет газ газ гайдамак галате галдеж галдел галер галере галер галер галере галере галиле галицийск галк галл галлер галл галлюсинац галлюцинац галстук галстук галстук галстучек галчат гам гамбринус гамлет гамлет гамлетовск гам гам ганц гарантирова гар гармон гармон гармоник гармоник гармоник гармоник гармонирова гармонирова гармонирова гармоническ гармон гарнитур гарнитур гарпагон гарус гар гас гас гасл гасл гаснет гасн гаснул гаснут гаснут гаснущ гастрон гастрономическ гастрономическ гат гатчинск гвалт гвозд гвозд гвоздик гвоздичк гвоздочк гвозд гвозд гг где ге гедрюкт ге гелиотроп гельд геморр генера генера генералишк генералишк генерал генерал генерал генерал генеральск генеральск гениальн гениальн гениальн гениальн гениальн ген ген ген генриет гену географическ географ географ геологическ геометр геометр георг геран геран геран герасимович герасимович герасимович герасимыч герасимыч гербов гербов гербов гердер геркула геркулесов герман герман германск геро геро героин гер геройск геройств гер геро герр гертруд герц герц гершел гет гиацинт гибел гибел гибельн гибельн гибельн гибел гибк гибл гибнет гибнеш гибн гибнут гибнут гибок гигантск гигант гимн гимн гимназ гимназист гимназист гимназическ гимназ гимнастик гимнастик гимнастик гимн гиппиус гирлянд гир гитар гитар гитарн гитар гитар глав глав глав главн главн главн главн главн главн главн главн главн главн главн главн главн главн главн главн глав глав глав глагол глад гладиатор глад глад глад глад гладк гладк гладк гладковыбрит гладк гладк глад гладьт глажен глаз глаз глаз глаз глаз глазел глазенк глазеночк глазеющ глазе глазищ глазк глазк глазок глаз глаз глас глас гласн гласност глас глас глашата глин глин глин глин глинян глинян глинян глисс глобус гложет гложут глота глотк глотк глотк глотнул глоток глохнут глубж глуб глубин глубин глубин глубин глубин глубок глубок глубок глубок глубок глубок глубок глубок глубок глубок глубок глубок глубок глубокомыслен глубокомыслен глубок глубочайш глубочайш 
глубочайш глубочайш глуб глум глуп глуп глуп глуп глуп глуп глуп глуп глуп глупеньк глупеньк глуп глуповат глуповат глуп глуп глуп глуп глуп глупопоспешн глупост глупост глупост глупост глупост глупост глупост глуп глуп глупц глуп глуп глуп глуп глуп глуп глух глух глухар глух глух глух глух глух глух глух глух глух глух глух глухонем глухот глух глуш глуш глуш глуш глыб глыб глядевш глядел глядел глядел глядет гляд гляд гляд гляд гляд гляд гляд гляд гляд гляд глядя гляж гляж глянет глянец глянув глянул глянул глянцевит гм гнал гнал гнат гнев гнев гнева гнева гнева гневн гневн гневност гневн гневн гнев гнев гнед гнезд гнезд гнезд гнезд гнет гнет гнетет гнетет гнет гнеш гниен гниет гниеш гнил гнил гнил гнил гнил гнил гнил гнил гнусав гнусн гнусн гнусн гнусн гнусност гнусн гнусн гнусн гнусн гнусн го говет говор говорен говор говор говор говор говор говор говор говор говор говор говор говорил говор говор говор говор говор говор говор говор говор говор говорящ говоря говоря говоря говоря говоря говоря говядин говядин говядин говядин гоголевск гогол год год год год год год годик годин годин год годичн год годн годн год годов годовщин годов год годочек год год год гол голгоф гол голик голландск голландск голландц гол гол голов голов голов голов головк головк головк головл головн головн головн головн головн головн головн голов головокружен голов голов головушк голов голод голод голод голодн голодн голодн голодн голодн голодн голодн голодн голодн голод гол голос голос голос голос голос голос голосист голоск голос голосок голос голос голос голуб голубин голубин голубк голубоват голубоват голуб голуб голуб голубоок голуб голубушк голуб голубчик голуб голуб голуб голуб голуб голубятн голубятн гол гол гол гол гол гол гол гол гольштейн гом гомер гомерид гомоз гомоз гонг гондол гондол гонен гонен гон гон гоним гоним гоним гоним гон гон гонорар гончар гоня гоня гоня гоня гон гоня гоп гор гор горазд горазд гор гор гор горбат горб горд горд горд горделив горд горд горд горд горд 
горд горд горд горд горд гордост гордост гордост гордост горд горд горд горд горд гордын горд горд горд гордячк гор горева горева горевш горевш горел горел горел горелк горел гор горемык горен горен горенк горен горестн горестн горестн горест горест горет гореч гореч гореч горж гор горизонт горизонт горизонтальн горизонт горизонт горизонт горист гор горк горл горлан горлан горл горл горлов горл горл горлышк горн горнист горнист горниц горниц горничн горничн горничн горн горн горн горн город город город город город город городишк городишк городишк городк городнич городнич городов городов городов городов городок город городск городск городск городск городск городск город город гор гороскоп горох горох горохов горохов горохов горох горох горошек горошин горошк горсточк горст гортан горта гор горчиц горш горшк горшк горшк горшочк гор горьк горьк горьк горьк горьк горьк горьк горьк горьк горьк гор горю горюнов горюнов горюнов горюч горюч горюч гор гор горяч горяч горяч горяч горяч горяч горячешн горяч горяч горяч горяч горяч горяч горяч горяч горяч горяч горяч горяч горяч горячк горячк горячк горячк горячн горяч горяч горяч горя горя горя горя горя госпитал госпитальн госпитальн господ господ господ господ господ господин господин господин господин господин господн господн господн господ господск господск господск господск господствова господствова господств господств господств господ господ госпож госпож госпож госпож госпожинок госпож госпож гост гост гостеприимн гостеприимн гост гост гост гостин гостинец гостиниц гостиниц гостиниц гостиниц гостин гостин гостин гостинц гостинц гостин гостин гост гост гост гост гост гост гост гост гост государствен государствен государствен государствен государствен государын государ государ гот гот готов готов готов готов готов готов готов готов готов готов готов готов готов готов готов готовл готовн готов готов готов готов готов готов готов готов готов готов готов готов готов готов готовя гофкригсрат гр грабеж грабеж грабеж грабеж 
грабеж грабеж граб граб грабител граб гравюр гравюр град граденапл градир градус градусник градус градус град гражданин гражданин гражданск гражданск гражданск гражданск гражданск гражданск грамматик грамматик грамот грамот грамотн грамотн грамотн грамот гранат грандиозн грандиозн грандиозн гран гран гранитн границ границ границ границ границ границ гранпасьянс граф графин графин графин графин графинин графин графинчик граф графск графск графск графск граф граф грац грациозн грациозн грациозн грациозн грациозн грац гребенк гребенк гребен греет грез грез грез грез грез грез грез грез грез грез грез грез грез гремел гремел гремел грем гренадер гренадер грет грех грех грех грех грех грех греховн греховн грех гречнев гречонк греш греш греш греш грешник грешниц грешниц грешн грешн грешн грешн грешн гриб гриб грив грив гривенник гривенник гривенник гривн гривн гривн гривн григор григорьевич григорьевич григорьевич гримас гримас гриф гриш гриш гроб гроб гроб гроб гроб гробниц гробниц гроб гробов гробов гробов гробов гроб гроб гроз гроз гроз гроз гроз гроз гроз гроз гроз гроз гроз грозн грозн грозн грозн грозн грозн грозн грозн грозн грозн грозн грозн грозов грозов грозов грозов гроз гроз гроз гроз гроз грозя гром гром громад громад громадн громадн громадн громадн громадн громадн громадн громадн громадн громадн гром гром гром громк громк громк громк громк громк громк громов громов громов громов громозд громоздк громоздк гром громч громыхан гром грот грохнул грохнул грохольск грохот грохот грош грош грош грошик грошов грошов гру груб груб груб груб грубееш груб груб грубия грубиян груб грубоват грубоват груб груб груб груб грубост грубост грубост грубост груб груб груб груб груб груб груд груд груд груд грудн грудн грудн грудн груд груд груд груд груд груз груз грузд грунт групп групп групп груст груст груст груст груст грустн грустн грустн грустн грустн грустн грустн грустн грустн грустн грустн грустн грустн груст груст груст груст груш грушев груш грущ грызет грызл грызл 
гряд гряд грядущ грядущ грядущ грядущ грядущ грядущ грядущ грядущ гряд гряз грязн грязн грязн грязн грязн грязн грязнотц грязн грязн грязн грязн грязн грязн грязн грязн гряз гряз грянет грянул грянул грянул грянул губ губ губ губ губ губернатор губернатор губернатор губернатор губернаторств губернатор губерн губерн губерн губерн губерн губернск губернск губернск губернск губернск губ губ губ губительн губительн губительн губ губ губк губк губк губ губок губ губ губ гувернантк гувернантк гувернантк гувернантк гудел гудел гудк гудут гуд гул гул гулк гул гульб гуля гуляк гуля гуля гуля гулян гулян гуля гуля гуля гуля гул гумаг гумаг гуманистическ гуманитет гума гума гума гума гуман гума гума гуммиластик гуня гурьб гусак гусак гусар гусар гусарск гусарск гус гус гусин густ густ густ густ густот густ густ густ густ гусын гу гущ гюг д да дав дава дава дава дава дава дава дава дава дав дав давеч давешн давешн давешн давешн давешн давешн давешн давешн дав дав дав дав дав дав давк давк давлен давн давненьк давнишн давнишн давнишн давнишн давнишн давнишн давн давн давн дав давя дагеста дад дад дадут дает дает дает даж да дайт дал дал дал дал дал далек далек далек далек далек далек далек далек далек далек далек далек далеч дал дал дал дал дал дал дальн дальн дальн дальн дальн дальн дальн дальн дальн дальн дальн дальн дальн дальн дальн дальн дальн дальш дал дам дам дам дам дам дам дам дам дамск дамск дам дам дам дан дан дан дан дан дан дан дант дант дантов дан дар дар дарвин дарен дар дар дармоедк дармоед дар дарован дарован даровщинк дар дар дар дар дар дар даст дат дач дач дач дач дач дач дач дачник дачник дачник дачн дачн дач дашеньк дашеньк дашеньк даш да дают два двадцат двадцатилетн двадцатирублев двадцатисемилетн двадцат двадца двадцат дважд две двенадцат двенадцат двенадцат двенадцат двенадцат двенадца двенадцат двер двер дверец двер дверн двер дверьм двер двер двер двер двест двига двига двига двига двига двига двига двига двига двигател двигател двига двига двига 
двиг двиг движен движен движен движен движен движен движен движен движен движен движен движет движет движеш движим движим движ движут движущ движущ движущ движущ двинет двинул двинул двинул двинул двинут двинут двинут дво двоевер дво дво дво двойк двойник двойн двойн двойн двойн двор двор двор двор двор дворец дворик дворн дворн дворн дворник дворник дворник дворник дворник дворников дворник дворник дворницк дворницк дворн дворн дворняжк дворняжк двор дворов двор двор дворц дворц дворцов дворцов дворц дворя дворян дворянин дворянин дворянк дворянк дворянск дворянск дворянск дворянск дворянск дворянск дворянск двугривен двугривен двугривен двугривен двукратн двуличн двум двум двусмыслен двусмыслен двусмыслен двусмыслен двух двухкопеечник двухмачтов двухминутн двухнедельн двухчасов двухэтажн де дебаркадер дебатирова дебош дебютирова дев дев дева дева дев дева дева девер девиз девиц девиц девиц девиц девиц девиц девиц девиц девическ девическ девическ девическ девическ девич девич девич девич девич девк девк девк девк девк дев девок девочек девочк девочк девочк девочк девочк девочк девочк девствен девствен девствен девствен девушек девушк девушк девушк девушк девушк девчонк девчонк девчонк девчоночк девчурочк дев девян девят девятнадцат девятнадца девят девят девят девят девя девятьсот дегот дегт дед дед дед дед дед дед дедушк дедушк дедушк дед дезертирова действен действ действ действ действительн действительн действительн действительн действительн действительн действительн действительн действительн действительн действительн действ действ действйтельн действова действова действова действ действ действ действу декабр декабр декабрьск декабр декадент декадент декадентск декадентск дека декламир декламирует декламиру деклариру декорац декорац декорац дел дел дела дела дела дела дела дела дела дела дела дела дела дела дела делал дел дел дел дела дел дела дела дела дела дел дел дел делен делец деликат деликатн деликатн деликатн деликатн деликатн деликатн деликатн дел дел 
дел дел делишк делишк дел делов деловит деловит делов делов делов делов делов делов дел дел дел делыва дельн дельн дельн дельц дельц дельц дементьев демисезон демк демон демон демонизм демон демонолог демон демонск демонстрац денег денежн денежн денежн денежн денек ден ден денниц ден ден денщик денщик денщик ден деньг деньг деньг деньг деньжонк деньк деньск деньск департамент департамент департамент департамент депутат депутат депутат депутат дер дерга дерга дерга дерга дерга дерг дер дерев дерев деревенел деревенск деревенск деревенск деревенск деревенск деревенск деревенск деревенск деревен деревеньк деревн деревн деревн деревн деревн деревн дерев дерев дерев деревушк деревц дерев дерев дерев дерев дерев деревя деревя деревя деревя деревя деревя деревя деревя деревя деревя дер дерет держ державн держа держа держа держа держа держ держат держа держа держ держ держ держ держ держ держ держ дерзк дерзк дерзк дерзк дерзк дерзк дерзк дерзк дерзок дерзост дерзост дерзостн дерзостн дерзост дерзост дер дериган дернет дернул дернул дернул дернул дернут дерут десерт деска десниц деспот деспотизм деспотическ деспотическ десят десятилетн десятилетн десятин десятин десятин десятирублев десятирублев десятк десятк десятк десят десят десят десят десят деся дета дет дет детк детк деток деточек деточк деточк детск детск детск детск детск детск детск детск детск детск детск детск детск детск детств детств детств дет детьм дет дет дет дешевл дешев дешев дешев деятел деятел деятельн деятельн деятельн деятельн деятельн деятельн деятельн деятельн джентльм джентльм джентльменничан джентльменск джентльм ди диалектик диапазон дива дива диван диван диван диван диван диванчик дива дивизион див див див див див дивл дивн дивн дивн дивн дивн дивн дивн див див див дикар дик дик дик дик дик диковин дик дик дик дик дикост дикост дик диктова диктова диктовк дикт дик дилетантизм дилижанс динам диорам дипл дипломат дипломатическ дипломат директор директор директрис дирижер диск диссертац диссонанс 
дит дит дифтер дич дич дич дич диэт длан длил длил длил длил длин длин длин длин длинноволос длин длин длин длин длин длин длин длин длин длин длин длин длин длин длит для длят дмитр дмитр дмитр дмитр дна дне дневн дневник дневник дневн дневн дневн дневн дневн дневн дне днем дни днищ дно дном дню дня дням дням днях до добав добав добежа добер добива добива добива добива добива добива доб доб добира добира добира добир доб доблест доблестн доблест добр добр добра добра добр добр добр добр добр добрел добреньк добреющ добр добровольн добровольн добр добродетел добродетел добродетельн добродетельн добродетельн добродетельн добродетельн добродетел добродетел добродуш добродушн добродушн добродушн добродушн добродушн добр добр добролюб добр добр добросовестн добросовестн доброт доброт доброт доброт добр добр добр добр добр добр добр добр добрын добрынин добр добряк добудет добыва добыва добыва добыва добыван добыван добыва добыва добыва добыва добыв доб добыт доб добыч добьет доб добьют доварива довар доведен доведен довед доведет довед довед довез довез довез довел довел довел доверен доверен доверен довер довер доверител довер довер доверч доверчив доверчив доверчив доверчив доверчив доверчив доверчив доверчив довершен доверш доверш доверьт доверя доверя доверя доверя довер доверя довер довест довод довод довод довод довод довод довол довольн довольн довольн довольн довольн довольн довольств довольств довольствова довольств довоспита довра довр догада догада догада догада догада догада догадк догадк догадк догадк догадок догадыва догадыва догадыва догадыв догляд догмат догна догна догна договарива договарива договарива договарива договарив договор договор договор договор договор договор догон догоня догоня догоня догора догора догора догора догора догоревш догорел догор додела додума додума додумыва доеда доеда доедеш доед доел доел доеха доеха дож дожда дожда дожда дожда дождев дождев дождев дожд дожд дожд дождет дождеш дожд дождик дождичек дождлив дождлив дожд дожд 
дожд дожд дож дож дожива дожива дожива дожива дожив дожида дожида дожида дожида дожида дожид дож дож дож дозва дозволен дозвол дозвольт дозвол дозволя дозвон доиска доискива доискива до доищ дойд дойдет дойдеш дойд дойдут дойд доймут дойт док докажет докажет докаж доказа доказа доказа доказа доказа доказа доказательств доказательств доказательств доказательств доказа доказыва доказыва доказыва доказыва доказыва доказыва доказыва док доканчива доканчива доклад доклад доклад докладн доклад доклад докладыва докладыва докладыва доклика докол докона докона доконч доконч доконч доктор доктор доктор докторск доктор доктрин документ документ документ документ докучлив докучлив докучн докучн дол долг долг долг долг долг долг долг долг долг долг долг долгов долгов долговремен долговремен долговяз долг долгогрив долг долгожда долг долголетн долг долг дол дол долета долета долетел долж должн должник должн должност должност должност должн должн должок дол долин долин долин долин долин долин долог долож долож долож долож дол дол дол дольн дольш дол дол дом дом дом дом дом домашн домашн домашн домашн домашн домашн домашн домашн домашн домашн домашн домашн дом домел домик домик домик дом домовит домов домов домов домов дом дом домосед домосед домохозяйк дом домча домча домч дом дон донельз донес донес донесен донесен донесет донес донесл донесл донест донес донжуа дониз дон доннерветтер дон донос донос донос донос донос донос донос донос донос донос донош донск донц донын допечет допив доп дописа дописыва дописыва допит допл доподлин дополнен дополня дополня допотопн допрашива допрашива допрашива допрашива допрашива допрашива допрашива допрашива допрос допрос допрос допросик допрос допрос допрос допрос допрос допросчик допрос допрош допуска допуска допуска допуска допуска допуска допуска допуска допуска допуст допуст допуст допуст допуст допуст допущ допытыва допья допят дорисовыва дорн дорн дорн дорн дорог дорог дорог дорог дорог дорог дорог дорог дорог дорог дорог дор дороговизн 
дорог дорог дорог дорог дорог дорог дородн дорож дорожат дорож дорож дорож дорож дорож дорожк дорожк дорожк дорожк дорожк дорожн дорожн дорожн дорожн дорожн дорожн дорожн дорож досад досад досадлив досадлив досадн досадн досадова досад досад досад досаду досад досажда досажда досел доселев доск досказа досказа досказа досказа доск доскачет доск доск доскональн доск доск дослед дослуша дослуша дослуша дослушива дослушива досмеива досок доспех достава достава достава достава достава достава достава достава достав достав достав достав доставля доставля доставля доста доста доста доста доста доста доста доста доста доста доста доста достанет достанет достанеш достан достанут достанут достан достатк достаточн достаточн достаточн достаточн доста доста доста доста достиг достига достига достига достига достига достигл достигл достигнет достигнеш достигн достигнув достигнут достигнут достижен достич достодолжн достоевск достоин достоинств достоинств достоинств достоинств достоинств достоинств достоинств достойн достойн достойн достойн достойн достойн достойн достойн достойн достоян достро достро достроч доступ доступн доступн доступн доступн доступн достуча досуг досчита дотащ дотащ дотл дотол дотолка дотрогив дотронет дотронул дотягива дотянет дотянеш дотянул дотянут доуч дохленьк дохнет дохнул дохнул дохнут дохнут доход доход доход доход доход доход доход доход доход доходц доход доход доход доцент дочел дочер дочер дочер дочер дочест дочист дочита дочита дочита дочитыва дочитыва дочитыва дочк дочк дочк дочк дочл дочл доч дошедш дошедш дошел дош дошл дошл дошл дощечк дощечк дощечк дощечк драгоцен драгоцен драгоцен драгоцен драгоцен драгоцен драгоцен драгоцен драгоцен драгунск драдедамов драдедамов драдедамов дразн дразн дразн дразн дразн дразн дразн дразн дразн дразн драк драк драк дракон драк драл драл драл драл драм драм дран дран драпирова драпировк драпир драпир драпр драт драт дребеден дребезжа дребезжа древ древ древесн древк древн древн древн древн древн древн 
древн древн древност дрема дрема дремлет дремлет дремлеш дремл дремлют дремлющ дремлющ дремлющ дремлющ дремн дремот дремот дремотн дремот дремот дремот дрем дремуч дремуч дроб дроб дроб дробн дров дров дров дрогнет дрогнувш дрогнувш дрогнул дрогнул дрогнул дрогнул дрож дрожа дрожа дрожа дрожа дрожа дрожа дрожа дрожа дрожа дрожа дрожан дрожат дрожа дрожа дрожа дрожа дрожа дрожа дрожа дрожек дрож дрож дрож дрож дрожк дрож дрож дрож друг друг друг друг друг друг друг друг друг друг друг друг друг друг друг друг дружб дружб дружб дружб дружб дружб дружелюбн дружелюбн дружеск дружеск дружеск дружеск дружеск дружеск дружеск дружеск дружеск дружеств дружествен дружествен друж друж друж дружищ дружк дружк дружн дружн дружн дружн дружн дружочек друз друз друз друз дрыхнет дрыхнеш дрыхн дрыхнут дрыхнут дрябл дрябл дрябл дрябл дрязг дрян дрян дрян дрян дрян дрян дрян дрян дрян дрян дрян дрянц дрян дрян дряхл дряхл дряхл ду дуб дуб дубняк дубняк дубов дуб дуб дуг дуг дуг дугообразн дует дует дуз дуклид дум дум дума дума дума дума дума дума дума дума дума дума дум дума дум дума дума дума дум дум дум дун дун дунечк дунечк дунечк дунечкин дунечкинов дунечкин дунечкин дунечкин дунечк дунечк дун дунул дунул дун дун дуняш дуняш дуняш дуняш дуняш дупел дуплет дуплет дур дур дурак дурак дурак дурак дурак дурак дурак дурацк дурацк дурацк дурач дурач дурач дурачк дурачк дурачок дур дур дурма дурма дурман дурн дурн дурн дурн дурн дурн дурн дурнушк дурн дурн дурочк дуррак дур дурынд дур ду дут дух дух дух дух дух дух дух духовник духовн духовн духовн дух духот духот духот духот дух дучяш душ душ душ душат душ душевн душевн душевн душевн душевн душевн душевн душевн душевн душегубец душегубств душегубц душ душеньк душеньк душечк душ душ душист душист душист душист душ душ душ душкин душкин душкин душн душн душн душн душн душн душ душонк душ душ дуэл дуэл дуют дщер дым дым дым дым дым дым дымк дымн дымн дымн дымн дымн дым дым дым дымя дыр дыр дыр дыр дырк дырочк дыр дыр дыряв дыряв дыряв 
дыряв дыхан дыхан дыхан дыш дыша дыша дыша дыша дышат дыша дыш дыш дыш дыш дышл дыш дьявол дьявол дьявол дьявол дьявол дьявольск дьявольск дьякон дьячк дьячок дюж дюжин дюжин дюжин дюжин дюссот дяд дяд дядек дядечк дяд дядин дядьк дядьшк дядьшк дяд дяд е евангел евангел евангел евг евген евген евнух евр еврейск еврейск евре европ европейск европ европ евстигн евстигне евш египет египетск египетск египетск египетск египт египт ег егоровн егор ед едв ед ед едемт едет едет едеш единичн единичн единоверц единовремен единогласн единогласн един един един единородн единородн единств единствен единствен единствен единствен единствен единствен единствен единствен единствен единствен един един ед едк едк едк едк ед едут едуч едущ ед е ежегодн ежедневн ежедневн ежедневн ежедневн ежедневн ежедневн ежедневн ежедневн ежедневн ежел ежемесячн ежеминутн ежеминутн ежеминутн еженедельн еженочн ежечасн езд езд езд езд езд езд езд езд ездиют езд езд езд езжа езжа езж е екатерингоф екатерингоф екатеринин екатерининск ектен ел ел елагин ел ел ел елен елен ел елец ел елисаветград елисейск елк елк елов елов ел ельц ем емел ем енотов епиход епиходов епиходов епоход еран еран еран ермак ермак ермола ермолк ермолк ерош ерунд ерусла ерш ерш ершик есл есм ест естествен естествен естествен естествен естествен естествен естествен естественнонаучн естествен естествен естествен естествен естествен естествен естествен ест е ет еф ефимьюшк ефтов ефт ефт еха еха еха еха еха еха еш ешьт ещ е ж жаб жаворонк жаворонк жадн жадн жадност жадност жадност жадн жадн жадн жадн жадн жажд жажда жажда жаждет жажд жажд жаждущ жаждущ жаждущ жаждущ жажд жакетк жал жал жал жал жалел жалел жалет жал жалеют жале жал жал жал жалк жалк жалк жалк жалк жалк жалк жалк жалк жалк жалк жалк жал жалоб жалоб жалобн жалобн жалобн жалобн жалоб жалова жалова жалова жалован жалован жалован жалова жалок жалост жалост жалост жал жалу жалует жал жал жал жал жан жандарм жандарм жанлис жар жар жаргон жар жарен жарен жар жар жарищ жарк 
жарк жарк жарк жарк жарк жарк жарк жарк жарк жарок жар жар жарч жги жгла жгли жгло жгут жгуч жгуч жгуч жгуч жгуч жгуч жгуч жгуч жгуч ждал ждал ждал ждал ждат ждем ждет ждет ждеш жди ждит жду ждут ждущ ждущ жды же жева жеван жева жег жезл жела жела жела жела жела жела жела жела жела жела желал желан желан желан желан желан желан желан жела жела жела желан желан желательн жела жела жела жел желез железн железн железнодорожн железн железн железн железн железн железн железн железн желез желез желоб желоб желт желтеньк желтеньк желтеют желтизн желтизн желтоват желтоват желтоват желтоват желт желт желт желт желт желт желт желт желт желт желт желудк желудок желудочк желч желч желчн желчн желчн желчн желч жеманств жемчуг жемчуг жемчуг жемчуг жемчуг жемчуг жемчужин жемчужин жемчужн жемчужн жен жен жен жен женат женат жен жен жен жен жен жен жен женитьб женитьб женитьб женитьб жен жен жених жених жених жених жениховск жених жених женишк женишок жен жен жен женск женск женск женск женск женск женск женск женск женск женск женствен жен женщин женщин женщин женщин женщин женщин женщин женщин женщин женщин жен жен жен жен жерд жерд жернов жертв жертв жертв жертв жертвова жертв жертвочк жертв жертв жертв жертв жест жест жест жест жест жестк жестк жестк жестк жестк жестк жестк жесткост жесткост жестк жесток жесток жесток жесток жесток жесток жесток жесток жесток жесток жест жест жест жестян жестян жетон жеч жженк жжет жив жив жив жив жив жив живет живет живет живеш жив жив жив живност жив жив жив жив жив живописец живопис живописн живописн живописн живописц живоп живопис живост живост живост животн животн животн животн животн животн животрепещущ животрепещущ жив живут живуч живуч живущ живущ живущ живущ живущ живущ жив живш живш жив жив жив жив жив жив жив жид жиденьк жиденьк жиденьк жидк жид жид жизн жизнен жизнен жизнен жизнен жизнен жизнен жизнен жизнен жизн жизн жизн жизн жил жил жил жил жилет жилет жилет жилетк жилетк жилетк жилетн жилет жилец жил жилиц жилиц жилищ жилищ жилк 
жилк жил жил жил жильц жильц жильц жильц жильц жильц жильц жил жир жирн жирн жирн жирн жирн жирн жирн жирн жирок жир жир житейск житейск житейск житейск житейск жител жител жительств жительств жит жит жит жмет жмет жмеш жмут жнив жнитв жнут жорж жорж жреб жреб жрец жреш жуа жуан жувеневск жужжа жужжан жужжат жужжа жужж жужж жук жук жуковск журавл журавлин журавл жур журна журна журнал журналист журнал журнал журнальн журнальц журчан журчан журч жутк жуткост жучк жующ з за заалел заалел заарестова забав забавля забавля забавля забавник забавник забавн забав забалканск забастовк забвен забвен забвен забвен забега забега забега забега забега забега забега забег забег забедневш забежа забежа забеж забелеет забелел заберет забер заберут заб заб забира забира забира забира забир заб забит забит забит заб забияк заблаговремен заблагорассуд заблагорассуд заблагорассуд заблест заблиста заблиста заблуд заблуд заблужден заблужден заболева заболева заболеет заболел заболел забол заболта заболта забор забор забор забор забор забор забор забот забот забот забот забот забот забот забот забот забот забот забот забот забот заботлив заботлив заботлив заботлив заботлив заботлив заботлив заботлив заботлив заботлив заботлив заботлив заботлив забот забот забот забот забот забоч забра забра забра забра забрасыва забра забрежж забрезж забрезж забрел заброд заброс заброс заброшен заброшен забубен забуд забуд забудет забудет забудеш забуд забудут забуд забудьт забулдыг забуян заб забыва забыва забыва забыва забыва забыва забыва забыва забыва забыва забыва забыва забыва забыв забывч забывчив заб заб заб заб заб заб заб заб заб заб заб заб забыт забыт забыт забыт забыт забыт забыт забыт забыт забыт забыт заб забыт забыт заб забыт забьет забьют завадск завал завал завал завал завал завалинк завал заваля заваля заварива завед заведен заведен заведен заведен заведен заведен заведет заведет завед завед заведова заведова завед заведут завед завезт завез завел завел завел завел завернул завернул 
завернут завернут завернут завернут завернут завертел завертел завертыва завертыв заверш завершител завес завес завест завес завет завет заветн заветн заветн заветн заветн завет завет завет завеща завеща завещан завещан завещан завещан завеща завеща завива завива завидел завид завидн завидова завидуеш завид завид завизжа завира зависел завис завист завистл завистлив завистлив завистлив завист завист завися завися завит завит завит завлад завлек завлека завлека завлекл завлеч завод завод завод завод завод завод завод завод завод заводск завод завоет завож завоз заволакива заволнова заволокл заволокл завоп завоп завоп заворожен заворот заворча завра завсегд завтр завтрак завтрак завтрака завтрака завтрака завтрака завтрак завтрам завтрашн завтрашн завтрашн завтр завыва завыва завыван завыван завяжет завяж завяз завяза завяза завяза завяза завяза завяза завязнет загада загада загада загадк загадк загадк загадк загадк загадок загадочн загадочн загадочн загадочн загадочн загадочн загадочн загадыва загадыван загар загвоздк заглав заглав заглав заглавн заглад заглад заглад заглохл заглуша заглуша заглуша заглушат заглушен заглушен заглуш заглуш заглуш заглушон заглядевш заглядел заглядел заглядыва заглядыва заглядыва заглядыва заглядыва заглядыв заглянет заглянет заглян заглянув заглянул заглянул заглянут загна загна загниют загнут заговарива заговарива заговарива заговарива заговарива заговарива заговарив заговел заговен заговор заговор заговор заговор заговор заговор заговор заговорщик заговорщик заговор заговор заголос загора загоражива загоражива загоражив загора загора загора загорд загоревш загоревш загоревш загоревш загорел загорел загорел загорел загорел загорел загорел загор загород загород загород загородн загорож загорожен загоряч заготов заготовл заград заград заграничн заграничн заграничн загреб загремел загремел загроможд загроможд загрубевш загрубел загрызет загрязн загрязн загрязн загуб загудел загуля задава задава задава задава задава задава задава задав 
задав задав задавл задав зада зада зада зада зад задар задаст задатк задатк задатк задаток зада задач задач задач задач задач зада задвига задвига задвижк задвижк задвинет задворк задворк задебрен задева задева задев задевш задел задел задел задел заденет задержа задержа задержа задержа задержива задержива задержива задержива задержк задержк задерж задернет задернул задет задет задира задирательн задира задн задн задн задн задобр задок задолжа зад задор задор задохнул задразн задрема задрема задрема задрема задремлет задремлеш задрожа задрожа задрожа задрожа задрожа задрож задул задума задума задума задума задума задума задума задума задума задума задума задума задума задума задума задума задумч задумчив задумчив задумчив задумчив задумчив задумчив задумчивосг задумчив задумчив задумчив задумчив задумчив задумчив задумчив задумчив задумчив задумчив задумыва задумыва задумыва задумыва задумыва задумыва задумыва задумыв задурман задушевн задуш задуш задуш задуш зад задым задым задыха задыха задыха задыха задыха задыха задыха задыха задыха задых заеда заеда заед заед заезж заел за заемн заемн заемн заемн заеха заеха заеха зажа зажажда зажа зажат зажат зажгл зажгл зажгут зажда зажег зажеч зажж зажжен зажжен зажива зажив заживет зажив зажига зажига зажига зажигательн зажига зажиг заж заж заж зажим зажирел зажиточн зажмет зажмеш зажмурен зажмур зажмурив зажмур зажмур зажмур зажужжа зажужжа зазва зазвенел зазвенел зазвен зазвуча зазвуча зазвучат зазвуч зазовет зазор зазубринк заигра заигра заигра заигра заигра заигра заигра заигрыва заика заика заика заик заикнет заикн заикнул заимодавец заинтересова заинтересова заинтересова заинтересова заинтересова заинтересова заинтересова заиска заискива заискиван заискива зайд зайдет зайдет зайдеш зайд зайд зайд зайд займ займ займет займет займеш займ займ займут займут зайт зайц зайц закаж заказ заказа заказа заказа заказа заказ заказыва заказыва заказыва заказыва закал закалив заканчива заканчива закапа закарабка закат закат 
заката закат закат закат закат закат закат закат закатн закатн закатн закатн закат закат закатыва закача закива закин закинув закинул закинул закинул закинут закинут закипел закипел закипел закипел закип заклад заклад заклад закладк закладн заклад заклад закладчик закладчик закладчик закладчик закладчик закладчиц заклад закладыва закладыва закладыва закладыва закладыва закладыва заклина заключа заключа заключа заключа заключа заключа заключа заключ заключен заключен заключен заключен заключен заключен заключ заключ заключ заключ заключ заклят заколдова заколдова заколот заколот заколот заколют закон закон закон закон закон закон закон закон закон закон закон закон закон закон закон закон закон законодател законодател закон закон законфуз законфуз законфуз законфуз законфуз закончен закончен законч законч закон закопа закоптел закопчен закоренел закороб закорузл закорузл закоснел закоулк закоченел закоченел закрадыва закрас закраснел закреп закрича закрича закрича закричат закрича закрич закрич закровян закроет закроет закр закр закр закроют закруглен закруж закруж закруж закруж закруж закрут закрут закручив закр закрыва закрыва закрыва закрыва закрыва закрыва закрыв закр закр закр закр закр закр закр закрыт закрыт закрыт закрыт закрыт закрыт закудахта закулисн закуп закупк закупк закупк закупорива закупор закураж закур закурива закурива закур закус закус закус закуск закуск закуск закуск закуск закусыва закусыва закусыва закута закута закута закута закута закута закута закута зал зал зала зал залая зал залег залегл залежа залежа залежа залез залеза залезл залезт залепета залеп залета залета залетел залетел залеч зал залив залива залива залива залива залив заливист заливн заливн зал зал зал зал зал зал залит залит залит залит залит зал залог залог залож заложен заложен залож залож залож залож залож залож залож зал залом залом заломл залп зал залуч зал зальдевш зальет зальет зальеш зал зама зама заман заман заман заманчив заманчив замара замарашк замаслен замаслен 
замаслен замаслен замаха замахива замахив замахнет замахнул замахнул замашк замашк замедлен замедл замедля замедля замелька замелька замен замен замен замен замен замен замен заменя заменя замер замерет замерзнет замерзнут замерл замерл замерл замертв замест замест замет заметен замет замет замет замет замет замет замет замет заметк заметлив заметн заметн заметн заметн заметн заметн заметн заметн заметн заметн заметн замет заметов заметовск заметов заметов заметок замет заметьт замет замеча замеча замеча замеча замеча замеча замеча замеча замеча замеча замечан замечан замечан замечан замечан замечател замечательн замечательн замечательн замечательн замечательн замечательн замечательн замечательн замечательн замеча замеча замеча замеч замеч замечен замечен замечта замечта замечта замечта замеша замеша замеша замеша замешательств замешательств замешка замешка замешка замига замира замира замира замиран замира замира замира замир замк замк замк замкн замкнут замкнут замкнут замкнут замнет замогильн замок замол замолк замолкл замолкл замолкл замолкнут замолкнут замолча замолча замолча замолчат замолча замолч замолч замолч замор заморск заморск заморск замоч замоч замочн замочн зам замрет замр замрут замуж замуж замужеств замужеств замужеств замужн замуча замуча замучен замуч замуч замуч замуч замуч замуч замуч замуч замшев замшев замыка замыкалов замыка замык замык замысел замысл замысл замысл замысл зам замышля замышля замя замя замямл замя занавес занавес занавеск занавеск занавеск занавеск занавесок занавес занавес занавес занд занес занес занесен занесен занест занима занима занима занима занима занима занима занима занима занима занима занима занима занима занима занима занимательн занима занима занима занима занима занима занов заноз занос заносчив заносчив заносчив заносчив заносчив заносчив зан заношен заношен заношен заня заня заня заня заня заня заня зан занят занят занят занят занят занят занят занят занят занят занят занят заня заня заодн заора заочн запада 
западник западн запамятова запамятова запар запас запас запас запас запасн запас запас зап запах запахл запах запах запачка запачка запачка запачка запачка запачка запашк запева запева запеван запев запека запекл запекл запекш запекш запекш запел запел запел запенд запен запер запер заперет заперет заперл заперл заперл запер заперт заперт заперт заперт заперт заперт заперт заперт заперш запечата запечата запива запива зап запина запин запинк запираем запира запира запира запира запира запира запира запира запира запира запира записа записа записа записа записа записа записа записк записк записк записк записк записк записк записн записн записн записок записочк записыва записыван записыван записыва записыва записыва записыв запихива запихив запиш запиш запиш заплака заплака заплака заплака заплака заплака заплака заплат заплат заплат заплат заплат заплат заплат заплат заплат заплат заплат заплатк заплат заплач заплачет заплачеш заплач заплачут заплева заплева заплева заплесневел заплест заплет заплута заплыва запл запл запляса запнул запнул заподозр заподозр запо запоет запоеш запозда запоздал запоздал запозда зап заполн заполн заполон заполон запомн запомн запомн запомн запомн запомн запомн запомн запонк запонк запор запор запор запорет запорош запорош запо заправ заправлен заправля запрег запредельн запрет запрет запрет запрет запрет запреща запреща запрещен запрещен запрещен запрещ запр запр запропаст запропаст запрос запрос запрост запрос запруж запрыга запрыга запрыга запрыга запрягут запряжен запряжен запряч запуга запуга запуга запуга запуска запуска запуска запуст запуст запута запута запута запута запута запута запутыва запутыв запутыв запушен запущен запущен запущен запущен запущен запущ запыла запыла запылен запылен запылен запылен запылен запыл запыл запыл запыха запыха запьеш зап запяст запяст запятна зарабатыва заработа заработа заработа заработа заработа заработк заработк заража заража заража заража зараж заражен заражен заражен зараз зараз зараз зараз 
зараз заразител заразительн заразительн заразительн зараз зарайск зарайск зарайск заран зарапортова зараста зардел зар зарев зарев зарев зарев заревел зарев зарев зарев зарев зарев зарежет зареж зареза зареза зарез зар заречн заречн зар заржавлен заржет зар зарниц зарницын зарницын зарод зарод зародыш зарожда зарожден зарожден зар заронен зарон заросл заросш заросш зароют зарубежн заруб зар зарыда зарыда зарыда зар зар зарыт зар зар зарыча зарыч зар зар заряд заряд заряжан засад засален засален засален засасыва засверка засверка засверка засверка засверка засвет засвет засвет засветл засвидетельствова засвидетельствова засвидетельствова засвидетельствова засвидетельствован засвидетельствова засвидетельствова засева заседан засек засел засел засел засел засел засидел засид засипел засия засия засия засия засия заскака заскрежета заскрежета заскрип заслонен заслон заслон заслон заслоня заслоня заслуг заслуг заслужен заслужен заслужен заслужен заслужен заслуж заслуж заслушива заслыш заслыша засматрива засмеет засмеют засмеют засмея засмея засмея засмея засмея засмотр заснет засн заснувш заснул заснул заснул заснут засов засохл засохл засохл засохш засохш засохш засохш заспа заспа заспа заспа засп застава застава застав застав застав застав застав заставлен заставл заставля заставля заставля заставля заставля заставля заставля заставл застав застав застав застав заста заста заста застан застанет застанеш застан застарел заста застегива застегиван застегива застегив застегнут застегнут застелют застенч застенчив застенчив застенчив застенчив застенчив застиг застила застойк застойк застольн застона застонет застрахова застрахова застрел застрел застрел застрел застрел застрел застрел застрел застря застрянет застука заступа заступ заступ заступ заступ заступ застуча застуча застуча застучат застуч застыва застыва застыв застыд застыд застыд заст заст застынет засуд засует засует засует засует засунув засунул засунул засух засух засух засучен засуч засыпа засыпа засыпа 
засыпа засыпа засыпа засып засыплет засыха засяд затаен зата зата зата зата затаратор затаска затаска затверд затвор затвор затвор затвор затвор затвор затворничеств затвор затворя затворя затворя затвор затвор затева зате затеет затееш зат зат затека зат затемн затепл затепл затерл затер затерт затерт затеря затеря затеря затеря затеря затеса затесн затея затея затея зате затея зат затиха затихл затихл затихл затиш затиш затиш заткнет заткн заткнут заткнут затмева затмен затмен зат затолп затопа затопа затоп затопчет заторел затороп затороп затороп затороп затоскова затравлен затрат затревож затревож затрепета затрепета затрепета затрепещет затрет затрогива затронет затронул затруднен затруднен затруднен затруднен затрудн затруднительн затруднительн затрудня затрудня затрудня затрудня затрудн затряс затрясл затрясл затряс затуман затуман затуман затуш затуш затуш затыка затылк затылк затылок затягива затягив затянувш затянул затянул затянул затянут заунывн заурядн заутрен заутрен заучен заучен заучен зауч зауч зафилософствова захар захар захарк захарк захар захарович захарович захарович захаров захар захар захарыч захарыч захвал захват захват захват захват захват захват захват захват захватыва захватыва захватыва захвачен захвора захвора захихика захлебнул захлебыв захлестнул захлестнул захлопнул захлопнул захлопыва захлопыв захмелевш захныка заход заход заход заход заход заход заход заходя заходя захожден захож захотел захотел захотел захотел захотет захот захот захохота захохота захохота захохочет захочет захочет захочеш захоч захрипел захрома захрустел зацелова зацеп зацеп зача зачаст зачат зачатк зач зачахл зачахн зач зачеркива зачеркнет зачерпнул зачешет зачешут зачисл зачита зачитыва зачт зашата зашата зашевел зашевел зашевел зашевел зашевел зашел зашепта зашепта зашепта зашива зашипел зашип заш зашл зашлепа зашл зашл заштопа заштопа зашум зашум защелка защем защит защ защит защит защит защит защит защитник защит защит защища защища защища защища защища 
защища заяв заяв заяв заявлен заявл заявля заяч звал звал звал звал зван зван зван зван зван зват звезд звезд звезд звезд звезд звезд звездн звезд звездочк звезд звезд звезд звен звенел звенел звенел звен звен звен звен звен звен звен звеня звер звер звер зверин зверин зверин зверин зверок зверск зверск зверск зверск зверств звер звер звер звон звон звон звон звон звон звонк звонк звонк звонк звонк звонк звонк звонк звонок звон звон звук звук звук звук звук звук звук звук звуча звуча звуча звуча звучат звуча звуча звуч звуч звучн звякан звяка звякнул зги зготовля здан здан здан здан здан зде зде здешн здешн здешн здешн здешн здешн здешн здешн здешн здешн здор здоров здорова здорова здорова здоров здоров здоровеньк здоровеньк здоровехонек здоров здоров здоров здоров здоров здоров здоров здоров здоров здоров здоров здоров здоров здрав здрав здрав здрав здрав здрав здравствова здравств здравств здравств здрав здсе зе зева зева зевак зева зева зев зевес зевнет зевнул зевнут зевот зевот зевот зейн зейнрок зел зелен зеленеет зелененьк зелененьк зеленеют зелен зелен зел зеленоват зеленоват зелен зелен зелен зелен зеленщик зеленщик зеленщик зеленщик зел зелен зелен зелен зелен зелен зелен зелен земел земл земледел земл землетрясен землетрясен земл земл земл земл земляк земляк земляк земл земляник землянк земн земн земн земн земн земн земн земск земск земск земск земств земчуг зен зениц зерка зерка зеркал зеркал зерка зеркал зеркальн зеркальн зеркальц зеркальц зерн зерн зигзаг зигзаг зигфрид зим зим зимн зимн зимн зимн зимн зимн зим зим зим зинаид зия зла злак злат злат злат злат злат злат зла зле злейш злил злит злит злит злит злиш зло злоб злоб злоб злобн злобн злобн злобн злобн злобн злобн злобн злобн злобн злоб злоб злобств злоб злоб зловещ зловещ зловещ зловещ зловещ зловещ злов зловредн злод злоде злод злодейк злодейск злодейств злодейств злоде злодеян зло зло злом злонамерен злорадн злорадств злослов злослов злост злостн злост злост злосчастн злоупотреблен 
злоупотреблен злоупотребля злу злу злы злы злым злым злых злю злющ злющ зля зме зме зме зме зме зме змеин зме зме зме зме змея знава знава знавш знавш зна знает знает знаеш зна знайт знак знак знак знак знак знак знаком знаком знаком знаком знаком знаком знаком знаком знаком знакомств знакомств знакомств знакомств знакомств знаком знаком знаком знаком знаком знаком знаком знаком знал знал знал знал знам знам знаменательн знаменательн знаменательн знаменательн знаменательн знаменательн знамен знамен знамен знаменит знаменит знаменит знаменит знаменит знамен знаменова знам знам знан знан знан знан знан знан знан знатн знатн знатност знатн знатн знаток знат знат значат значен значен значен значен значен знач знач знач знач значительн значительн значительн значительн значительн значительн значительн значительн значительн значительн значительн знач знач знач зна знают знающ знающ знающ знающ зна зноб зно зно зно знойков знойн знойн знойн зно зно зов зов зов зовет зовет зовеш зов зов зов зовут зовущ зол зол зол золовк зол золот золот золот золотеет золотист золотист золотист золот золот золот золотник золот золотоглав золот золот золот золотокудр золот золотопромышленник золотопромышлен золот золот золотух золотушн золот золот золот золот золот зол зонтик зонтик зонтик зоргенфр зор зорк зорк зорк зорк зорк зорч зор зор зосим зосимов зосимов зосимов зрак зрачк зреет зрел зрелищ зрел зрел зрелост зрелост зрелост зрел зрел зрен зрен зрен зрен зреют зрим зрим зрит зрител зрител зрител зрител зриш зря зуб зуб зуб зуб зубк зуб зубок зубоскал зубчат зубчат зубчат зуб зуд зурн зурн зыбк зыбк зыбк зыблет зят и иб ибс ибс ива ива иван иван иванов иванович иванович иванович ивановн ивановн ивановн ивановн ивановн иванов ивановск иванов иван иванонв иван иваныч иваныч иваныч иваныч иг игл игл игл игл игнатьевич игнатьевич игнатьевич игнатьевн игнатьевн игнатьич игнатьич игнашк иг иголк иголочк иг игр игра игра игра игра игра игра игра игра игра игра игра играл игр игра игр игра игра 
играюч игра игра игра игр игр игрив игрив игривеньк игрив игрив игрив игрив игрив игр игрок игр игрушечк игрушк игр идеа идеа идеал идеал идеал идеализм идеалист идеал идеал идеал идеал идеальн идеальн идеальн ид иде ид ид идейк идейк идейн ид идемт идет идет идет идеш ид иде ид идилл идилл идиот идиотк идиотств ид идол идол идол идол идт ид идут идуч идущ идущ идущ идущ идущ идущ идущ идущ ид иезуит иен иер иероглиф иерусал иерусалим ижиц из изб изб избав избав избав избав избавл избавлен избавлен избавля избавл избав избалова избалова избалова избалова избалова изб изб избега избега избега избега избега избег избегнул избегнут избежа изберет избереш изб избит избит избит избит избит избра избра избра избра избра избра избра избра избран избра изб избудеш избушк изб избытк избытк избыток изб изведа изведа изведа изведа изведа изведеш извел изверг изверг извергнет извергнут извер извер извест извест извест извест извест извест извест извест известк известк известков известн известн известн известн известн известн известн известн известн известн известн известн известн известн известн известн известн извест извест извечн извечн извещен извещ изв извив извив извилист извинен извинен извин извин извин извин извин извин извин извин извиня извиня извин извиня извиня извиня извин извин извлек извлека извлекл извлек извлеч извн извод извозчик извозчик извозчик извозчик извозчик извозчик извозчик извозчик извол извол извол извол извол извол извол извольт извол изворачива изворачива изворот изворотлив изворот извращен изгад изгад изгад изгиб изгиб изглад изглад изгнанник изголов изголов изгон изгоня изготов изготов изготовля издава издавн издалек изда изда издан издан издан издан изданьиц издател издател издательск издательств издател изда издева издева издел издерга издержа издержек издержк издержк издержк издержк издержк издохн издохнут издрогнувш издрогш издыхан издыха изжар изжог иззяб излага излага излага излага излаг излер излечива изл излива излива излива излива изл 
излишек излишн излишн излишн излишн излишн излиян излиян излов изловч изложен изложен излож излож излож излож излома излома излома излома излома излома излома излома излома излома излучен измайл измалетств измельча изм изм изменен изменен измен измен измен измен измен измен измен измен измен измен измен измен изменник изменниц измен измен изменя изменя изменя изменя измен измен изменя изменя измен измер измер измер измеря измеря измеря измер измокш измокш измор измуч измучен измучен измучен измучен измуч измуч измуч измуч измуч измуч измуч измуч измуч измуч измуч измят измят измят измят измят измят изнежен изнежен изнежен изнемог изнемога изнемога изнемога изнемога изнемога изнемог изнемогл изнеможен изнеможен изнеможен изнемоч износ изношен изношен изношен изношен изнурен изнурен изнутр из изоблич изобража изобража изобража изобража изобража изображ изображ изображен изображ изобраз изобраз изобразительн изобраз изобраз изобрета изобретател изобретен изобретен изобьеш изовра изодра изодра изойт изорва изорвал изорва изошл изощр изощря израсходова изредк изрек изрека изрека изрека изтоптал изувер изуверств изукрашен изум изум изум изумительн изумительн изумительн изумительн изумительн изумительн изум изум изумл изумлен изумлен изумлен изумлен изумлен изумлен изумлен изумлен изуродова изустн изуча изуча изуча изуч изучен изучен изуч изуч изуч изъезд изъяв изъяв изъявлен изъявля изъясн изъясн изъясня изъят изыска изыска изыска изюминк изюм изяществ изящн изящн изящн изящн изящн изящн изящн изящн изящн изящн изящн иисус иисус икнув икон иконостас икон икон ил ил илиад илист ил ил ил ил ильин ильин ильин ильиничн ильинск ильинск ильинск ильинск ильинск ильинск ильинск ильич ильич ильич ильич ильич ил ил илюш илюш илюш илюш им иматр имевш имевш имевш имевш имевш имевш име имеет имеет имеет имееш им им имел имел имел имел имел имел им им имен имен имен имен имен имен именин именин именин именинник именинниц именинниц именин имен имен имен именьишк имет им имеют имеющ 
имеющ имеющ имеющ име им импровизир имуществ имуществен имуществ им инач ин инд индеек индейк индейк индейк индейск индивидуализм индивидуум индийск индифферентизм инд ине ин инициал инкрустац ин иногд ин ин ин инок ин иностранец иностранк иностра иностра иностра иностранц иностранц иностранц иностранц ин инспектор инстанц инстанц инстинкт инстинкт инстинкт инстинктивн инстинктивн инстинкт инстинкт институт институт институт инструкц инструкц инструмент инструмент инструмент инструмент инструмент интегральн интеллигент интеллигентн интеллигенц интеллигенц интендант интерес интерес интерес интерес интерес интересн интересн интересн интересн интересн интересн интересн интересн интересн интересн интересн интересн интересн интерес интересова интересова интересова интересова интересова интересова интерес интерес интересует интересует интерес интерес интерес интересу интерес интимн интимн интонац интонац интриг интригова интриг ин ин инфлюэнц информац ин ин ин ин иоаннов иога ипохондр ипохондрик ипохондрик ипохондрическ ипохондр ирин ирин ирин ирин ирин ирис ирис ирон ироническ ироническ иртыш иса ис искажа искажа искаж искажен искажен искажен искажен искаж исказ исказ исказ исказ иска иска искалечен искалечен иска иск искан искан искательниц иска исключ исключен исключен исключен исключен исключительн исключительн исключительн исключительн исковерка исковерка исковерка искон искорен искорен искореня искос искр искр искр искрен искрен искрен искрен искрен искрен искрен искрен искрен искрен искрен искрен искрен искрен искрен искрив искрив искрив искрив искрив искривлен искривлен искривлен искривлен искривл искр искр искрометн искр искупа искуп искупительн искупител искуп искуплен искусн искусн искусн искусств искусств искусств искусств искусствен искусствен искусствен искусств искусств искушен испак испанец испан испанк испанск испарен испарен испачка испачка исп испекл испекут испепел испепеля испепеля испечет исписа исписа исписа исписа испит испит исп исповед 
исповедова исповедова исповедова исповедыва исповед исповед исподлоб исподтишк исполинск исполн исполн исполнен исполнен исполнен исполнен исполнен исполнен исполнен исполнен исполнен исполн исполн исполн исполн исполн исполн исполн исполним исполн исполн исполнител исполнител исполн исполн исполн исполн исполн исполня исполня исполня исполня исполня исполня исполн использова использован испорт испорт испорт испорт испорт испорч испорчен испорчен испорч испошл исправ исправим исправ исправ исправлен исправлен исправля исправля исправник исправник исправн исправн исправн испрашива испрос испуг испуг испуга испуга испуга испуга испуга испуга испуга испугал испуга испуга испуга испуга испуга испуга испуга испуга испуга испуга испуга испуга испуга испуга испуга испуг испуг испуг испужа испужа испуска испуск испуст испыта испыта испыта испыта испыта испытан испытан испытан испыта испыта испыта испыта испытующ испыт испытыва испытыва испытыва испытыва испытыва испытыва исследован исследова исслед иссоса иссохш иссохш иссохш иссохш иссохш исстрада исстрада исстрада исступлен исступлен исступлен исступлен исступлен исступлен исступлен истаска истаска истаска истаска истая истерза истерза истерза истерза истерза истерза истерза истерик истерик истерическ истерическ истерическ истерическ истеричн истечен истин истин истин истин истин истин истин истин истин истин истин истин истин истин истин истолкован истолкован истолковател истом истом истом исторг истор истор историк историческ историческ истор истор источник источник источник источник источник истоща истощ истощен истощ истощ истощ истощ истрат истрат истрачен истрачен истреб истреб истреблен истреблен истребля истребля истрепа истрепа истрепа истрепа истяза истязан истязан исход исход исход исход исход исхуда исхудал исцелен исцеля исчахнет исчез исчеза исчеза исчеза исчеза исчеза исчеза исчеза исчезл исчезл исчезл исчезнет исчезнет исчезн исчезнут исчерпа исчерпа исчерчен исчислен исчисл исчисл исшарка ит итак ита 
итал итал итальянск итальянск итальянск итог итог итог ит иуд иуде иуд их ихн ихн ихн ихн ихн ихн иш ищ ищ ищет ищет ищеш ищ ищ ищ ищут ищущ ищущ июл июл июн июн июн й йодоформ к ка кабак кабак кабак кабак кабал кабацк кабачн кабачн кабинет кабинет кабинет кабинет кабинет кабинет каблук каблук каблук каблучк кабул каб кавалер кавалер кавалер кавалер кавалер кавалер кавалькад каватин каватин кавказ кавказск кавычк кавычк кад кад кад кадил кадильн кадк кадочк кадочк кажд кажд кажд кажд кажд кажд кажд кажд кажд кажд кажд кажд кажет кажет кажет кажеш каж каж кажут кажущ каза каза казак каза каза каза каза каза казанск казарм казарм казарм казарм каза каза казачк казачк казачок казен казен казен казен казен казен казенщин казимир казн казн казнен казн казн казн казн казн казн казн казн казн казн казн казн казуистик казуистик казуистик казус кайм как как как как как как как как каков каков каков каков как как как как как как кактус кактус как как кал каламбур каланч каланч калач калач калашн калеб калеб калек калек календар календар календар кален калигул калинников калитк калитк калитк калитк калит калош калош калош камел камел камел каменист каменист камен камен камен камен камен камен каменщик камен камен камер камердинер камердинер камешек камешк камешк камин камн камн камн камн камн камн камн камн каморк каморк каморк каморк кампан камушк камфар камыш камыш камыш кана канав канав канавк канав канав кана кана канал канал канал канареек канарейк канарейк канарейк канарейк канат канв канв канв кандал канделябр канет канеш каникул канка канка канул канул канцеляр канцелярист канцелярист канцелярист канцеляр канцелярск канцелярск канцон капа капел капельк капельк капернаум капернаумов капернаумов капернаумов капита капита капиталист капитал капитал капитал капитал капитальн капитальн капитальн капитальн капитальн капитальн капита капитол капитэн капка капл капл каплет капл капл каплют капл капл капл капот каприз каприз каприз каприз каприз капризн капризниц капризнича 
капризнича капризнича капризн капризн капризн каприз каприз каприз капсюл капуст капуст капуст капуст кар карабка карабк карава карамзин карандаш карандаш карандаш карандашик карандаш кара караул караул караул караш кара кардамонов карет карет карет карет каретн каретн карет карет карет кар кар карка карлик карловн карлос карл карма карма карман карман карман карма карма карма карман карма карм карменсит карниз карп карпат карпович карт карт картав карт карт карт карт картежник картин картин картин картин картинк картинк картин картин картинок картин картин картишк картограмм картограмм картограмм картограмм карт картонк картонк картофел картофел картофел картофел карточек карточк карточк карт картуз картуз карт кар карьер карьер карьер карьер карьер каса каса каса каса каса каса каса каса каса каса кас каск каскетк каск кастаньет кастелянш кастрюл кастрюл кастрюл ката ката ката катастроф катастроф катастроф катастроф катастроф ката кат кат катерин катерин катерин катерин катерин катерин катехизис кат кат кат кат каторг каторг каторг каторг каторжник каторжн каторжн каторжн каторжн каторжн катышек кат кат кат кат каучук кафедр кафедр кафешанта кафта кафта кафтан кафтан кафэ кацавеек кацавейк кача кача кача кача кача кача качалк кача качан кача кача кач качел качел качеств качеств качеств качеств качеств качнув качнул качн каш каш кашел каш кашин кашл кашл кашля кашля кашля кашлянув кашлянул кашля кашля кашл каштанов ка кают кают кают кая квадратн квадрат кварта кварта квартал квартальн квартальн квартальн квартальн квартальн квартальн квартальн кварташк квартир квартир квартир квартир квартир квартир квартирк квартирк квартирн квартирн квартирова квартир квартир квартир квас квас квас квасц квас квентин кверх кел кел кем кеплеров керченск кива кива кива кив кивер кивнув кивнул кивнул кида кида кида кида кида кида кид ки киев киев киевск ки кильватерн кинематограф кинжа кинжал кин кинув кинул кинул кинул кинул кинут кинут кинут кинут кинут киот киот киот кипарисн 
кипарис кипел кипел кипел кипен кипет кип кипсэк кипуч кипуч кип кипятк кипят кирбитьевн кирбитьевн кир кирк кирк кирк кирпич кирпич кирпич кирпичик кирпичик кирпичн кирпичн кирсановск кисе кисейн кисейн кисел кисел кисел кисл кисл кисл киснет киссинг кист кист кист кист китайск китайск китайц кит кихот кичк кичлив кишат кишел кишел кишк клавикорд клавиш клавиш клад кладб кладбищ кладбищ кладбищенск кладбищ кладет кладет клад кладк кладов кладов клад клад кладут клад клал клал кланя кланя кланя кланя кланя кланя кланя клан кларнет класс класс класс класт клеверн клеверн клевет клевет клеветник клевет клевет клевет клевещет клевещут клеенк клеенчат клеймен клейм клейм клейм клейм клекот клен кленов клен клетк клетк клетк клетк клетушк клетушк клетчат клешн клещ клика клик клика клик клик кликн кликн кликнул кликнут климат климат климат климатическ клинк клинок клич кличет кличеш клич клоак клоак клоак клобук клок клокота клокочут клон клон клон клон клон клон клон клон клоп клоп клоп клопшток клоп клочк клочк клочк клочк клочк клочок клуб клуб клуб клубн клуб клубок клуб клумб клюк клюкв клюк ключ ключ ключ ключ ключ ключик ключниц ключниц ключ клюют кля клял клял клял клянет клянет клянеш клян клян клян кляст кляст клятв клятв кляч кляч клячонк кляч кн книг книг книг книг книг книг книг книг книгопродавец книгопродавц книг книжек книжечк книжк книжк книжк книжк книжк книжк книжк книжн книжн книжн книжонк кноп кнопк кнопк кнут кнут кнут кнут кнут кнут кнут княгин княгин княжеск княжеск княжеск княжн княжн княж княз княз князьк княз княз княз ко кобелев кобыл кобыленк кобыленк кобыленк кобылиц кобылиц кобылятников ков коварн коварн коварн коварн коварств коварств коварств ков ковер коверка ковр ковр ковр ковр коврик ковр ковр ковр ковр ковчег ковчег ковчег ковш ков ковыл ков ковыл ковыря когд ког когс когт когт когт когт кодекс ко ко ко кожан кожан кожан кож кож коз коз коз козел козел козл козлов козл козн козоедов коз козыр козыр козырьк коз ко ко ко кокард кокетк 
кокетлив кокетнича кокетнича кокетнича кокетнича кокетнича кокетств кокетств кокетств кокетств кол кол колбасниц колбасн колбас колдовск колдовств колдун колдун колдун колд кол колеба колебан колебан колеблет колеблющ колеблющ колебл колебл кол кол кол кол колен колен колен колен колен коленк коленк коленк коленкоров коленк кол коленопреклонен коленочк коленяк колен колен колен колес колес колес колес колесниц колес колец колечищ колечк колечк колечк кол коле кол количеств количеств количеств колк коллежск коллежск коллежск коллежск коллежск коллекц коллекц колод колодез колодец колодник колод колод колок колокол колокол колокол колокольн колокольн колокольн колокольн колокольц колокольчик колокольчик колокольчик колокольчик колокольчик колол колол коломенск колон колон колон колон колор колорит колорит колос колос колотилин колот колот колот колот колотушк колот колот колот колпак колпак колпак колпак колпин колумб колхид колча колыбел колыбел колымаг колымягин колыха колыха колых кол кольк кольц кольц кольц кольц кольц кольчуг кол колюч колюч колюч кол коляск коляск коляск коляск коляск колясочк колясочк колясочк ком команд команд командир командир командировк командир командова командор командор комар комбинац комед комед комед комет комет комет комизм комик комисс комисс комисс комическ комическ комическ комическ комк комк комк коммерческ коммиссаржевск коммун коммун коммун коммун коммун комнат комнат комнат комнат комнат комнатк комнатк комнатк комнатк комнатн комнат комнат комод комод комод комод комок компанейск компан компан компан компан компаньон компаньон комплекц комплимент комплимент комплимент комплимент компромисс ком комфорт комфорт комфорт комфортн конверт конво конвойн конвойн конвульс конг конгресс кондитер кондитерск кондрашк кондуктор кон кон конец конечн конечн конник конниц консервативн консервативн консерватор консилиум конск конск конск конспиративн конста константин константин константин константинопол константин конституц конституц 
консультац контор контор контор контор конторк конторск контор конторщик конторщик контор контрабас контракт контракт контракт контракт контракт контракт контральт контрданс контрол контр конур конур конур конур конус конфект конфет конфуз конфуз конфуз конфузлив конфузлив конфузьт конфуз конц конц конц конц концентрирова концентрическ концепц концерт концерт концерт конц конц конц конц конча конча конча конча конча кончат конча конча конча конч конч конч конч конч кончик кончик кончик кончик конч конч конч конч конч конч конч конч конч конч конч конч конч конч кон коньк коньяк коньяк коньяк коньячк кон конюш конюшен конюшн конюшн конюшн конюшн кон копа копа копеек копеечк копейк копейк копейк копейк копейк копейк коп коперник коперник коп коп коп коп коп коп копн копот копош коп копыт копыт копыт корабл корабл корабл корабл коралл кордегард кордел кордел кордел корен корен корен корен корен корен корешк корзин корзинк корзин корзиночк корзин корзин коридор коридор коридор коридор коридор коридор кор кориц кориц кориц кориц коричнев коричнев коричнев коричнев корк корк корм корм корм кормилец корм кормилиц корм корм корм кормлен корм корн корнел корн корнет корнет корн корн короб короб короб коробк коробк коробк коробк коробочк коробочк коробочк короб коров коров коров коровник коров коров коров коров корок королев королев корол корол корон корон корон корота коротеньк коротеньк коротеньк коротеньк коротеньк коротк коротк коротк коротк коротк коротк коротк коротк короч корп корпус корпус корпус корреджиев корректн корректур корреспонденц корреспонденц корточк корчаг корч коршун коршун кор корыстолюбив корыст корыт корыт корыт корыт кор кос кос кос косвен косеньк кос косичк космос коснел косност косноязыч косноязычн косноязычн косноязычн коснувш коснул коснул коснул коснут кос косоворотк косоворотк кос косоплетк кос кост кост костер кост костляв косточк косточк косточк костр костр костр кост костюм костюм костюм костюм костюм костюм костюм костюшк кост костян 
костяшек кос кос косынк косынк косынк косынок косыночк косыночк косыночк косяк кос кос кос кот кот котенк котенк котеночк котлет котлет котлетк котлет котомк котор котор котор котор котор котор котор котор котор котор котор котор котор коттедж коттен кот коф кофе коф коф кофейник кофейник кофейник кофейник кофейн коф кофе коф кофточк кофточк кофточк кох кох кохан кох кочев коченеют кочерг кочк кочк коч кошач кошек кошелек кошельк кошельк кошемар кошк кошк кошк кошмар кощунствен кра краб краден краден крадеш кра кра краешк краж кра кра крайн крайн крайн крайн крайн крайност крайност крайност крайност крайн крак крал кран крапив крапив крапив крапинк крас красавец красавиц красавиц красавиц красавиц красавиц красавиц красавц крас крас крас красив красив красив красивеньк красив красив красив красив красив красив красив красив красив красив крас крас красильщик красильщик красильщик красильщик красильщик крас крас краск краск краск краск краск краск краск краск красн краснеет краснееш краснел краснел красненьк красненьк красненьк красненьк краснет красн краснеющ красне красн красноват красноват красноват красноват красноват красн красн красн красн красн красноречив красноречив красноречив краснореч краснощек красн красн красн красн красн красн красн красн красова крас красок красот красот красот красот красот красот красот красочн краст крас крас кратер кратк кратк кратковремен краткост кратк кратчайш крахмал крашен крашен кра кра кра кра кред кредитивн кредитк кредитк кредитк кредитк кредитн кредитн кредитова кредиток кредиторш кредит кремл кремнист крендел крендельк крендел креп креп креп креп креп крепк крепк крепк крепк крепк крепк крепк крепк крепнет крепок крепост крепостник крепостник крепостн крепостн крепостн крепостн крепостн крепост крепост крепча крепч креп креп кресел кресл кресл кресл кресл кресл кресл крест крест крест крест крестик крест крест крест крест крест крестин крестин крест крест крест крестненьк крестн крестн крест крестовск крестов крест 
крест крест крестья крестьян крестьян крестьян крестьянин крестьянин крестьянк крестьянск крестьянск крестьянск крест крещ крещен крещен крив кривизн кривизн крив крив крив крив крив крив крив кризис крик крик крик крик крик крик криклив крикнет крикнеш крикнул крикнул крикнут крик крик крик кринолин кристал кристальн критик критик критик критикова крич крича крича крича кричал кричат крича крича крич крич крич крич крич кров кровав кровав кровав кровав кровав кровав кроват кроватк кроватк крова кроват кроват кровел кров кров кровл кровл кровл кровл кровн кровн кровн кровн кров кровообращен кровопроливц кровопролит кровотечен кров кров кроет кро кро крокет крокодил кролик кром крон крот кротк кротк кротк кротк кротк кротк кротк кротк кротк кроток кротост кротост кротост кротост крошат крошен крошечн крошечн крошечн крошечн крош крошк крошк кру круаз круг круг круг круг круг круг круг кругл кругл кругленьк кругл кругл кругл круглолиц кругл кругл кругл кругл кругл кругл кругл круг круговорот круговорот круг круг кружат кружа кружа круж кружев кружев кружев кружевн кружевн кружев кружевц кружен кружен кружен круж круж круж круж круж круж круж кружк кружк кружк кружк кружок крупн крупн крупн крупн крупн крупн крупн круп круп крут крут крутизн крут крут крут крут крут круторог крут крут крут круч круч круч кручин круш круш крыл крыл крыл крылат крылат крыл крыл крыл крыл крылов крыл крыл крыл крыльц крыльц крыльц крыльц крыльц крыл крыл крыл крым крым крыс крыс крысин крыс крыс крыт крыт крыт крыш крыш крыш крыш крышк крыш крюк крюк крюк крюк крючк крючк крючк крючк крючкотворец крючок крючочк крякнет крякнул кряхтел кряхт кряхт кряхт кстат ксюш кто кувшин кувыркнул кувырк куд кудахта кудахта кудахтан кудахтан кудахта кудел кудр кудр кудрин кудряв кудр куд кузин кузнец кузнец кузнец кузнец кузнечик кузниц кузовков кузьк кузьминичн кукл куколк кукушк кулак кулак кулак кулак кулак кулачонк кулебяк кулебяк кулебяк куликов куликов кулинарн кулис кулыгин кулыгин кулыгин 
кулыгин кульков кульк культурн кум кум кумач кум кумир кумирн кумир кумир кум кум кум кум купа купа купа купальн купальн купальн купан купан купа купец купеческ купеческ куп купидон куп куп куп куп куп купл купл куплен куплен куплен куплен купл купл купол купол купон купц купц купц купц купц купц купчик купчих кур кураж кур курант курен кур кур кур курин курин кур кур кур куриц куриц куриц куриц курнос курнос курок куролес куропатк курс курс курсистк курск куртин куртк куртк куртк курточк курточк курчав курчав кур курьез курьез курьер курьер курьерск курьер кур кур курятник курятник куря куря куса кус куск куск куск куск кусок кусочек кусочк кусочк кусочк кусочк куст куст куст кустарник куст куст кустик куст куст куст кута кута кут кут кут кут кут кухарк кухарк кухарк кухарк кухарк кухарк кухарк кухарок кухмистерск кухн кухн кухн кухн кухн кухн кухон кухон куц куч куч куч кучер кучер кучер кучер кучер кучер кучер куч кучк кучк кучк кучк куч куша куша кушак кушак куша куша куша кушан кушан куша куша куша кушетк кушетк куш кущ кующ кхе кхи л ла лабаз лабазник лабазник лабазник лабиринт лавиз лавиз лавиз лавиз лавин лавк лавк лавк лавк лавк лавк лавочек лавочк лавочк лавочк лавочк лавочк лавочк лавочник лавочник лавочник лавочн лавр лаврент лаврент лавр лавр лагер лагер лагун лагун лад лада лад лад лад ладн ладон ладон ладон ладон лад лад ла ла лает лает лает лазаревич лазар лазар лаз лаз лазур лазурн лазур ла лайдак лайт лакедемон лак лаке лак лак лакейск лакейск лакейск лакейск лакейщин лак лаке лаке лакирова лакомств ламп лампад лампадк лампад ламп ламп ламп ламп лангваген лангваген ландыш ландыш ландыш ланит ланит лап лапт лапт лап лапш лапш ларец ларц лас ласк ласк ласка ласка ласка ласка ласк ласкательн ласк ласка ласка ласка ласка ласка ласка ласк ласк ласк ласк ласк ласков ласков ласков ласков ласков ласков ласков ласков ласков ласков ласк ласк ласточк ласточкин лат лат латник латын латынск латын латыш лафит лачуг ла лая лая лба лбов лбом лбу лгал лгал лгал 
лгат лги лгу лгунишк лгун лгут лгущ лебед лебед лебед лебез лебезятник лебезятников лебезятников лебезятников лебезятников лев лев лев лев лев лев лев лев лег легав легион легион легистраторш легк легк легк легк легк легк легк легк легковерн легковерн легк легк легк легк легкомысл легкомыслен легкомыслен легкомыслен легкомыслен легкомыслен легкомыслен легкомысл легкомысл легкомысл легкомысл легкопер легк легк легл легл легл легок легоньк легч лед леденец ледене леден леденц леденц леденя лед ледк ледоход ледян леж лежа лежа лежа лежа лежа лежа лежа лежа лежа лежа лежа лежа лежанк лежанк лежанк лежанок лежан лежан лежан лежат лежа лежа лежа лежа лежа леж леж леж леж леж лез лезв лезв лезв лезгинк лезет лезеш лезт лезут лейтмот лейтмотив лекар лекарств лекарств лекарств лекарств лекарствен лекарств лексикон лектор лекц лекц лекц лелеем лелеет лел лелея лен лен лен лен ленив ленив ленивец ленив ленив ленив ленив ленив ленив ленив ленив ленив ленив лен ленор леност леночк лент лент ленточек ленточк лент лент лентя лент лен лен лен лен леонард леонид леонид лепестк лепет лепета лепета лепет лепечет леп леп лепн леп леп лермонт лермонтов лес лес лес лес лес лесничеств лесничеств леснич лесн лесн лесн лес лесок лес лест лест лестниц лестниц лестниц лестниц лестниц лестниц лестниц лестничеств лестн лестн лестн лестн лест лест лес лет лет лета лета лета лета лета лета лет лет лета лет лета летевш летел летел летел летет лет лет лет лет лет летн летн летн летн летн летн летн летн летн летн лет лет летопис лет летун летуч летуч летуч летуч лет лет лечат лечебниц лечен лечен леч леч леч леч леч леч леч леч леш леш леш лжет лжет лжеш лжи лжив лжив лжив ли либера либерализм либерал либеральн либеральн либеральн либ ливен ливингстон ливр ливр ливрейн ливре лид лид лидиньк лид лид лидочк лидочк лидочк лидочк лизавет лизавет лизаветин лизаветин лизавет лизавет лизавет лизнет лик лик лик ликован лик лик ликург ликург лил лил лил лил лил лиллас лилов лилов лилов лилов лил лимон 
лимоннича лимон линейк линейк линейк лин лин лин лин лин линор линял линя лион липк липк липк липл липов липов липов липов липов липпевехзел липп лип лип лир лирическ лисиц лиссабонск лиссабонск лист лист лист лист листик листик листк листк листк листк лист листок лист листочк лист лист лист лист лист ли литавр литейн литейн литератор литератор литератор литератор литератур литератур литературн литературн литературн литературн литератур литератур литератур лит лит лифчик лих лих лихоманк лихорадк лихорадк лихорадк лихорадк лихорадочн лихорадочн лихорадочн лихорадочн лихорадочн лихорадочн лихорадочн лихорадочн лихорадочн лихорадочн лиц лиц лиц лиц лиц лицезрет лицемерн лиц лиц лиц личек личик личик личик личик личин личн личн личн личн личн личн личност личност личност личн личн личн личн личн лиша лиша лишен лишен лишен лишен лишен лишен лиш лиш лиш лиш лиш лиш лишк лишн лишн лишн лишн лишн лишн лишн лишн лиш лиш ло лоб лобзан лобыза ловелас лов лов лов лов ловк ловк ловк ловкост ловк ловл ловушк ловушк ловушк лов лов лог логик логик логик логиныч логическ логическ логическ логическ логичн логичн лодк лодк лодк лодк лодочник лодочник лодочник лож лож ложат лож ложек лож ложемент ложечк ложечк ложечк ложечк лож лож лож лож лож лож лож лож лож ложк ложк ложк ложн ложн ложн лож лож лож лоз лозунг лозунг локон локончик локончик локот локт локт локт локт локт локт лом лома лома лома лома лома ломал ломан лома лома лома лома лома лом ломбард ломберн ломберн ломберн лом лом ломк ломк ломов ломов лом ломот ломт ломт лом лон лондон лондон лондон лон лон лопаст лопаст лопатк лопат лопахин лопахин лопахин лопахин лопнет лопнувш лопнул лопнул лорд лорд лорнет лорнетк лорнетк лорнирова лос лоск лоскутк лоскутк лоскутк лоскутн лоскуток лоскут лоскут лосн лосн лосня лососин лососин лот лотере лотер лотер лотк лот лохмат лохмотник лохмотник лохмот лохмот лохмот лохмот лошад лошаденк лошаденк лошаденк лошад лошадин лошадк лошадк лошадк лошад лошадьм лошад лошад лошад луг луг луг 
луг луг луж луж луж лужин лужин лужин лужинск лужин лужин лужиц лужиц луж луиз лук лук лука лукав лукав лукав лукав лукавл лукав лукав лукав лукав лукав лукавств лукавств лукавств лукав лукав лукав лукав лукав лукав луковиц лук лукомор лук лун лунатизм лун лун лун лун лун лун лун лун лун лун лун лун луп луч луч луч луч луч лучезарн луч луч лучинк лучин лучин лучист лучист луч лучш лучш лучш лучш лучш лучш лучш лучш лучш лучш лучш лущ лчат лы лыс лысин лысин лысин лысин лыс лыс ль львин львин львиц львиц львов львович львовн льву львы льда льдин льдом льет льет льзя льнет льном льстец льстив льстив льстив льстил льша льшим льших льшу льщу льюис льют лю люб любв любез любезн любезн любезн любезн любезн любезн любезн любезн любезн любезн любезн любезн любезн любезн любезн любек люб люб люб люб люб люб люб любим любимец любимиц любим любим любим любим любим любимц любимц любим любим любим любим люб люб любител любител люб люб любл люб любова любова любова любова любов любовник любовник любовник любовник любовниц любовниц любовниц любовн любовн любовн любов любов люб люб люб любопыт любопытн любопытн любопытн любопытн любопытн любопытн любопытн любопытн любопытн любопытн любопытн любопытств любопытств любопытств любопытств люб люб любу люб люб любя любя любя любя любя любя любя люд люд людвиговн людвиговн людвиговн люд люд людовик люд людск людск людск людск людск людск людск людьм люд люд люл люстр лютн лют лют люциферов ля ляг ляга ляга ляга лягнул ляг лягут лягушек ляда ляжет ляжет ляжеш лязбил лязг лязга ляп м ма мавзол маг магазин магазин магазин магазин магазин магазин магарыч магдалин магистр магическ магнатск магнетизер магомет магомет магомет мад мадемуазел мадер мадер мадон мадон мадон мадон ма маевск мает маеш мажет мажорд мажордом маж маза маз мазилк мазнул мазурик мазурк мазурк мазурк мазурк маз ма майк майор майор майор майск майск майск майск мак макарон маклашин маклер маков маков мак макушк макушк мал мал малан мал мал мал мал мал мал мал мал маленьк 
маленьк маленьк маленьк маленьк маленьк маленьк маленьк маленьк маленьк маленьк маленьк маленьк маленьк малин малинк малинов малинов малин малин малин малицк мал маловажн маловажн мал малограмотн малодействен малодуш малодуш малодуш малодушн малодушн мал мал малолетн малолетн малолетн мал мал малоподвижн малостоя малост мал мал мал мал мал мал мальск мальчик мальчик мальчик мальчик мальчик мальчик мальчишек мальчишеск мальчишеск мальчишечк мальчишк мальчишк мальчишк мальчишк мальчишк мальчишк мальчишк мальчишк мальчонк мальчуга малютк малюток малявочк малявочк маляр маляр мам мама мамас мам мамаш мамаш мамаш мамаш мамашин мамаш мам маменьк маменьк маменьк маменьк мамк мамочк мам мам ман манг мандолин мандолин манер манер манер манер манер манер манер манер манжет манжет ман ман ман ман манир ман манифест манишк манишк манишк манкирова манкирова манкирова манкирует мансард мансард мантил мантил мантильк мантильк мантил мант манфред маня маргарит марев мар марин марин маринова марин марин марионетк мар мар марк маркиз мармелад мармелад мармеладов мармеладов мармеладов мармеладов марсов март март март марф марф марф марф марф марш маршир мар мар мар мар мар марьяж маск маскарад маскарад маск маск маск масл маслениц маслениц маслениц маслен масл масл масл маслян масок масс массивн массивн массивн массим массис масс масс масс маст мастер мастериц мастеров мастеров мастеров мастер мастерск мастерск мастерск мастерск мастерск мастер масштаб матвевн матвеевич матвеевич матвеевич матвеевич матвеевн матвеевн матвеевн матвеевн матвеевн матве матвеич матвеич матвеич матв математик математик математическ математическ математическ математическ математическ матер матер материа материа материал материал материал материал материальн материальн материальн материальн материальн матер матер матер материнск материнск матер матер матерчат матер матер матерьялистск матов матов матов мат матр матрос матрос матушк матушк матушк матчиш мат маха маха мах махнеш махнув махнул махнул мах 
махочк махочк мах мачех мачех мачех мачех мачт мачт маш маш маш машеньк машеньк машеньк машет маш машин машин машинальн машин машинист машинк машин машин машк маш ма маяк маяк маятник маятник маятник мая маяч мгла мгле мглист мглист мглист мглист мгло мгло мглу мглы мгновен мгновен мгновен мгновен мгновен мгновен мгновен мгновен мгновен мгновен мгновен мгновен ме мебел мебел мебельн мебельщик меблирова меблирова мед меда медал медальон медальон медал медвед медвед медведенк медведенк медведенк медведенк медвед медвед медвеж мед медикамент медик медик медицин медицинск медицинск медицин медицин медлен медлен медлен медлен медлен медлен медлен медлен медлен медл медл медл медл медлительн медлительн медл медл медн медн медн медн медн медн медн медн медн медов медов мед мед мед мед меж межд меж мездров мейербер мел меланхолик меланхолик меланхолическ меланхолическ меланхолическ меланхоличн меланхол меленьк мелет мелет мелеют мел мелк мелк мелк мелк мелк мелк мелк мелк мелк мелк мелк мелкоозабочен мелкопоместн мелк мелод мелодрам мел мелоч мелоч мелоч мелоч мелочн мелочн мелочн мелочн мелоч мелоч мелька мелька мелька мелька мелька мелька мелька мелька мелька мелька мелька мелька мелькнет мелькнувш мелькнувш мелькнувш мелькнул мелькнул мелькнул мелькнут мельк мельник мельниц мельниц мельниц мельниц мельничн мельчайш мельчайш мельчайш мельча мел мен мен мен ментон меньш меньш меньш меньш меньш меньшинств меньш меньш меньш мен меня меня меня меня меня меня меня менял меня мен мер мер мер мер мерещ мерещ мерещ мерещ мерещ мерещ мерещут мерзавец мерзавц мерзк мерзк мерзк мерзк мерзл мерзнут мерзок мерзост мерзост мерк мерк меркнет мерк мерлехлюнд мерн мерн мерн мерн мерн мер мертв мертв мертв мертв мертвец мертвец мертвец мертвец мертвец мертвец мертвец мертвечинк мертв мертв мертв мертв мертв мертв мертв мертв мертв мертв мертвя мер мерца мерцан мерцан мерцан мерца мерца мер мер мер мессианическ мессин месс месс мест мест мест мест мест мест местечк местечк мест местност 
местност мест мест мест местоположен мест мест мест месяц месяц месяц месяц месяц месяц месяц мета мета металл металлическ металлическ мета метан мета мета метафизик метел метелиц метел метельн метел метеор метеш мет мет метк метлинск метл метнул метнул метнул метод метод методическ метр мет мех мех механизм механик механическ мехмет мех мехов мехов мехов мех меч меч меч мечет мечет меч меч мечт мечта мечта мечта мечта мечта мечта мечта мечта мечт мечт мечтан мечтан мечтан мечтан мечтан мечтан мечтател мечтательниц мечтательн мечтательн мечтательн мечтательн мечтател мечта мечт мечта мечт мечт мечт мечт меч мечут мечут меша меша меша меша меша меша меша меша меша меша меша меша меша меша меша меша меша меша меша меша меша меша меш меш мешка мешк мешк мешк мешок меща мещан мещанин мещанин мещанинишк мещанинишк мещанинишк мещанин мещанк мещанок мещаночк мещанск мещанск ми миг миг мига мига мига мига мига мига мига миг миг мигнет мигн мигнувш мигнул мигнут миг мигрен мизерн мизерн мизерн мизинц микельанджел микола микола микол миколк миколк миколк миколкин миколк миколк микроб микроскопическ микроскопическ микстур мил мил милашк мил мил мил мил миленьк миленьк миленьк милитрис милитрис миллиард миллион миллион миллион миллионер миллионер миллион миллион миллион миллион мил милова мил мил мил милосерд милосерд милосерд милосерд милосердн милосердн милосердн милосердн милосерд милосерд милост милост милостив милостив милостив милостив милостисдар милост милостын милостын милост милочк мил мил мил мил мил мильон милюк милютин мим мимик мим мимолетн мимолетн мимолетн мимоход миндал миндал миндал минерал минет министерств министерств министр министр министр министр министр минова минова минова минова минова минова минова минова мин мин минувш минувш минувш минувш мин минул минус минут минут минут минут минут минутк минутк минутн минутн минутн минутн минутн минут минуток минуточк минут минут минут мин мину мин миньятюрн мир мир мираж мираж мираж мир мир мир мир мир мир мир 
мир мирн мирн мирн мирн мирн мирн мирн мир миров миров миров миров миров миров миров миролюбив мир мирт мирт мир мир мирян мир мисс мистик мистифицирова мистическ мистическ мистическ митре митр митре митрофаниевск митьк митьк митьк митьк миха миха михайл михайлович михайловн михайловн михайловн михайловск михайлыч михайлыч михе мих мих михе мицкевич мичман миш миш мишел миш мишур млад младенец младенц младенц младенц младенц младенческ младенческ младост младш младш младш младш младш млад млет мне мнен мнен мнен мнен мнен мнен мнет мнител мнительн мнительн мнительн мног мног мног мног мног мног многодумн мног многознаменател многозначительн многоколон многократн многократн многолюдн многолюдств мног мног многообеща многообеща многообразн многопен многоразличн многоразличн многоречив многословн многосторон многотрудн многотрудн многоуважа многоуважа многоуважа многоуважа многоцветн многочислен многочислен многочислен многочислен многочислен многочислен многоэтажн многоярусн множествен множеств множеств множ мноз мно мно мня мо мов мовщин мог мог мог могил могилк могилк могилк могилк могил могил могил могил могильн могильн могильн могильн могильщик могл могл могл мог могут могуч могуч могуч могуч могуч могущ могуществ могущ мод мод модел модел модистк модн модничан модн модн модн модн модн мо мо мо мо мо моет моет мо мож можед мож может может можеш можн моза мозг мозг мозг мозг мозол мо мо мо моисеич мо мо мокк мокр мокр мокр мокр мокрот мокрот мокр мокр мокр мокр мокр мокр мокр мол молв молв молв молв молв молв молв молеб мол мол мол мол мол мол мол мол мол молитв молитв молитв молитвен молитвен молитв молитв молитв мол мол мол мол молн молниеносн молниеносн молн молн молн молн молн молод молод молод молодеет молодеж молодеж молоденьк молоденьк молоденьк молодец молод молод молод молод молод молод молод молод молод молод молодц молодцеват молодц молод молод молод молод молод моложав моложав молож молок молок молок молокосос молокосос молокосос молол молот молотк 
молот молочниц молочн молочн молочн молч молча молча молча молча молчалив молчалив молчалив молчалив молчалив молчалив молчалив молчалив молчалив молчан молчан молчан молчан молчан молчановк молчат молча молч молч молч молч молч молч мол мольб мольб мольб мольб мол мол мол моля момент момент момент монастыр монастыр монастырск монастыр мон монах монах монах монах монашк монбла монет монетк монет монетчик монет монист монолог монолог монолог монома мономан мономан мономан мономан монома монопол монотон монотон монотон монтирова монумент монументальн монументальн мопасса морал моральн морга морг морд морд морд мор мор морков морков морков моров морожен морожен мороз мороз мороз мороз морозн морозн морозн морозн морозц мороз мороч мороч морск морск морск морск морск морск морск морск морск морф морф морщ морщат морщ морщ морщ морщин морщин морщин морщинист морщинк морщин морщин морщ морщ мор мор мосеич москв москв москв москв москв москв московск московск московск московск московск московск московск мост мост мост мост мостик мостк мостк мост мостов мостов мостов мостов мост мост мост мос мота мота мота мот мотор мотьк мох мохнат мохнат мохнат моцарт моцион моцион мочальн мочен моч моч мочк моч мошенник мошенник мошенник мошенник мошенническ мошенническ мошенничеств мощн мо мо мрак мрак мрак мрак мрак мрамор мраморн мраморн мрач мрач мрач мрачн мрачн мрачн мрачн мрачн мрачн мрачн мрачн мрачн мрачн мрачн мрачн мрачн мрачн мрачн мстил мстител мстител мстительн мстит мстят мсье му мудр мудр мудр мудрен мудрен мудрен мудрен мудрен мудрец мудрец мудр мудр мудр мудр мудрост мудрств мудр муж муж муж муж муж мужеств мужествен мужествен мужествен мужествен мужествен мужествен мужествен мужеств мужик мужик мужик мужик мужик мужик мужик мужик мужик мужичк мужичк мужичок мужич мужн мужнин мужнин мужнин мужск мужск мужск мужск муж мужчин мужчин мужчин мужчин мужчин муж муж муж муз муз муз муз музык музыкальн музыкальн музыкальн музыкант музыкант музыкант музыкант музык музык музык 
музык мук мук мук мук мук мук мук мундир мундир мундир мундштук мурав муравейник мурав муравьин мурашин мурашк мурлыка мурлыч мурлычут муромц мус мускул мускулист мускул мускул мускульн мускульн мускульн муссинск муссинск мус мут мут мут мут мутн мутн мутн мутн мутн мутн мут мут мух мух мух мухояр мухояров мух муча муча муча муча муча муча муча муча муча муч мучат муча муча муча муч муч мучен мучен мучен мученичеств мучен мучен мучен мучен мучен муч муч муч муч муч муч муч мучим муч мучител мучител мучительн мучительн мучительн мучительниц мучительн мучительн мучительн мучительн мучительн мучительн мучительн мучительн мучительн мучительн мучительн мучительн мучител муч муч муч муч муч муч мучнист мучн мучт муч муч мучьт мучьт мхов мчал мчал мчат мчат мчит мчит мщен мщен мы мыка мыка мыка мыка мыл мыл мыл мыл мыльн мысл мыслен мыслен мысл мысл мысл мыслител мыслител мыслительн мыслительн мыслител мыслител мысл мысл мысл мысл мысл мысл мысл мысл мысл мысля мысля мыт мыт мыт мыт мыч мыш мыш мыш мышк мышк мышлен мышонк мышц мыш мыш мэр мюнхенск мягк мягк мягк мягк мягк мягк мягк мягк мягк мягк мягк мягк мягк мягкост мягкост мягкост мягок мягч мяс мясист мясник мясник мясник мясник мясн мясн мяс мясоед мятеж мятеж мятежн мятежн мятежн мятежн мятежн мятежн мят мят мячик н на набав набалдашник набат набат набега набега набег набега набега набежа набежа набекрен набережн набережн набережн набережн наберет набер наберут набива набива набира набира наб набит набит набит набит набит наблюда наблюда наблюда наблюда наблюда наблюда наблюда наблюдател наблюдательн наблюдател наблюда наблюда наблюда наблюд наблюден наблюден наблюден наблюден наблюден набожн набок наболевш наболел набол наборн набра набра набра набрасыва набрасыва набра набра набрел наброса наброса наброса наброс наброс наброс наброс наброс наброс наброск наважден навален навал навал навал навар наведа наведа навед наведыва наведыва наведыва навек навек навел навел навел навел наверн наверн навернул навертел 
наверх наверх наверч навес навесел навес навес навест навест навест навет навеша навеща навея навзнич навзрыд навин нависа нависл навлеч навод навод навод навод навод навод наводнен наводнен навод навоз наволочк наволочк навостр навостр навостр навостр навра навра навсегд навстреч навыворот навык навыкат навяза навяза навяза навязчив навязыва навязыва навязыва навязыва наг нагиба нагиба нагиба нагиб нагл нагл наглост наглост наглост наглух нагл нагл нагл наглядел наглядел наглядн нагна нагнет нагн нагнув нагнувш нагнул нагнул нагнул нагн нагнут наговарива наговор наговор наговор наговор наг нагон нагорн наготов нагот наград наград наград наград наград наград награжда награжд награжден награжд нагреет нагроможден нагруб нагруб нагрудник нагружа нагрязн нагрянет нагрянул над надава нада надар надбавля надвига надвинул надв надворн надворн надворн надворн надвяж надвяж надвяза над надева надева надева надева надева надева надев наде надеет надеет надежд надежд надежд надежд надежд надежд надежд надежд надежд надежн надежн над надел надел надела надела надела надела надел наденет наденет наденеш наден наден наденьт надер надет надет надет надет надет надет надет над надеют надея надея надея наде надея надзирател надзирательниц надзирател надзирател надзор надив надлежа надлежа надмен надмен надмен надмен надмен надмен надмен надмен над надоб надоб надобн надобн надобн надобн надобн надоеда надоеда надоеда надоеда надоедл надоед надоел надоел надоел надоел надо надоест надоест надолг надорва надорва надорва надот надписа надп надпис надруга надругательств надрыва надрыва надрыва надрыв надсажд надседа надсон надстройк надтреснут надув надува надуван надува над над надуеш надул надул надул надума надума надума надума надума надума надут надут надут надушен надуш над надыша наедин наездник наездниц наемн наеха нажива наживеш наж нажима нажима наж нажм назад назад назва назва назва назва назван назван назван назван назван назван назва назем назидан назидан назидательн назл 
назнача назнача назнача назнача назнача назнача назнача назнача назнача назначен назначен назначен назначен назначен назначен назначен назначен назнач назнач назнач назнач назнач назнач назовет назовет назовеш назов назовут назойлив назойлив назойлив называ называ называ называ называ называ называ называ называ называ называ называ называ называ называ называ называ называ называ называ назыв назюзюка наибесполезн наибеспрерывн наиблагородн наибол наив наивн наивн наивност наивн наивн наивн наивн наивн наигрыва наигрыв наизнанк наизуст наименьш наиполезн наиприличн наиск наит наихитр найд найд найд найд найд найдет найдет найдет найдеш найд найд найд найдут найдут найд найм наймет наймеш найм найм найм найт накажет наказа наказа наказа наказан наказан наказан наказа наказ наказыва наказыва наказыва наказыва наканун накапа накарка накидыва накинет накинет накинув накинул накинул накинул накинул накинут накинут накинут накипа накипевш накип накладет накладыва накладыва накладыва наклевыва наклеен накле наклепа наклон наклонен наклонен наклон наклон наклон наклон наклон наклон наклон наклон наклон наклон наклон наклон наклон наклон наклон наклон наклоня наклон наклон наклон накол наколет наконец накоп накоп накопля накорм накорм накорм накормл накорм нак накошен накрахмален накрахмал накрепк накрест накрич накроют накрыва накрыва накрыва накрыва накрыва накрыва накр накр накр накр накр накрыт накр накупл накур накур накуролес налага налага налага налад налад налга налев налег налег налегк належа налезл налеп налет налетевш налетел налетел налетел налет налива налива налива налива налива налив наливк наливн нализа нал налинова налит нал налиц наличн наличн наложен налож налож наложниц нал налюб налюбуеш налюб нам намаза намахива намахнул намачив намашет намедн намеднишн намек намек намека намека намека намека намека намек намек намека намека намек намек намекнул намек намек намелет намелк наменя намерева намерев намер намер намерен намерен намерен намерен намерен 
намерен намерен намерен намерен намерен намер намер намета намет намечта нам намок наморщ намота намоч нанес нанесен нанесен нанесет нанест нанима нанима нанима нанима нанима нанков нанос нанос наносн наносн наня наня наня наня нанят нанят наня наобеща наоборот наобум наотмаш наотрез напада напада напад нападен напа напа напасеш напаст нап напев напева напев напева напев напев напекл напекут напел напер наперв напереб наперед наперекор наперер напечата напечата напечата напечата напечата напечет напива напива напива напива нап нап нап напира напира напира напир написа написа написа написа написа написа написа написа написа написа написа написа написа напита напитк напиток напитыва нап напиха напишет напишеш напиш напиш напиш напишут наплака наплева наплева наплыва напляса наподлича напоен напо напо напо нап напоказ наполеон наполеон наполеон наполеоновск наполеон наполза наползет наполн наполнен наполнен наполнен наполнен наполнен наполнен наполн наполн наполн наполн наполн наполн наполн наполн наполн наполняем наполня наполня наполня наполня наполня наполня наполня наполня наполн наполовин напомад напомад напомажен напомина напомина напомина напомина напомина напоминан напомина напомина напомина напомина напомин напомн напомн напомн напомн напомн напомн напомн напомн напомн напор напор напорет нап напо направ направ направ направ направ направл направлен направлен направлен направлен направлен направл направл направля направля направл направ направ напрасн напрасн напрасн напрасн напрасн напрасн напрасн напрашива напрашива напрашива например напролет напрост напрот напряга напряга напряга напряга напряг напряг напрягут напрягш напряжен напряжен напряжен напряжен напряжен напряжен напряжен напряжен напряжен напряжен напряж напуга напуга напуга напуга напуга напуга напуга напудрен напужа напускн напускн напускн напуст напуст напута напута напутств напутств напущен напьет нап наравн нараспашк нараста нараста нарастан нараста нар нарв нареза нарисова нарисова нарисова 
нарисова нарисова наркотическ народ народ народ народец народниц народн народн народ народовольческ народ народонаселен народ народ народ нарочн наруж наружн наружн наружн наружн наружн наружн наружн наружн наружн наружн наруж нарукавничк нарумянен наруша наруша наруша наруша наруша наруша наруша наруша наруш наруш нарушен нарушен наруш наруш наруш наруш наруш нарушител наруш наруш наруш нарциз нар нарыва нарыва наряд наряд наряд наряд наряд нарядн нарядн нарядн наряд наряжа наряжа наряжа нас насад насад насвистыва наседк наседк наседк наседк насеком насел населен населен населен населен насел насел насердк насид насил насил насил насилова насил насильн насильствен насильствен насильствен насильствен насильствен насил насказа насквоз наскольк наскор наскоч наскучива наскуч наскуч наслажда наслажда наслажда наслажда наслажда наслажда наслажда наслажда наслажда наслажд наслажден наслажден наслажден наслажден наследник наследник наследник наследова наследова наследств наследствен наследств наследств наслыша наслышк насме насмерт насмеха насмеха насмешек насмеш насмеш насмешк насмешк насмешк насмешк насмешк насмешл насмешлив насмешлив насмешлив насмешлив насмешлив насмешлив насмешлив насмешлив насмешлив насмешлив насм насмея насмотрел насол наспа настава настава настав наставительн наставлен наставлен наставл наставля наставниц наста настаива настаива настаива настаива настаива настаива настаива настаива наста наста наста наста настанет настанут настас настас настас настас настасьюшк настас настеж настеньк настига настла настоен насто насто настойчив настойчив настойчив настойчив настойчив настойчив настойчив настойчив настойчив настойчив настольк настоя настоя настоян настоя настоятельн настоятельн настоя настоя настоя настоя настоя настоя настоя настоя настоя настоя настоя настоя настраива настраива настр настроен настроен настроен настроен настроен настро настро настро настро настро настройщик настроч настуд настуд наступа наступа наступа наступ наступ наступ наступ 
наступ наступ наступлен насущн насущн насущн насущн насущн насчет насыла насыл насып насып насядет натал натал натал наташ наташ наташ наташ наташ натащ натащ нат натекл натерпел натерпел натерт наткнул натолкнут натолоч натоплен натопта наточ натружен натуг натур натурализм натуральн натуральн натуральн натуральн натуральн натур натур натур натур натыка натыка натягива натягива натяжек натянул натянул натянут натянут натянут наугад наук наук наук наук наук наук наук наумыч наутр науч науч науч науч науч науч науч науч науч научн научн науч наущен нафталин нафталин наха нахальн нахальн нахальн нахальн нахальств нахальств нахальств нахвата нахвата нахлебник нахлеста нахлобуч нахмурен нахмурен нахмурен нахмурен нахмурен нахмурен нахмур нахмур нахмур нахмур нахмур нахмур наход наход наход наход наход наход наход наход наход наход наход наход наход наход наход находк находчив наход наход наход наход находя нахож нацарапа нац нац национальн национальн нац нац нача нача нача нача нача нача начал нача начал нача нача нача начал нача нача начальник начальник начальник начальник начальник начальник начальниц начальниц начальниц начальств начальств начальств начальств начатк начат начат нача начерн начерта начерта начерта начерта начерта начерта начерт начест начина начина начина начина начина начина начина начина начина начина начина начина начина начина начина начина начина начина начина начинан начина начина начина начина начина начина начина начин начинен начинк начинк начист начистот начн начнет начнет начнет начнеш начн начн начнут начнут начуд наш наш наш наш наш нашел нашел наш наш нашептыва нашептыва нашеств нашеств наш наш наш наш нашл нашл нашл нашл нашл нашл наш нащипа нащупа наяв наяд не неаккуратн неаккуратн неаполитанск неб неб небелен небес небес небес небес небес небесн небесн небесн небесн небесн небесн небесн небесполезн неблаговолен неблагодар неблагодарн неблагодарн неблагодарн неблагодарн неблагодарн неблагодарн неблагонадежн неблагонамерен 
неблагополучн неблагоприятн неблагородн неблагородн неблагородств неблагосклон небледнеющ неб небогат небогат небожител небойк неб небольш небольш небольш небольш небольш небольш небольш небольш небольш неб небосклон небосклон неб небреж небрежн небрежн небрежн небрежн небрежн небрежн небрежн небрежн небрежн небрежн небрежн небрит небрит небрит неб небывал небывал небывал небывал небывал небывал небывал небывал небывал небывальщин небылиц небыт нев неважн неважн неважн неважн неважн невдомек нев неведен неведен невед неведом неведом неведом неведом неведом неведом неведом неведом невежд невежд невежеств невежеств невеж невежлив невежлив невежлив невелик невелик невелик невеликодушн невер неверн неверн неверн неверн неверн неверн невероятн невероятн невероятн невероятн невер невер невер невесел невесел невесел невест невест невестк невестк невест невест невест невест невзгод невзнача невзрачн невида невида невида невида невидим невидим невидим невидим невидим невидим невин невин невин невин невин невин невин невин невин невин невин невин невин невиновн невменя невниман невнимательн невнятн невод невозвратим невозвратим невозвратн невозвратн невозвратн невозвратн невозвратн невоздержан невозмож невозможн невозможн невозможн невозможн невозможн невозможн невозможн невозможн невозмутим невозмутим невозмутим невозмутим невозмутим невозмутим невозмутим нев невол невол невол невольн невольн невольн невольн невольн невольн невольн невольн невообразим невоскресш невоспита нев невпопад невред невредим невредим невредим невск невск нев нев невыгодн невыкуплен невынос невыносим невыносим невыносим невыносим невыносим невыносим невыносим невыразим невыразим невыразим невыразим невыразим невыразим невысказа невысок невысок невыстоя невысыха невыторгова нег негада негада негд нег нег негибнущ неглубок неглуп неглуп неглуп нег неговорл негодн негодн негодн негодн негодован негодован негодован негодован негоду негодя негодя негод нег негоциант неграциозн негреющ негреющ негромк 
негромк негр негуст недавн недавн недавн недавн недавн недавн недавн недавн недавн недавн недалек недалечк недальн недальн недар недвижим недворянин недел недел неделикатн неделикатн неделовит неделов недел недельк недел недел недел недетск недетск недлин недобор недобровольн недобр недоброжелател недоброжелательств недоварен недовар недовер недовер недоверч недоверчив недоверчив недоверчив недоверчив недоверчив недоверчив недоверчив недоверчив недовол недовольн недовольн недовольн недовольн недовольн недовольств недовольств недогадлив недоглядел недогляд недогляд недозрел недоимк недоимк недокончен недолг недоноск недоразумен недор недосказа недосказа недостава недоста недоста недостатк недостатк недостатк недостатк недостаток недостаточн недостижим недостоин недостойн недостойн недостойн недостойн недостойн недостойн недоступн недоступн недоступн недоступн недоступн недоступн недосяга недосягаем недосяга недосяга недотеп недотрог недоумева недоумева недоумев недоумен недоумен недоумен недоумен недоумен недоумен недоумен недоуч недочита недр недр недремлющ недрогнувш недруг недружелюбн недуг недуг недуг недурн недурн не неестествен неестествен неестествен неж нежат нежда нежда нежда нежда нежел неж неженск нежив нежив нежинск неж неж нежливеньк нежн нежн нежн нежн нежн нежн нежн нежн нежн нежн нежн нежн нежност нежност нежност нежн нежн нежн нежн нежн нежн нежн незабвен незабвен незабыва незабыва незабыва независим независим независим независим независим независим независя незадолг незажива незакон незакон незаметн незаметн незаметн незаметн незаметн незапамятн незапамятн незаперт незапятна незаработа незаработа незаслужен незаслужен незастенчив незатейлив незач незван незван незвучн нездешн нездешн нездешн нездешн нездор нездоров нездоров нездоров нездоров нездоров нездоров нездоров нездоров неземн неземн неземн неземн незлобн незна незнак незнакомец незнаком незнаком незнаком незнаком незнаком незнакомств незнаком незнакомц незнаком незнаком незнаком незнаком 
незнан незнан незнача незначительн незначительн незначительн незр незрим незрим незрим незрим незыблем неизбежн неизбежн неизбежн неизбежн неизбежн неизбежн неизбежн неизбежн неизбежн неизбежн неизбежн неизведа неизвест неизвестн неизвестн неизвестн неизвестн неизвестн неизвестн неизвестн неизвестн неизвестн неизвестн неизвестн неизгладим неизгладим неизлечим неизлечим неизлечим неизмен неизмен неизмен неизмен неизменя неизмерим неизречен неизъясним неизъясним неизъясним неимен неимоверн неимоверн неимоверн неинтересн неинтересн неискусн неисполнен неисправ неисправим неисправим неисправн неистин неистов неистов неистов неистов неистов неистов неистощим неистощим неистребим неистребим неисходн неисходн не нейд нейдет нейдут нейрастен нейт нек нек некогд нек неко некоммерческ нек некончен некончен некормлен некотор некотор некотор некотор некотор некотор некотор некотор некотор некотор некотор некотор некотор некошен некрасив некрасив некрасив некрасив некрасив некраснеющ некрасов некрашен некрепк некрепок некстат нект некуд некуплен нек неладн неласков неласков нелегк нелегк нелегк нелеп нелеп нелеп нелеп нелеп нелеп нелеп нелеп нелеп нелеп нелеп нелжив нелицемерн неловк неловк неловк неловк неловк неловк неловк неловк неловк неловок неложн нельз нельк нелюбим нелюбов нем немал нема немедлен немедлен немедлен немедлен немедлен немеет немезид немел немеркнущ немеркнущ немец немецк немецк немецк немецк немецк немецк немецк немецк немецк немецк немилосердн неминуем неминуем неминуем неминуем неминуч немк немк немк немк немног немног немног немн немножечк немножк нем нем нем немок немолод немолод немолод немолод немолчн нем немот немотств немоч немощ немощен немощен нем немудрен немудр немудрен немудр нем немц немц немц немц немц немц нем нем немыслим немыслим немыт немыт немыт нем ненавидел ненавидел ненавидел ненавидет ненавид ненавид ненавид ненавид ненавид ненавиж ненавист ненавист ненавист ненавистн ненавистн ненавистнича ненавистн ненавистн ненавистн ненавистн 
ненавистн ненавистн ненавист ненавист ненаглядн ненаглядн ненаглядн ненаглядн ненадежн ненадежн ненадолг ненарок ненарушим ненарушим ненарушим ненастн ненастн ненаст ненасытим ненасытим ненасытн ненасытн ненатуральн ненормальн ненормальн ненужн ненужн ненужн ненужн ненужн ненужн ненужн ненужн ненужн ненужн необдума необдума необита необита необозрим необозрим необозрим необозрим необразова необразова необузда необход необходим необходим необходим необходим необходим необходим необходим необходим необходим необходим необходим необходим необходим необъясним необъясним необъясним необъясним необъятн необъятн необъятн необъятн необъятн необъятн необъятн необъятн необыден необыкновен необыкновен необыкновен необыкновен необыкновен необыкновен необыкновен необыкновен необыкновен необыкновен необыкновен необыкновен необычайн необычайн необычайн необычайн необычайн необычайн необычн неоднократн неоднократн неодобрен неодолим неодушевлен неожида неожидан неожида неожида неожида неожида неожидан неожида неожида неожидан неожида неожида неожида неожида неокончен неокрепш неопас неопасн неописан неописа неописа неописа неопределен неопределен неопределен неопределен неопределен неопределен неопределен неопределен неопровержим неопрят неопрятн неопрятн неопрятн неопытн неопытн неопытн неопытн неопытн неосновательн неоспорим неосторожн неосторожн неосторожн неосторожн неосторожн неосторожн неосторожн неосторожн неостроум неосуществим неосуществим неотвратим неотвратим неотвязн неотвязн неотвязчив неотвязчив неотдела неотеса неоткуд неотлагательн неотлагательн неотлагательн неотлучн неотраз неотразим неотразим неотразим неотразим неотразим неотразим неотразим неотсрочим неотступн неотступн неотступн неотступн неотъемлем неохот неохотн неохот неохот неоценен непарадн неплотн непобедим непобедим непобедим неповин неповоротлив неповоротлив неповторим непогибш непогод неподалек неподвиж неподвижн неподвижн неподвижн неподвижн неподвижн неподвижн неподвижн неподвижн неподвижн 
неподвижн неподвижн неподвижн неподвижн неподвижн неподвижн неподдельн неподдельн неподкупн неподкупн неподобн неподража неподражаем неподходя непозволительн непоказа непокладн непокойн непокойн непоколебим непоколебим непоколебим непокорн непокрыт неполнот неполн неположен непомерн непомерн непомерн непониман непонят непонятлив непонятлив непонятн непонятн непонятн непонятн непонятн непонятн непонятн непонятн непорядк непорядочн непосвящен непосильн непослушн непосредствен непосредствен непосредствен непостижим непостижим непостижим непот непотребн непохож непохож непочтительн непочтительн неправд неправд неправедн неправильн неправильн неправильн неправильн неправильн неправильн неправлен неправ непредвиден непреклон непреклон непрекраща непреложн непрем непремен непремен непреодолим непрерывн непрерывн непрерывн непрерывн непрерывн непреста непривилегирова непривлекательн непривычк непривычн непривычн непривычн непривычн неприготовлен непризна непризна неприкосновен неприкосновен неприлич неприличн неприличн неприличн неприличн неприличн неприметн неприметн непримирим непринужден непринужден неприступн неприступн неприступн непритворен непритворн непритворн непритворн непричаст непричеса неприязнен неприязн неприятел неприятельск неприятельск неприят неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн неприятн непробудн непробудн непробудн непробудн непробудн непробудн непроглядн непроглядн непродажн непродолжительн непроезж непролазн непроницаем непроницаем непроницаем непроница непроница непростительн непростительн непростительн непростительн непроходим непроч непрошен непрошен непрядв непрядв непутн неравенств неравн неравнодуш нераден нерадостн неразборчив неразвернувш неразвит неразвит неразгада неразговорч неразговорчив неразделен нераздельн неразлучн неразлучн неразреза неразреша неразрешен неразреш неразрешим неразрешим неразрешим неразрыва неразрывн неразрывн неразрывн 
нерасположен нерасчетлив нерв нерв нервическ нервическ нервическ нервическ нервн нервн нервн нервн нервн нервн нервн нервн нервн нерв нерв нередк нерешен нерешим нерешительн нерешительн нерешительн нерешительн нерешительн нерешительн неровн неровн нерукотворн нерях нес несб несбыточн несбыточн несбыточн несбыточн несбыточн несбыточн несвеж несвеж несвеж несветск несвойствен несвяза несвязн несвязн несгора несдоброва несерьезн несессер несессер несет несет несет несеш нес несказ несказа нескладн нескладн нескладн нескольк нескольк нескольк нескольк несконча несконча несконча несконча нескромн нескромн нескрыва нескрыва нескучн несл неслаб несл несл несл неслиян несл несловоохотл несл неслуша неслыха неслыха неслыха неслыха неслыха неслыха неслыха неслыха неслыха неслышн неслышн несмел несметн несметн несмотр несмыкаем несносн несносн несносн несносн несносн несовершен несовершеннолетн несовершеннолетн несовершенств несоглас несогласим несоглас несомнен несомнен несомнен несомнен несомнен несомнен несообразн несообщител несоразмерн несостоятельн неспешн неспешн неспешн неспешн неспокойн неспокойн неспособн неспособн несправедлив несправедлив несправедлив несправедлив несправедлив несправедлив несправедлив несправедлив несправедлив несправедлив несравнен несравнен нессиметричн нестерп нестерпим нестерпим нестерпим нестерпим нестерпим нестерпим нестерпим нест нестира нестройн нес несут несущ несуществ несущ несущ несущ несущ несч несчаст несчаст несчаст несчаст несчаст несчаст несчаст несчастл несчастлив несчастлив несчастлив несчастн несчастн несчастн несчастненьк несчастн несчастн несчастн несчастн несчастн несчастн несчастн несчастн несчастн несчастн несчастн несчастн несчастн несчаст несчаст несчаст несчаст несчаст несчетн несыт не нет нетактичн нетверд нетверд нетверд нетверд нетверез нетерепен нетерпел нетерпелив нетерпелив нетерпелив нетерпелив нетерпелив нетерпелив нетерпелив нетерпелив нетерпен нетерпен нетерпен нетерпен нетоплен нетороплив нетрезв нетронут 
нетронут нет нетяжк неуважен неуважительн неуверен неувяда неувяда неувяда неувяза неугодл неугодлив неугодн неугомон неуда неуда неуда неудач неудач неудач неудач неудач неудачн неудачн неудержим неудержим неудержим неудержим неудержим неудобн неудобн неудобн неудобств неудобств неудобств неудобств неудобств неудовлетворен неудовлетворительн неудовольств неудовольств неудовольств неужел неужел неузнава неузнава неузна неуклон неуклюж неуклюж неуклюж неуклюж неуклюж неуклюж неукоснительн неукрашен неукротим неуловим неуловим неуловим неуложен неумел неумел неум неумен неумен неумен неумерен неумерен неумерен неуместн неумира неумн неумол неумолим неумолим неумолим неумолка неумолка неумолчн неумолчн неумыт неумыт неумышлен неумышлен неун неурожа неурожа неурядиц неуследим неуспех неуста неуста неустойк неустраним неустрашим неустройств неусыпн неутешн неутолим неутомим неутомим неутомим неучен неучтив неучтив неуютн нехитр нехожден нехорош нехорош нехорош нехорош нехот нехот нехрист нецеремон нечальн нечая нечая нечаян нечая нечая неч нечеловеческ неч неч нечесан нечесан нечет нечиновн нечист нечистот нечистот нечистот нечист нечищен нечищен нечопорн нечт нечувствительн нечуток нешт нештопа нештопан нешуточн не неяв неярк неярк неясн неясн неясн неясн неясн неясн неясн неясн неясн неясн ни нибуд нив нив нигд нигилизм нигилист нигилист нигилист ниж нижеподписа нижн нижн нижн нижн нижн нижн нижн нижн нижн нижн низа низведен низвел низвод низеньк низеньк низк низк низк низк низк низк низк низк низк низк низк низк низк низмен низок низост низост низш низш низш низш никак никак никак никак никак никак никак никак никак никанор ник никит ник никитич никит никит никит никифоровн никл никнет никогд ник никод никодим никодим никодим никодим нико николавн николаевич николаевич николаевич николаевн николаевн николаевн николаевн николаевск николаевск никола николаич никола николашк николашк никола никол никол ник никт никуд нил ним нима нимб ним нимф нин нин нин нин нин 
ниоткуд нипоч нискольк ниспосла нисход нисход нисход нит нитк нитк нитк нитк нитк ниток ниточк ниточк нит нит нитян нитян них ниц ницц ницш нич ничегошечк нич нич нич ничк ничт ничтож ничтожеств ничтожеств ничтожн ничтожн ничтожн ничтожн ничтожн ничтожн ничтожн ничтожн ничтожн ничтожн ничтожн ничтожн ничтожн ничут нич нич нич нич нич нич ниш нищ нищ нищ нищ нищ нищенск нищенск нищет нищет нищет нищет нищет нищ нищ нищ нищ нищ нмен нна нно ннх нных но нов нов нов нов нов нов новеньк новеньк новеньк новеньк новехоньк новизн новичк новичк новичк новичок нов нов новодевич нов нов нов нов новорожден новосел новост новост новост новост новост новост нов новоявлен нов нов нов нов нов нов нов нов ног ног ног ног ног ног ног ног ног ноготк ног ногт ногт ногт ног но ноет нож нож нож нож нож нож ножик ножичек ножищ ножк ножк ножк ножниц ножничк нож нол номер номер номинальн нон нор норк норк норм нормал нормальн нормальн нормальн нормальн нормальн нормальн нормальн нормальн норм норм нор норов норов нор нос нос нос нос носик нос нос нос нос нос нос нос носил носильщик нос нос нос нос нос нос носк носк носк носк носк носк носов носок нос нос нос нос нос нос нося нот нот нот нот нотк нотк нотк нот нот нотунг нот ноч ноч ноч ночева ночева ночева ночевыва ноч ноч ночлег ночн ночн ночн ночн ночн ночн ночн ночн ночн ночн ночн ночн ночу ноч ночует ночуеш ноч ноч ноч нош нош нош нош нощ нощн ноют ноябр ноябр ноябр нрав нрав нрав нрав нрав нрав нрав нрав нрав нрав нрав нрав нрав нравоучен нравствен нравствен нравствен нравствен нравствен нравствен нравствен нравствен нравствен нравствен нравствен нравствен нравствен нравствен нрав нрав нрав ну нудн нудн нудн нужд нужда нужда нужд нужда нужда нужд нужд нужд нужд нужд нуж нужн нужн нужн нужн нужн нужн нужн нужн нужн нужн нужн нул нумер нумер нумер нумер нумер нумер нумер нумер нутр ны ны нын нынешн нынешн нынешн нынешн нынешн нынешн нынешн нынешн нынч ньютон ньютонов нюха нюха нюхан нюх нян нян нянек нянечк нян нянч нянч нянч нянч няньк 
няньк няньк няньк няньк нян нях о оазис об об обагр обагр обагр обанкрут обаян обаян обаян обаян обаян обаятельн обаятельн обаятельн обаятельн обаятельн обвал обвар обвел обвел обветша обвешива обв обвива обвива обвив обв обв обвинен обвинен обвинен обвинен обвинен обвинен обвин обвин обвин обвинительн обвин обвин обвиня обвиня обвиня обвиня обвиня обвиня обвиня обвит обвод обвод обвод обвод обворожительн обворожительн обвяза обвяза обвяза обглода обдава обдадут обда обда обда обда обдела обдела обдергива обдерет обдира обд обдул обдума обдума обдума обдума обдума обдума обдума обдума обдумыва обдумыва обдумыв об обега обег обед обед обеда обеда обеда обеда обеда обеда обеда обеда обед обеда обеда обед обед обеден обедн обедневш обедн обедн обедн обедн обед обед обед обед обежа обеж обездолен обезличива обезображен обезображен обезображ обезобраз обезоружен обезум обезумел обе обе обе оберега оберега оберега оберега обереч обереш обернет обернув обернувш обернул обернул обернул обернул обернул обернут обернут обертк обескураж обеспеч обеспечен обеспечен обеспеч обеспеч обеспоко обеспокоен обеспоко обеспоко обеспоко обеспоко обеспок обеспоко обессилен обессилен обессил обессил обетов обетован обетова обет обеща обеща обеща обеща обеща обеща обеща обеща обеща обещан обещан обещан обещан обещан обещан обещан обеща обеща обещ обещ обжег обжег обжеч обжив обжига обжига обжигательн обжог обжор обива обива обид обид обидевш обидел обидел обидел обидел обидел обидет обидет обидн обидн обидн обидн обидн обидн обидн обид обид обидчив обидчик обидчик обид обид обижа обижа обижа обижа обижа обижа обижа обижа обижа обижа обижа обижа обижа обиж обижен обижен обижен обижен обил обил обил обил обильн обильн обильн обильн обиняк обира обира обира об обита обитател обитател обит обит обит обит обиход обкрадыва облагодетельствова облагодетельствова облагодетельствова облагорожен облагорожен обладател облак облак облак облак облак облак облак облак обласка обласка обласка област 
област област облачк облачк облегчаем облегча облегча облегча облегча облегча облегча облегч облезл облека облекл облен облен облеп облеплен облетел облеч облива облива облива облива облив облив облигац облизыв облик облик обл облит облит облит обл обл облича облича обличат облича облича облича облича обличен обличен обличен обличен облич облич обличител обличител облич облич облобыза облож облож облож облож облокот облокот облокот облокот облокот облома облома обломк облом обломов обломов обломовк обломовк обломовк обломовк обломовк обломовк обломовск обломовск обломовск обломовск обломовск обломовск обломовск обломовск обломов обломовц обломовц обломовц обломовщин обломов обломов обломов обломок облуп обмакнеш обмакнул обмакнут обма обма обманет обманет обман обманувш обманул обманул обманул обманут обманут обманут обманут обманут обманч обманчив обманщик обманщиц обманыва обманыва обманыва обманыва обманыва обманыва обманыва обманыва обманыва обмахив обм обм обменен обменя обменя обмер обмерива обмерива обмерив обмер обмест обмета обмоет обморок обморок обморок обморок обморок обморок обморок обмундировк обмыва обмыва обмызга обнажа обнажа обнажен обнажен обнажен обнажен обнаж обнаруж обнаруж обнаружен обнаружива обнаружива обнаружив обнаруж обнаруж обнаруж обнаруж обнаруж обнаруж обнаруж обнеряш обнес обнесен обнима обнима обнима обнима обнима обнима обнима обнима обним обним обнимет обн обнов обнов обнов обновк обновлен обновлен обновля обноск обнюха обня обня обня обня обня обня об обобра обовьет обовьют ободк ободра ободра ободрен ободр ободр ободр ободр ободр ободр ободр ободр ободр ободр ободр ободря ободря обо обожа обожа обожа обожан обожан обожа обож обожг обожгл обожгл обожг обожж обожжен обоз обозва обозва обозна обознача обознача обознача обознача обознач обознач обознач обознач обозник обозрет обоз обо обо обо обо об обойдет обойдет обойдеш обойд обойд обойдут обойд обойн обойт обойт обокра обокраст оболочк оболочк обольет обольет обольст 
обольстительн обольстительн обольстительн обольют обомлел обомлел обопрет оборачива оборачива оборачива оборачива оборачив оборачив оборва оборва оборва оборва оборва оборванец оборва оборва оборванц оборванц оборва оборвет оборвет оборвеш оборв оборвыш обороня оборот оборот оборот оборот оборот оборот оборот оборот оборот оборотн оборотн оборотн оборотн оборот оборот оботрет обошел обошл обошл обошл обошл обошьют обоюдн обоюдн обо обо обрадова обрадова обрадова обрадова обрадова обрадова обрадова обрадова обраду обрад обрад образ образ образ образ образин образ образова образова образова образова образован образован образован образован образован образова образова образова образова образован образован образова образова образова образок образ образ образ образум образум образц образцов образцов образцов образц обрамл обрамл обрамлен обрат обрат обрат обрат обрат обрат обрат обрат обрат обрат обрат обрат обрат обрат обрат обратн обрат обрат обрат обраща обраща обраща обраща обраща обраща обраща обраща обраща обраща обраща обраща обраща обраща обра обра обращ обращ обращен обращен обращен обращен обращен обращен обращен обращен обращен обращ обращ обращ обрееш обреза обрезк обрека обрел обреч обреч обречен обречен обречен обреч обрисовыва обрит обрит обробел обробел оброк оброк оброк оброк оброн обросш оброчн обруб обрублен обрубок обруга обруга обруга обруга обруга обруга обруга обруга обруч обруч обруч обруч обручк обруш обруш обруш обруш обруш обр обрыв обрыва обрыва обрыва обрыв обрыв обрывк обрывк обрывк обрывок обрыв обрюзг обряд обряд обряд обряд обряд обрящет обсажен обсасыв обслуживан обставлен обставлен обста обстанов обстановк обстановк обстановк обстановк обстановк обстоятельн обстоятельн обстоятельн обстоятельн обстоятельств обстоятельств обстоятельств обстоятельств обстоятельств обстоятельств обстоятельств обстоятельств обстоятельств обстрелива обстрижен обстро обструга обступ обступ обступ обсуд обсуд обсуд обсуд обсуд обсужден обсужива обсыплет обтер 
обтерет обтерпел обтерт обтеса обтира обтира обтир обтирк обтяжк обув обуглен об обуз обусловл обут обут обух обуча обучен об обуя обхват обхват обхват обхват обхватыва обход обход обход обход обход обход обход обход обход обход обход обход обхожден обшарива обшарка обшарка обшива обшир обширн обширн обширн обширн обш обшлаг обшмыга общ общ общ общ общ общ обществ обществ обществен обществен обществен обществен обществен обществен обществен обществ обществ обществ общечеловеческ общ общ общ общ общ объеда объезд объезжа объ объемист объемлет объемл объем объеха объяв объяв объяв объяв объяв объяв объяв объяв объяв объяв объяв объявл объявлен объявл объявля объявля объявля объявля объявля объявля объявл объя объяснен объяснен объяснен объяснен объяснен объяснен объяснен объясн объясн объясн объясн объясн объясн объясним объясн объясн объясн объясн объясн объясн объясн объясн объясн объясня объясня объясня объясня объясня объясня объясня объясня объясн объясн объят объят объят объят объят объят объят объят обывательск обывательск обывательск обыден обыден обыден обыден обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыкновен обыск обыск обыскива обыскива обыскива обыск обыча обыча обыча обыча обычн обычн обычн обычн обычн обычн обычн обыщ обыщ обьят обяжет обяза обяза обяза обязан обязан обязан обязан обязан обяза обяза обяза обязательн обязательн обязательн обязательн обязательн обязательств обязательств обязательств обязательств ова овальн овальн овац овес овечк овеют овея овин овин овладева овладева овладева овладева овладева овладевш овладел овладел овладел овладел овощ овощ овощн овраг овраг овраг овраг овраг овс овс овц овц овц овчинин овчинин огарк огарк огарк огарок огиба огиба огиба огиб огласк оглобл оглобл оглохл оглушен оглушен оглуш оглуш огляд оглядевш оглядел оглядел оглядел оглядет огляд оглядк оглядк оглядыва оглядыва оглядыва оглядыва 
оглядыва оглядыва оглядыван оглядыва оглядыва оглядыва оглядыв оглядывая оглядыв оглян оглянет оглян оглян оглянувш оглянул оглянул оглянул оглянул оглянут огн огнев огневиц огнев огнев огнев огн огнекрасн огнекрасн огн огнен огнен огнен огнен огнен огн огн огн огн огн огн ог оговор огонек огон огоньк огоньк огоньк огород огород огород огород огород огород огородишк огородниц огород огород огород огорошат огорошива огорош огорча огорч огорчен огорчен огорчен огорчен огорч огорч огорч огорч ограб ограб ограб ограбл ограблен ограблен оград оград оград оград оград оград оград оград ограж ограничен ограничива огранич огранич огранич огранич огрел огромн огромн огромн огромн огромн огромн огромн огромн огромн огромн огромн огромн огромн огромн огрыза огрыза огурец огурц огурц огурц огурчик одар одарен од одева одева одева одева одева одева одева одева одева одева одева одев одевш одеж одежд одежд одежд одежд одежд одежк одежонк одеколон одел одел одел оденет оденеш оден оден оденут одеревенел одерж одесс одесс одет одет одет одет одет одет одет одет одет одет одет одет одея одея одеял одеян один одинак одинаков одинаков одинаков одинаков одинаков одинехоньк одинешеньк одиннадцат одиннадцат одиннадцат одиннадцат одиннадца одинок одинок одинок одинок одинок одинок одинок одинок одиночеств одиночеств одиночеств одиночк одиночн одича одичал одн однажд однак однакож однех одн одн одн одн одн одноважд одновремен одн однозвучн однозвучн однозвучн одноимен одн однократн одн одн однообраз однообраз однообраз однообразн однообразн однообразн однообразн однообразн однообразн однообраз односторон односторон односторон одноцвет одн одн одобрен одобрен одобрительн одобр одобря одолева одолева одолева одолева одолел одолел одолел одолел одолет одолжа одолжен одолжен одолж одонцов одр одр одума одума одурач одуревш одурееш одурен одурманен одурманива одутловат одушевл одушевлен одушевлен одушевлен од одышк одышк ожерел ожесточен ожесточен ожесточен ожива оживет ожив ожив ожив оживлен 
оживлен оживлен оживлен оживлен оживлен оживлен оживл оживля оживля оживля оживля оживля оживл оживут ожида ожида ожида ожида ожида ожида ожида ожида ожидан ожидан ожидан ожидан ожидан ожида ожида ожида ожид ожидовел ож ожиревш оз озабот озабоч озабочен озабочен озабочен озабочен озабочен озадач озадачен озадач озарен озар озар озар озар озар озар озар озар озаря озаря озем озер озер озер озер озерн озер озер озер оз озим озира озира озира озира озира озиран озир озл озл озлоб озлоблен ознаком ознаменова ознаменова означа означа означа означен озноб озноб озноб озябл озябш озябш озябш о ок окажет оказа оказа оказа оказа оказа оказа оказ оказыва оказыва оказыва оказыва оказыва оказыва оказыва оказыва окаймля окаменеет окаменел окаменел окаменел окаменел окаменя оканчива окарикатур окат окая окая окая окая океа океан океа окидыва окидыв окинет окинеш окинув окинул оклад оклад оклевета оклевета оклеива оклика оклика окликнул окликнул окликнут окн окн окн окн окн окн окн окн ок ок окова окова оков оковыва оковыва околдова околдова околиц окол околотк околотк околоток ок окон окон окон окончан окончан окончан окончательн окончательн окончательн окончательн окончательн окончательн окончательн окончательн окончательн окончательн оконч оконч оконч оконч оконч оконч оконч окопа окорок окорок окостенел окостенел окостенел окоченел окочен окошечк окошк окошк окошк окошк окошк окраин окраск окраш окраш окрест окрест окрест окрестн окрестн окрестн окрестн окрик окрик окровавлен окровавлен окром окром окрошк округл округл округля округля окружа окружа окружа окружа окружа окружа окружа окружа окружа окружа окружа окружа окруж окруж окруж окруж окруж окружн окрыля октав октав октябр октябрьск октябрьск октябр ок окунул окун окупа окута окута ол оледенел оледен оледен оледен ол олен оленьк олечк олешкин ол олимпийск олиф олицетворен олов олух олух олух ольг ольг ольг ольгин ольг ольг ольх ольх олюшк олюшк ол омар омбрельк омерзен омерзен омерзен омерзительн омрача омрача омрачен 
омрачен омрачен омрач ом омут омут омут омыва ом омыт он он онем онемел онемел онемел онемен онемет онер он онис онисимовн онисимовн онисим он он он он опада опа опален опален опален опал опамятова опар опаса опас опас опасен опасен опасен опасен опасен опаслив опасн опасн опасн опасн опасн опасн опасн опасн опасн опасн опасн опасн опасн опасн опек опек опек опекун опекун опекун опер операц операц опер оперед оперед оперл оперн опер опер оперш опер опечален опечал опечал опечаток опива опира опир описа описа описан описан описан описан описа описа описа описа опис описыва описыва опиш оплака оплакива оплева оплет оплетет оплетеш оплеух оплеух оплодотворя оподл опозда опозда опозда опозда опозда опозда опозорен опозор опозор опомн опомн опомн опомн опомн опомн опомн опор опор опор опосл опочиет опошл оппозиц оппонирова оправда оправда оправда оправдан оправдан оправдан оправдательн оправда оправда оправдыва оправдыва оправдыва оправдыва оправдыва оправдыва оправдыва оправдыва оправдыва оправдыва оправдыва оправдыва оправдыв оправ оправ оправ оправ оправ оправ оправ оправл оправля оправля оправля оправл опрашива опрашива определ определен определен определен определен определен определен определен определен определен определен определен определен определ определ определ определ определительн определ определ определя определ опредлен оприч опроб опроверга опроверга опровергнут опровергнут опрокидыван опрокидыва опрокидыв опрокинул опрокинут опрокинут опрокинут опрокинут опрометчив опромет опрос опротивел опрыскива опрятн опрятн опрятн оптическ опт опубликова опуска опуска опуска опуска опуска опуска опуска опуска опуска опуска опуска опуска опуск опуск опустевш опустевш опустеет опустел опустел опустел опустел опуст опуст опуст опуст опуст опуст опуст опуст опуст опуст опуст опуст опустоша опустоша опустошен опустошен опустошен опуст опута опухол опухол опуш опущ опущен опущен опущен опущен опущен опущ оп опыт опыт опыт опытн опытн опытн опытн опытн опытн опытн опыт 
опыт опьян опьяневш опьянел опьянен опьянен опьян опя орав оракул ора оранж оранжер оранжере оранжере орар оратор оратор ораторск ораторствова ораторствуеш орб орга организац организм организм организм орган органчик органщик орд орд орд орден орд ореол орет орех орех орех орешк оригинал оригинальн оригинальн оригинальн оригинальн оригинальн оригинальн оригинальн оркестр оркестр орл орл орлин орлин орлиц орл ороб оробевш оробеет оробел оруд оруд оруж оруж орфограф ос осад осад осадк осад осадок осад осажда осанист осанист осанист осанк осанк осведом осведом осведом освежа освежа освежа освежа освежа освеж освеж освеж освеж освеж освеж освет освет освет осветлен освет освеща освеща освеща освещ освещ освещен освещен освещен освобод освобод освобод освобод освобод освобод освобод освобод освобожда освобожда освобожден освят оседа оседла осекл осекл осек осел осел осел осен осен осен осен осен осен осен осен осен осен осеня осеня осеня осен осерд осерча осетр осетр осетрин осетрин осечк осилива осил осил осинов осин осипш оскверн осклаб осколк осколок оскоплен оскорб оскорб оскорб оскорбительн оскорбительн оскорбительн оскорб оскорбл оскорбл оскорблен оскорблен оскорблен оскорблен оскорблен оскорблен оскорблен оскорблен оскорблен оскорблен оскорбл оскорбл оскорбля оскорбля оскорбля оскорбля оскорбля оскорбля оскорбл осл ослаб ослабева ослабева ослабева ослабевш ослабевш ослабел ослабел ослабел ослеп ослеп ослеп ослепительн ослепительн ослепительн ослепительн ослепительн ослепл ослеплен ослеплен ослеплен ослепля ослепля ослепнув ослепнут ослепш ослепш ослепш ослобод осл ослыша осматрива осматрива осматрива осматрива осматрива осматрива осматрива осматрива осматрива осматрив осматрив осмелива осмел осмел осмел осмел осмел осмел осмел осмел осмея осмея осмотр осмотр осмотрел осмотрел осмотрел осмотрел осмотр осмотрет осмотр осмотрительн осмотр осмысл осмысл осмысл осмысл оснежен оснежен оснеж основ основа основан основан основан основан основан основан основа основа 
основа основа основател основательн основательн основа основн основн основн основ основ основыва основыв особ особ особен особен особен особен особен особен особен особен особен особен особен особен особен особен особен особ особ особ особ особ особ особ особ особ особ особ ос осп оспарива оспарива оспарива осп осрам осрамл остава остава остава остава остава остава остава остава остав остав остав остав остав остав остав остав остав остав оставл оставлен оставлен оставлен оставлен оставл оставля оставля оставля оставля оставля оставля оставля оставля оставл оста оста оста оста остав оставьт остав оста оста оста оста оста оста остальн остальн остальн остальн остальн остальн остальн остальн остальн останавлива останавлива останавлива останавлива останавлива останавлива останавлива останавлива останавлива останавлива останавлива останавлива останавлив останавлив остан останет останет останеш останов останов останов останов останов останов останов останов останов останов останов останов останов останов останов останов останов останов останов остановк остановк остановл останов останов останов остан останут остан останьт остатк остатк остаток оста оста оста оста остервенен остервенен остервенен остервен остерегл остереч остзейск остолбенел остолбенел осторож осторожн осторожн осторожн осторожн осторожн осторожн осторожн осторожн осторожн осторожн осторожн осторожн осторожн острастк остр остр остр остр остров остров остров остров островок островск остров острог острог острог остр остр острожн острожн острот острот острот острот остроум остроум остроум остроум остроумн остроумн остроумн остроумн остроумн остроумн остр остр остр остуд оступа оступ ост остынет осуд осуд осуд осуд осужда осужда осужда осужда осужд осужд осужден осужден осунул осунул осуш осуществ осуществ осуществ осуществ осуществля осчастлив осчастлив осчастливлен осыпа осыпа осыпа осыпа осыпа осыпа осыпан осыпа осыпк осыплет осьмерк от отбива отбивн отб отбира отб отблагодар отблеск отблеск отборн отборн 
отбрасыва отброс отброс отброс отброшен отваг отваг отваг отважива отважн отважн отважн отвал отвалива отвал отведа отведа отведет отвед отвез отвезл отвезт отвек отвел отвел отверга отвердел отверженец отвержен отверза отверза отверзл отверзш отвернет отвернувш отвернувш отвернул отвернул отвернул отвернут отвертыва отвертыва отвесн отвес отвест ответ ответ ответ ответ ответ ответ ответ ответ ответ ответ ответ ответн ответ ответ ответствен ответствен ответ ответ ответ ответьт отвеча отвеча отвеча отвеча отвеча отвеча отвеча отвеча отвеча отвеч отвеч отвешива отвинч отвлека отвлека отвлека отвлечен отвлечен отвлечен отвлеч отв отвод отвод отвод отвод отвод отвод отвод отвод отвод отвож отворачива отворачива отворачив отворачив отвор отвор отворен отворен отворен отворен отворен отвор отвор отвор отвор отвор отвор отвор отвор отвор отвор отвор отвор отворот отворот отворот отворот отворот отвор отворя отворя отворя отворя отворя отворя отворя отворя отворя отворял отворя отворя отворя отворя отвор отврат отвратительн отвратительн отвратительн отвратительн отвратительн отвратительн отвратительн отвратительн отврат отвращен отвращен отвращен отвращен отвращен отвык отвыкл отвыкн отвычк отвяжеш отвяжут отвяза отвяза отвяза отвяза отгада отгадыван отговарива отговарива отговор отговор отговорк отговорк отголосок отгон отгон отгон отгоня отгоня отгорожен отгуля отдава отдава отдава отдава отдава отдава отдава отдав отдад отда отда отда отда отда отда отда отда отда отдален отдален отдален отдален отдален отдален отдален отдален отдален отдален отда отдал отдаля отд отда отдан отда отдаст отда отда отдаш отда отда отда отда отда отдела отдела отдела отдела отделен отделен отделен отделен отделен отдел отделен отделк отделк отдел отделыва отделыва отделыва отделыва отделыва отделыва отдельн отдельн отдельн отдельн отдельн отдельн отделя отделя отделя отделя отделя отделя отдернул отдернул отдохн отдохнет отдохнеш отдохн отдохн отдохновен отдохн отдохнув отдохнул отдохнул 
отдохнут отд отдых отдыха отдыха отдыха отдыха отдыха отдыха отдых отдыхив отдыхнувш отдыхнул отдых отдыша отдыша отека отекш отел отел отер отерл отец отеческ отеческ отечеств отечествен отечеств отж отж отж отзвук отзвуч отзовет отз отзыв отзыва отзыва отзыва отзыв отзыв отзыв отзыв отира отира отир откаж откажет откажет откажет откаж откаж откаж откаж отказ отказа отказа отказа отказа отказа отказа отказа отказа отказа отказа отказ отказыва отказыва отказыва отказыва отказыва отказыва отказыва отказыва отказыва откапыва откармлива откашлива откашлянул откидн откидыв откинув откинул откинут откладыва откладыва откладыв откланива откланива откланив откланя откланя откланя отклика отклик отклик откликнул откликнул отклон отклон отколот отколот откомандирова откопа откопа откровен откровен откровен откровен откровеннича откровеннича откровен откровен откровен откровен откровен откровен откровен откровен откровен откровен откроет откроет откр откр откр откроют откроют откр открыва открыва открыва открыва открыва открыва открыва открыва открыва открыва открыва открыва открыва открыв откр откр откр откр откр откр откр открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт открыт откр откр откуд откудов откуп откуп откуп откупор откус откус откуша откушен отлага отлага отлегл отлетевш отлетел отлетел отлет отлив отлива отлива отлива отлича отлича отлича отлича отличат отлича отлича отлич отличительн отлич отличн отличн отличн отличн отличн отличн отличн отличн отличн отлог отлог отложат отложен отлож отлож отлож отлож отлож отлож отлож отлома отломок отлуча отлуч отлуч отлучк отлуч отмалчива отмахива отмахива отмахива отмахив отмахнувш отмахнул отмен отмен отмен отмен отмен отмен отменя отмет отмет отметк отметк отмечен отмечен отмеч отмоеш отм отмыва отмыва отмыва отм отнекива отнекива отнес отнесет отнесл отнесл отнесл отнес отнест отнес отнима отнима отнима отнима отнима отнима отним отнимет отним отним 
отнимут относ относ относ относительн относительн относ относ относ относ относя отношен отношен отношен отношен отношен отнош отнын отнюд отня отня отня отня отня от отобеда отобеда отобеда отобеда отобеда отобра отобра отовсюд отогна отогна отогрел отогрел отодвига отодвига отодвига отодвиг отодвиг отодвинув отодвинул отодвинул отодвинул отодвинул отодвинут отозва отозва отозва отозва отозва отойдет отойд отойд отойд отойт отолстел отолщен отомкнув отомкнул отомст отоплен отопрет отопрет оторва оторва оторва оторва оторва оторвет отороп оторопевш оторопел отороп отосла отосла отошел отошл отошл отошл отоща отпарива отпеван отп отпер отперл отперл отперт отперт отперт отперт отперт отпечата отпечата отп отпив отп отпира отпира отпира отпира отписа отпихнул отпихнул отпихнут отплясыва отполирова отпор отправ отправ отправ отправ отправ отправ отправ отправ отправ отправ отправ отправ отправ отправ отправ отправл отправлен отправлен отправлен отправлен отправлен отправлен отправл отправл отправля отправля отправля отправля отправля отправля отправля отправля отправля отправля отправля отправля отправля отправл отправ отпразднова отпрыск отпрыск отпрыск отпрыск отпуск отпуска отпуска отпуска отпуска отпуска отпуска отпуска отпуск отпуст отпуст отпуст отпуст отпуст отпущ отпущ отра отрав отрав отрав отрав отрав отрав отрав отрав отрав отравлен отравлен отравлен отравлен отравл отравля отравля отравля отрад отрадн отрадн отража отража отража отраж отражен отражен отраж отраз отраз отраз отраз отрапорт отраст отреза отреза отреза отреза отреза отреза отрезвел отрезв отрезв отрезв отрезвля отрезвля отрезвля отрезвля отрезвля отрезвл отрезк отрека отрека отрека отрекомендова отрекомендова отрекомендова отрек отретирова отречен отречен отреч отреш отрин отрица отрица отрица отрицан отрицател отрицательн отрицательн отрицательн отрица отрица отриц отрод отрод отрочеств отрочеств отрублен отрыва отрыва отрыва отрыва отрыва отрыв отрыв отрывист отрывист отрывк отрывок 
отрывочн отряд отряс отсвет отсвет отсидел отскакива отскоч отскоч отскоч отскоч отскоч отслуж отслуж отслуж отсохл отсрочива отсроч отстава отстав отставк отставк отставн отставн отставн отста отста отста отста отста отста отста отста отстал отстал отстал отстал отстанет отстан отстанут отстан отстаньт отста отста отстегнул отстоя отстоя отстран отстраня отстран отстран отстряпа отступа отступа отступа отступ отступ отступ отступ отступ отступ отступ отступ отступл отступл отступ отсутств отсутств отсутств отсутств отсутств отсчита отсчитыва отсыла отсыла отсыпн отсюд отта отталкива отталкива отталкива отталкива отталкива отталкива отталкива отталкива отталкива отталкив оттащ оттенк оттенк оттенок оттеня оттеня оттепел оттепел оттер оттерт отт оттолкнет оттолкнеш оттолкнув оттолкнул оттолкнул оттолкнут оттопыр оттрепа оттреплет оттуд оттудов оттянул отуманен отуманива отупен отупет отуплен отуч отуч отхаркив отхаркнул отхват отхлебнул отхлебыв отхлеста отхлеста отхлынет отхлынул отход отход отход отход отходн отход отц отц отц отц отцвел отцветш отцветш отц отц отцовск отцовск отцовск отцовск отц отц отц отчаива отчаива отчаива отчаст отчая отчая отчая отчаян отчаян отчаян отчаян отчаян отчая отчая отчая отчая отчая отчая отчая отчая отчая отчая отчая отчая отчая отчаян отчаян отчаян отчая отч отч отчеканив отчекан отчекан отчекан отчеств отчет отчет отчетлив отчетлив отчет отчет отчет отчизн отчизн отч отчин отчист отчищен отчужден отшатнул отшатнул отшельник отшибл отшумел отщелкив отъезд отъезд отъезд отъезд отъезд отъезжа отъезжа отъеха отъявлен отъявлен отъявлен отыска отыска отыска отыска отыска отыска отыска отыска отыскан отыска отыскива отыскива отыскива отыскива отыскива отыскива отыскив отыщет отыщеш отыщ отягч отяжелевш отяжелел отяжелел офел офицер офицер офицер офицер офицер офицерск офицерск офицер офицер официальн официальн официальн официальн официальн официальн офранцуз ох оха оха оха охан охан охапк ох охват охват охват охват охват охватыва 
охватыва охватыва охватыва охвач охвачен ох охладевш охлажда охлажда охлажден охлажден охмелел охмел охмеля охнет охнут ох охот охот охотк охотлив охотн охотник охотник охотнич охотн охот охот охот охранен охран охраня охран охрипш охт оценен оцен оцен оцен оцен оценк оцепенеет оцепенел оцепенел оцепенел оцепенен оцепенен оцеп оч очаг оч очарова очарова очарова очарован очарован очарова очарова очарован очарован очарован очаровател очаровательн очаровательн очаровательн очаровательн очаровательн оч очевидн очевидн очевидн очевидн очевидн очевидн очевидн оч очен очеред очеред очерк очертан очертан очертан очерт очерт оч очин очин очин очистительн очист очистк очища очища очищен оч очк очк очк очнет очнет очнеш очн очнувш очнувш очнул очнул очнул очн очнут очумел очут очут очут очут очут очут очут очут ошеломл ошеломлен ошен ошиба ошиба ошиба ошиба ошиба ошиба ошиба ошиба ошиба ошибет ошиб ошибк ошибк ошибк ошибк ошибк ошибл ошибл ошибок ошибочк ошибочн ошибочн ошибочн ошиб ошиб ощер ощетин ощетин ощупа ощупа ощуп ощупыва ощупыв ощуп ощут ощут ощут ощутительн ощут ощуща ощуща ощуща ощуща ощуща ощуща ощущ ощущен ощущен ощущен ощущен ощущен п па пав павел павильон павильон павл павлин павлин павлин павлович павловн павловн павловн павловн павловн павловц павлыч павлыч пагубн пада пада пада пада падал пада пада пада пада пада падеж паден паден паден паден паден паден паден падет падут падуч падчериц падчериц падш падш падш падш падш падш падш паек пазух пазух па пакет пакет пакет пакост пакостн пал пал паладин палат палат палат палат пал палев палев палец пал пал пал палим палим пал палк палк палк палк палладиум палочк палочк пальб пальмерстон пальмерстон пальм пальм пальт пальц пальц пальц пальц пальц пальц пальц пальц пальчик пальчик пальчик паля паля памят памят памятл памятник памятн памятн памя памят пан панам пан панегирик панел пан панихид панихид панихид паническ паническ пан пан панорам панорам панорам пансион пансион пансион пансионерк пансион панталон 
панталон панталон пантелеевич пантелеевн пантелеич пантер панцыр пап папаш папаш папаш папаш папеньк папеньк папеньк папеньк паперт папильотк папирос папирос папироск папироск папироск папироск папиросочк папиросочк папиросочниц папиросочниц папирос папирос папк папк папочк папочк пап пап пар пар парабол парад парадн парадн парадн парадн парад парализова паралич паралич параллел параллельн параш параш пардон пар парен пар париж париж париж париж парижск парижск парикмахер парик пар пар парк парк парк парк пармезан парн парн парник парнишк парнишк парн парн пар пароксизм пароксизм пар пароход пароход пароход пароход пароход партер парт парт парт парт пар парус парус парч парч пар пас паск пасл пасмурн пасмурн пасмурн паспорт паспорт паспорт паспорт паспорт пассивн паст пастух пастух пастух пастуш пастушк пастыр паст паст пасхальн пасхальн пасх пасх пасх пасьянс патентова патер патетическ патетическ патетическ патетическ патетическ патетическ паток патриархальн пауз пауз паук паук паук паук паутин паутин паутин паутин паф пафос пахар паха пахл пахнет пахнул пахнул пахнут пациент пациент пациент пациент пач пачк пачк пачк пачтен пачул паш пашеньк пашеньк пашеньк пашеньк пашк паш пашут па паяц паяц пе пева пева певец певиц певиц певуч певуч певуч певуч певуч певуч певц певц певч певч певш пегашк пегашк пег педагог педагогическ педагог педант педантизм педантизм педантическ педантическ педантическ педантическ педант пе пейзаж пейзаж пейзаж пейзаж пейзаж пейзажист пейс пейт пекл пекл пекут пекущ пел пел пелаг пелаге пелен пелен пелен пелерин пел пел пен пен пен пен пен пен пен пен пенк пенкин пенкин пен пен пенс пенсион пенсион пенсион пенсионишк пенсион пенс пен пен пен пен пеня пеня пеня пепел пепельниц пепл пепл пер перв перв перв первенец первенств первенств первенств первенц первенц перв первобытн первобытн перв перв первозда перв перв перв первоначальн первоначальн первоначальн первоначальн первоначальн первоначальн первоначальн перв перв перв перв перв перв 
пергаментн пергамент перебега перебега перебега перебежа перебер переберет перебереш перебер перебер переберут перебива перебива перебива перебива перебиван перебива перебив переб переб переб переб перебира перебира перебира перебира перебира перебир перебит переб переболел переборол перебор перебра перебра перебран перебра перебрасыва перебуд перебыва перебыва перебыва перебьют перевал перевалива перевалив перевал перевар перевар перевар переведет перевез перевезен перевезл перевезут перевел перевел перевел перевел перевернет перевернул перевернул перевернул перевернул перевернул перевернул перевернут перевертел перевертыва перевертыва перевертыв перевес перевест перевест перевив перевод перевод перевод перевод переводн перевод перевод перевод перевод перевод перевож перевоз перевозк перевозк перевоз переворачива переворачива переворачив переворот переворот переворот перевороча перевра перевяза перевяза перевязочн перевяз переглота переглянул переглянул перегнувш переговарива переговор переговор переговор переговор переговор переговор переговор перегора перегородк перегородк перегородк перегородк перед переда передава передава передава передава передава передава передав передад переда переда переда переда переда перед переда передаст переда переда передвиг передела передела передела передела передела переделк переделк переделыва передергив передернул передернут передк передн передн перед передов передов передов перед передразниван передразн передума передума передума передума передума передума передумыва передуш переед переедет переедет переедеш переед переежа переезд переезд переезд переездк переезд переезжа переезжа переезжа переезжа переезжа переезжа переезжа переезжа переезжа переезж перееха перееха перееха перееха перееш пережда пережда пережда пережевыва пережива пережива пережива пережив переживеш пережив переж переж пережит переж перезрел переименова переимчив переинач переинач перейдет перейдет перейдут перейд перейт перекидыва перекинет перекинул 
перекинут перекладн перекладыва перекладыва переклика перекова перекос перекос перекрест перекрест перекрест перекрест перекрест перекрест перекрест перекрестк перекрестк перекрестк перекрестк перекрестн перекресток перекрест перекрещива перекрещиван перекрещив перекрещ перекуплен перекус перелетн перелетн перелива перелива перелив перел перелистыва перелистыва перелистыва перелистыва перелистыв перелож перелож перелож перел перелом перелома перелома перелом перельет перемелет перем перем перемен перемен перемен перемен перемен перемен перемен перемен перемен перемен перемен перемен перемен перемен перемен перемен перем переменя перемен перемен переменя перемен перемеша перемеша перемеша перемин перемолв перемолол перем перенес перенес перенесен перенес перенесет перенесеш перенесл перенесл перенес перенест перенест перенест перенес перенесут перенос перенос перенос перенос перенос перенос перенос переноч перенош переня перенят переодет перепа перепаха перепел перепел перепелк перепел переписа переписа переписа переписк переписыва переписыван переписыва перепишет перепишет переплет переплет переплет перепл переползан переползан переполнен переполнен переполн переполня переправ переправ переправ переправ перепрода перепрода перепуга перепуга перепуга перепуга перепуга перепуга перепуга перепуга перепута перепута перепута перепута перепьют переработа перераста перерва перерв перерезыва перерод перерожда перерожден перер перерыв перерыв перерыв переряжен пересвет пересекл пересел пересел пересел пересел пересел переселя пересилива пересилива пересилив пересил пересил перескаж пересказа пересказа пересказыва пересказыва пересказыва перескакива перескакива перескоч пересла пересла пересла переслюн пересмешк пересмотр пересохл пересохл переспор переспрашива переспрос переспрос перессор переста перестава перестава перестава перестава перестав перестав переставля переста переста переста переста переста перестан перестанемт перестанет перестанет перестанеш перестан 
перестанут перестан перестаньт переста переста переста перестрада перестройк перестройк переступа переступа переступа переступ переступ переступ переступ переступ пересуд пересчита пересчита пересчита пересчитыва пересыла пересыла пересыпа перетаска перетаскива перетасовк перетащ перетащ переулк переулк переулк переулк переулк переулк переулк переулок перехват перехват перехитрен переход переход переход переход переход переход переход переход переход переход переходя перехож перечисл перечита перечита перечниц перешагнеш перешагнул перешагнут перешедш перешедш перешел перешепта перешептыва перешива переш перешл перешл перешл перещеголя пер пер перил перин перин перин перин период периодическ периодическ периодическ периодическ периодическ перифер перл перламутр перл пер пер персидск персидск персидск персик персон перспектив перспектив перспектив перспектив перспектив перст перстен перстн перстн перстн перстн перст перст пер перуджийск перуджин перудж перун перц перчатк перчатк перчатк перчатк перчатк перчаток перш перышк пер пер пер пес пес песенк песенк песенк песенник песенник песк песк песк пескин песк песк песн песн песн песн песн песн песн песн песн песн песок пестик пестр пестреют пестр пестр пестр пестрот пестр пеструшк пестр пестр пестряк пестряков пестряков песчан песчан песчан песчинк пет петел петербург петербург петербург петербург петербургск петербургск петербургск петербургск петербургск петербургск петербургск петербургск петербургск петербуржц петл петл петл петлиц петлиц петлиц петл петл петр петр петр петр петрович петрович петрович петрович петрович петровк петровн петровн петровн петровн петровн петровск петровск петроградск петроградск петр петр петруш петрушк петрушк петрушк петрущ петух петух петух петух петушк пет пет пехот пехот пехотн печал печал печа печал печал печал печал печальн печальн печальн печальн печальн печальн печальн печальн печальн печальн печальн печал печал печ печата печата печата печата печат печатн печатн печа печат 
печен печен печенк печенк печен печен печет печ печк печк печк печк печ пешк пешк пешн пещер пещер пианин пив пива пива пив пив пивц пивц пигмалион пидерит пиджак пиджак пиджак пик пикирова пикирова пикирова пикнут пил пил пил пил пилот пилюл пилюл пион пир пир пирамид пирамид пирамид пир пир пирог пирог пирог пирог пирог пирожк пирожк пирожн пирожн пир пирушк пир писа писа писа писа писа писа писан писа писа писан писан писан писан писар писаришек писаришк писаришк писар писар писател писател писател писател писательниц писательств писател писател писател писател писа пис писец писк пискарин пистолет писц писц писц писц писыва письм письм письм письмен письмен письмен письмен письмен письмец письмец письм письмоводител письмоводител письмоводител письмоводител письм письм пита пита пита пита пита питан пита пита пита пит пит питейн питейн питер питер питер пит питомник питомц питомц пит пит пит пиф пиш пишет пишет пишет пишеш пиш пиш пиш пишут пишут пишущ пищ пища пищат пищ пищ пищик пищик пищик пищик пищик пищ пиявк пла плава плаван плава плава плав плавн плавн плавн плака плака плака плака плака плака плака плака плакал плака плакид плаксив плакун пламен пламен пламен пламен пламен пламен пламен плам план план план план план планет планет планет планет планет планет план планомерн плантатор план план пластинк пластыр плат платеж плат плат плат платил плат плат плат плат плат платк платк платк платк платк платк платок платон платочек платочк плат плат платформ плат плат плат платьиц платьиц платьишк платьишк плат плат плат плат плат плат плач плач плач плачевн плачевн плач плачет плачет плачеш плач плачут плачущ плачущ плачущ плачущ плач плачьт плащ плащ плащ плащ плащ пле плева плеван плевн плевок плед плед плел племен племянник племянник племянниц плен пленительн пленительн плен пленник плен плен плен пленя пленя пленя пленя плеск плескан плесн плеснул плеснул плеснул плест плетен плетен плетет плет плетн плетн плетн плетут плеч плеч плеч плеч плеч плеч плеч 
плечик плеч плеч плеч плешив плещет плибьет плисов плит плит плит плит плит плит плод плод плод плод плодотворн плодотворн плод плоск плоск плот плотин плотин плотн плотник плотник плотн плотн плотн плотн плотн плотн плот плот плох плох плох плох плох плох плохоньк плох плош площад площадк площадк площадк площадн площадн площад площад площад плуг плут плутоват плутоват плутовк плутовск плутовск плутовск плутовств плут плывет плыв плыл плыл плыл плыт плэд плюгавеньк плю плюнет плюн плюнул плюнут плюн плюс плюх плющ плющ плющ плю плюют пляс пляса пляса пляск пляск плясун плясун пляшет пляш пляшут пни по побагровел побаива побаива побег побег побег побегут побегушк побед побед побед побед побед побед победител победител побед победн победн побед победоносц победоносцев побед побед побежа побежа побежа побежа побежда побежда побежда побежда побежд побежд побежден побежд побеж побелевш побелевш побелел поберег поберег поберег побереч побереч побеспоко побеспоко побива поб побира поблагодар поблагодар поблагодар поблагодар поблагодар побледн побледневш побледневш побледнеет побледнел побледнел побледнел побледнел побледнеют поблек поблекл поблекл поблекл поблекл поблекнет поблекш поблескива поближ поблиз пободр побо побож побож побо побо поб побок побол поболта побольш побор поборол побор побра побран побрезг побр поброд поброса побуд побуд побуд побуд побуд побуд побудьт побужда побужден побыва побыва побыва поб побыстр поб побьет повад повадк повад поважн повал повал повал повал повал повал повал повальн поваля повар повар повар повар повар повар поведа поведен поведен поведен поведен поведет поведеш повед поведут повез повезет повезл повезут повел повел повелева повелел повелен повелен повел повел повелител повелительн повелительн повел повел поверга поверга поверен поверен поверен поверен поверен повержен повержен повер повер повер повер повер повер повер повер повер поверк поверк поверн поверн повернувш повернувш повернул повернул повернул повернул повернул повернут 
повернут поверстн повертел повертет поверт повертыв поверх поверхностн поверхностн поверхн повер поверьт повер поверя поверя поверя повер поверя повеселевш повесел повеселеет повеселел повеселел повеселел повеселел повесел повесел повесел повес повес повес повес повес повествова повествован повествова повеств повест повестк повестк повестк повестк повест повест повест повесьт пов повечер повешен повея повея повивальн повивальн повида повида повида повилик повин повин повин повинова повинова повин повин повину повин повис повиса повисл повисл повиснет повисш повисш повит повит повихнул повлекл повлекл повлия повнимательн повод повод повод повод повод повод поворачива поворачива поворачива поворачива поворачива поворачив поворачив поворот поворот поворот поворот поворот поворот поворот поворот поворча повостр повред повред повред повред повсеместн повсеместн повстреча повстреча повсюд повторен повторен повторен повтор повтор повтор повтор повтор повтор повтор повтор повтор повтор повтор повтор повторя повторя повторя повторя повторя повторя повторя повторя повторя повторя повторя повторя повтор повыскоч повысосет повыш повышен повяжет повяза повяза повязк повязк повязк поган поган погас погаса погаса погасан погас погас погас погасл погасл погасл погасш погиб погиба погиба погиба погиба погиба погиба погибел погибельн погибел погибл погибл погибл погибнет погибн погибнут погибш погибш погибш поглад поглот поглот поглоща поглощ поглощен поглубж поглум поглуп поглупел поглупет погляд поглядел поглядел поглядел поглядел поглядет погляд погляд погляд погляд погляд поглядыва поглядыва поглядыва поглядыва поглядыв погляж погна погна погнуша поговарива поговарива поговарива поговор поговор поговор поговор поговор поговор поговор поговор погогочут погод погод погод погод погод погод погод погод погод погож погон погон погон погорд погорел погорельц погорельц погорячат погоряч погоряч погост погреб погреб погреба погреба погребальн погребальн погреб погреб погребл погребл 
погроз погроз погроз погрозн погромч погружа погружа погружа погружа погружа погруж погруж погруж погружен погружен погруз погруз погруз погруз погруз погруз погруз погруз погруз погруз погруст погруст погуб погуб погуб погуб погубл погуля погуля погуля погуля погуля погущ под подава подава подава подава подава подава подав подав подав подавлен подавлен подавлен подавлен подавл подавля подавля подавл подавн подагр подагрик подагр подагр подагр подад подад подадут пода пода пода пода пода пода пода пода пода подальш под пода пода пода пода подарен подарен подар подар подар подар подар подар подарк подарк подарк подарок подар подаст подат пода подач подачк подаш пода пода пода пода подаян подбег подбегут подбежа подбежа подбежа подбер подбива подбира подбира подбира подбира подбира подбир подбит подбит подбородк подбородк подбородок подбочен подброс подва подвал подвальн подвальн подвальн подведет подведеш подвел подвел подвел подверга подверга подверга подверга подверга подверга подвергнут подверн подвернувш подвернул подвернул подвест подвиг подвиг подвига подвига подвига подвига подвиг подвиг подвиг подвиг подвигл подвиг подвиг подвижник подвижн подвинет подвинув подвинул подвинул подвинул подвинул подвинул подвинут подвластн подвод подвод подвод подвод подвод подвод подвож подворотн подвох подвяза подгад подгад подгиба подгиба подгиб подглядел подглядыва подгоня подгорел подгор подгорюн подготовк подготовлен подготовл подготовл подготовля подготовля подгреб подгуля поддава поддава поддад поддад подда поддакива поддакива поддакива поддакива поддакив поддакнул подда подда подда подд поддаст подда поддева поддевк поддел поддела поддела подделыва подделыва подделывател поддержан поддержа поддерж поддержива поддержива поддержива поддержива поддержива поддержива поддержива поддержив поддерж поддержк поддразнива поддразнива поддразнива подействова подействова подействова подействова подействова подейств подела подел подел подел подержа подер подернул подернул подернут 
подернут подернут подернут подерут подешевл поджа поджат поджег поджига поджига поджида поджида поджида поджида поджима поджим подзадорив подзатыльник подземел подземн подземн подземн подзыва подзыва под подив подив под подкараул подкараульт подкат подкинут подкипят подкладк подкладк подкладк подкладыва подкладыва подкладыв подк подколенк подколодн подкос подкрадет подкрадут подкреп подкрепл подкрепля подкуп подлажив подл подл подл подл подл подл подл подленьк подлец подлец подлец подлец подлец подлец подливк подл подлин подлин подлича подл подл подл подлож подл подлост подлост подл подл подл подл подман подмахнет подмахн подмахнул подмест подмет подмет подметк подметок подмигива подмигива подмигива подмигив подмигнув подмигнул подмонтирова подмыва подмыва подмышк подмышк поднадул поднадут поднес поднест подн поднима поднима поднима поднима поднима поднима поднима поднима поднима поднима поднима поднима поднима поднима поднима поднима подним подним подним поднимет подн поднимут подноготн поднос поднос поднос поднос поднос поднос подня подня подня подня подня подня подня подня подня подня подня подня подн поднят подня подня под подоб подоб подобн подобн подобн подобн подобн подобн подобн подобн подобн подобн подобостраст подобострастнича подобострастн подобра подобра подобра подобра подобра подобра подобр подогнут подогрева подогрет пододвинул пододвинул подожгл подожда подожда подожда подожд подождет подождеш подожд подожд подожд подожм подозва подозрева подозрева подозрева подозрева подозрева подозрева подозрева подозрева подозрева подозрева подозрев подозрен подозрен подозрен подозрен подозрен подозрен подозрен подозрител подозрительн подозрительн подозрительн подозрительн подозрительн подозрительн подозрительн подойдет подойдеш подойд подойд подойд подойт подоконник подоконник подол подолг подол подольст подольща подонк подонкихотствова подопрет подосадова подосла подоспеет подоспел подоткнул подох подошв подошедш подошедш подошедш подошел подошл подошл подошл 
подошл подпева подпева подпева подпер подперет подп подпира подпир подписа подписа подписа подписа подписа подписа подпис подписк подписк подписк подписыва подписыва подписыва подписыва подписыва подписыва подписыв подп подпис подпишет подпишет подпишеш подпиш подпиш подполза подполз подползл подполковник подпол подпоручик подпруг подпрыгива подпрыгива подпрыгив подпрыгнул подпуска подрагива подража подразделен подразделен подразн подразумева подра подра подра подраста подреза подреза подробн подробн подробнича подробн подробн подробн подробн подробн подробн подробн подробн подробн подробн подрос подросл подруг подруг подруг подруг подруг подруг подруг подруж подруж подряд подряд подряд подрядчик подрядчик подрядчик подряжа подсвечник подсвечник подсвистыва подсека подсекл подсекл подсел подскаж подсказа подсказыва подсказыва подсказыва подсказыв подскакива подскоч подскоч подслеповат подслеп подслужива подслуша подслуша подслуша подслушива подслушива подслушива подслушива подслушива подсматрива подсмеива подсмотрет подсоб подсочинен подстав подстав подстав подставля подставля подставл подстерега подстерега подстерега подстереч подстил подстрекательн подстрелен подстрел подстрел подстрига подступ подступ подсудим подсудим подсудим подсудим подсунул подсунул подталкива подталкива подтверд подтверд подтверд подтверд подтвержда подтвержда подтвержда подтвержда подтвержда подтвержда подтвержда подтвержда подтвержден подтвержден подтверж подтибр подтыка подтянут подув подуван подув подул подума подума подума подума подума подума подума подума подума подума подума подума подума подума подумыва подумыва подурнел подурнел подустро подушек подушечк подушк подушк подушк подушк подушк подхват подхват подхват подхват подхват подхват подхват подхватыва подхватыва подхватыв подхлопыва подход подход подход подход подход подход подход подход подход подходн подход подход подходя подходя подходя подхож подчас подчеркнул подчинен подчинен подчинен подчин подчин подчин подчин подчиня 
подшиван подшива подшучива подъезд подъезжа подъезжа подъ подъем подъемлют подъемн подъеха подыма подыма подыма подыма подыма подыма подыман подыма подыма подым подым подым подыскива подыша подыш под подьяческ подьяч подьяч подьяч поед поедемт поедет поедет поедеш поединк поединк поед поедут поед поезд поезд поезд поездк поездк поездк поездк поезд поезд поезжа поезжа поел по поест поест поет поет поет поеха поеха поеха поеха поеш пожа пожа пожа пожал пожалеет пожалееш пожал пожал пожалел пожалел пожалел пожа пожалова пожалова пожалова пожалова пожалова пожал пожалуйст пожалуйств пожал пожар пожар пожар пожар пожарищ пожарн пожарн пожар пожар пожар пожат пожат пожа пожела пожела пожела пожела пожела пожелан пожела пожела пожела пожелтевш пожелтел пожелтел пожертвова пожертвова пожертвован пожертвова пожертв пожертвует пожертв пожива пожива пожив пожив поживеш пожил пож пожил пожил пожил пожил пожима пожима пожима пожим пожина пожира пожира пожира пожир пож пожм пожмет позабав позабот позабот позаб позаб позабыт позавидова позавидова позавтрака позавтрака позавтрака позавтрака позавчер позад позаимствова позаня поз позва позва позва позвал позва позва позволен позволен позвол позвол позвол позвол позвол позвол позволительн позволительн позволительн позвол позвол позвольт позвол позволя позволя позволя позвол позволя позволя позволя позволя позвон позвон позвякива поздн поздн поздн поздн поздн поздн поздн поздн поздорова поздорова поздорова поздорова поздоров поздоров поздрав поздрав поздравлен поздравля поздравля поздравля поздравля поздравля поздрав поз позевыв позж позиц позл позна познава позна позна позна познаком познаком познаком познаком познаком познакомл познаком позна позна познан познан познан позначительн позовет позов позов позовут позолот позолот позор позор позорищ позорн позорн позорн позорн позор поз поз поз позыв позьм по поигра по по поимен поиска поиска поиска поиск поистин по по поищ поищ поищ по пойд пойдемт пойдет пойдет пойдеш пойд пойдут 
пойма пойма пойма пойма пойма пойма пойма пойма пойма пойма пойма пойма пойм поймет поймет поймеш пойм пойм пойм поймут пойт пок покажет покажет покажет покаж покаж покаж покажут покажут показа показа показа показа показа показа показа показа показан показан показан показан показан показа показа показа показыва показыва показыва показыва показыва показыва показыва показыва показыва показыва показыва показыва показыва показыва показыва показыва показыв покамест поката поката поката покат покат покат покат покат покатыва покат покача покача покача покача покачива покачив покачнул покачнул пока покаян покаян покиван покида покида покида покида покида покида покида покид покинет покинеш покин покинув покинул покинут покинут поклад покладыва поклаж поклаж покла поклон поклон поклонен поклонен поклонен поклон поклон поклон поклон поклон поклон поклонник поклонник поклонник поклонник поклонник поклонник поклон поклон поклон поклон поклоня поклоня поклоня поклон поклоня поклон покля покля покля поклян пок поко поко поко поко поко поко поко поко поко поко пок покойн покойн покойн покойник покойник покойник покойник покойник покойник покойниц покойниц покойниц покойн покойн покойн покойн покойн покойн покойн покойн покойн покойн покойн покойн пококетнича поколеба поколеба поколеба поколен поколен поколен поколен поколот поколот покомфортн поконч покончен поконч поконч поконч поконч поконч поконч поконч покор покорен покор покор покорм покорн покорн покорн покорн покорн покорн покорн покорн покорн покорн покорн покорн покорн покорн покорн покороб покороб покороч покорств покоря покоря покоря покос покос пок пок поко поко поко покоя покрадут покраж покраж покрасн покрасневш покраснеет покраснел покраснел покрашен покрепч покрив покрив покрикива покрич покровител покровител покровительств покровительств покровительствова покровительств покровительств покров покр покрупн покрут покр покрыва покрыва покрыва покрыва покрыва покрывал покрыв покр покр покр покр покрыт покрыт покрыт 
покрыт покрыт покр покрышк покуд покупа покупа покупа покупа покупател покупа покупк покупк покупк покупк покупк покупк покупн покураж покур покус покуша покуша покуша покушен покушен покушен покушен покушен пол пол полага полага полага полага полага полага полага полаг полаком поласков полатв полбутылк полведр полгод полгород полден полдневн полдн полдорог полдюжин пол полев полегоньк полегч поледенел полежа полежа полежа полеж полеж полез полеза полез полезет полезл полезл полезн полезн полезн полезн полезн полезн полезн полезн полезн полезн полезн полезн пол пол пол полен полен полен поленьк поленьк поленьк полет полет полетел полетел полетел полетел полетет полет полет полет полет полечива полечк полечк полечк полечк полеч полжизн полза полза полза полза ползет ползуч ползущ ползущ полз полива полива пол полин полин полин полин полиня полиня полиня полиня полинял полинял полинял полис политик политик политик политик политик политическ политическ политическ политическ политическ политическ политическ полицейск полицейск полицейск полициан полиц полиц полиц полиц полиц полишинел полк полк полкан полк полк полк полковник полковник полковник полковнич полковнич полководц полков полков полкомнат полк полмиллион полминут полн полн полн полн полн полнет полн полноводн полн полн полн полнокровн полн полност полнот полнот полнот полноч полночн полночн полночн полноч полн полн полн полн полн полн полн полн пол половецк половик половин половин половинк половинк половин половин половин половинчат половин полов полов полов половч полов полог полог полож положат полож полож положен положен положен положен положен положен положен положен положен положен положен положен полож полож полож полож полож полож полож полож полож положительн положительн положительн положительн положительн положительн положительн полож полож полож полож полож пол полок пол полома поломойн полон полонен полонск полос полосат полоска полоск полоск полоск полоск полоск полоснет полос полос полост полост 
полос полотенц полотенц полотн полотн полотн полотнищ полотн полоумн полоумн полоумн полоумн полощет полощут полпивн полпивн полслов полсотн полстака полстраниц полсутк полтинник полтинник полтинник полтин полтор полтор пол полубезумн полубессмыслен полубог полубред полувесел полугод полугор полудворянск полуден полудн полудн полудремот полукруг полумгл полумертв полумрак полумрак полун полуноч полуночн полунощн полуотворен полуотворен полуотворен полуотворен полуоткрыт полупоклон полупрезрен полупрезрен полупросып полупьян полуразвал полуразрушен полуслов полусн полусознан полустрадн полусумасшедш полусумасшедш полутон полутор полуформен получа получа получа получа получа получа получа получа получа получа получас получа получа получ получен получен получен получен получен получен получен получен получен получ получ получ получ получ получ получ получ получимш получиновнич получ получ получ получ получ получш полушепот полуштоф полушубк полушубок полчас полчасик полчищ полшаг пол полым полым полын полын полыхнет польз пользова пользова пользова пользова пользова польз польз пользует пользуеш польз пользу польз польк польк польск польск польск польск польск польст польст польш польш польш польщ пол полюб полюб полюб полюб полюб полюб полюб полюб полюбл полюбопытствова полюб полюс полюс пол поляк поляк поляк поляк пол пол полян поля полярн пол полячишк полячк полячк полячк полячок помадк помад помад помаленьк помал помахив помашет помедл помедл помедл помедл помел помен помен поменьш поменя поменя помер померанцев померанцев померанцев померанц померет померещ померещ померзл померк померка померкл померкл померкш померл помертв помертвевш помертвевш помертвевш помертвевш помертвевш помертвел помертвел померя помест помест помест помест помест поместительн помест помесячн помех помечен помечта помеша помеша помеша помеша помеша помеша помеша помеша помеша помеша помеша помеша помеша помеша помеша помеша помешательств помешательств помешательств помеша помеша помеша 
помеща помеща помеща помеща помещен помещен помещен помещен помещик помещик помещик помещик помещик помещиц помещич помещ помещьч помил помил помил помим помин помина помина помина помина помина помина поминк поминк поминк поминок помин поминутн поминутн помира помир помир помир помир помир помн помн помн помн помн помн помн помн помн помн помн помн помн помн помн помня помня помня помог помога помога помога помога помога помога помога помог помог помог помогл помогл помогл помог помогут поможет помойн помойн помол помол помол помол помолодел помолодел помолож помолча помолча помолча помолча помолча помолч помолч помол поморщ поморщ помоч помоч помоч помоч помощ помощ помощник помощник помощник помощниц помощниц помощ помощ помо помо помрача помрач помрачен помрачен помрач помрет помудрен помут помутневш помуча помуч помыка помысл помысл помысл помысл помышлен помышлен помышля помягч помянет помян помян помянул помянут помянут помят помя понаведа понадея понадея понадея понадоб понадоб понадоб понадоб понадоб понадобл понадоб понапрасн понасажа понатуж поневол понедельник понедельник понедельник понелеп понемног понемножк понес понес понесет понесет понесл понесл понесл понесут пониж пониж понижен пониз пониз поник поникнув понима понима понима понима понима понима понима понима понима пониман пониман пониман пониман понима понима понима понима понима понима поним понм понов понос поношен поношен поношен поношен понош понрав понрав понрав понрав понрав понрав понуд понужда понука понукан понук понур понын понюха понюха понюха понюха понюха понюха поня поня поня поня поня поня пон понят понят понят понят понят понят понят понятлив понятн понятн понятн понятн понятн понятн понятн понятн понят поня пообеда пообеда пообеда пообеда пообеда пообеда пообеда пообтер поодал поочередн поощря поощря попа попа попада попада попада попада попада попада попада попада попада попада попада попад попадет попадет попадеш попадеш попа попа попа попа попа попа попа попа поп попаст 
попахива поперек поперемен поперечн поперечн попестр попечен попира попиран пописыва поплавок поплака поплачет поплачут поплевыва поплевыв поплел поплел поплотн поплывут поп попович попозж попойк попол пополз поползет поползл поползновен пополнел пополн пополудн попользова попомн попон попорт попотчева поправ поправ поправ поправ поправ поправим поправ поправ поправ поправк поправк поправк поправл поправля поправля поправля поправля поправл поправочк поправ поправьт поправ попрека попрека попрека попрека попрека попрекнул попрек попридержа попридерж поприщ поприщ поприщин поприщ попробова попробова попробова попроб попроб попроб попрозорлив попрос попрос попрос попрос попрос попрос попрос попрос попрос попрост попрос попрош попуга попудр попуст попутчик попыта попыта попыта попытк попытк попыток попят пор пор поработа поработа порабощен поравня поравня поравня порадова порадова порад поража поража поража пораж пораж поражен поражен поражен поражен поражен поражен поражен пораж поражен пораз пораз пораз пораз пораз поразительн пораз пораньш пораспотрош порастет пор порва порва порва порва порвет пор пореш пореш пореш пореш пореш порица порицан порица поровня поровня порог порог порог порог порог пород пород пород пород порожда порожда порожн пор порок порок порок порок поросенк поросл поросл поросят порох порох порохов порохов порох порох пороч порочн порочн порочн порочн порочн порошк порошк порошок порошочек пор портер портер портер порт порт порт порт портмон портних портних портних портн портн портн портн портн портрет портрет портретик портрет портфел портьер пор порубк поруга поруган поруга поруган поруган порумян поручат поруча поручен поручен поручен поручен поручен поручен поручен поруч поруч поручик поручик поручик поручик поручиков поручик поручик поруч поруч поруч поруч поруч поруч поруч порфир порфир порфир порфир порфир порха порха порхнул порхнул порц порч пор пор порыв порыва порыва порыва порыва порыв порыван порыван порыван порыва порыв порывист 
порывист порывист порывист порыв порыв порыв пор пор порыска порядк порядк порядк порядк порядк порядк порядок порядочн порядочн порядочн порядочн порядочн порядочн порядочн порядочн порядочн посад посад посад посад посад посад посад посад посад посажен посажен посаж посвата посвата посвата посвеж посвистыв посвят посвят посвяща посвяща посвяща посвящ посвящен пос поседевш посекут посел посел посел посел посел посел пос посередин посет посет посетител посетител посетител посетител посетител посетител посет посет посеч посеща посеща посеща посеща посеща посеща посеща посеща посеща посещен посещен посещен посещен посещен посея посид посидел посидет посид посид посид посид посид посижива посиж посильн посильн посильн посимпатичн посиневш поскака поскачет поскольк поскон поскон поскор поскор посла посла посла посла посла послан посланник посланник посла посла посл последк последн последн последн последн последн последн последн последн последн последн последн последн последн последова последова последова последова последова последовательн последовательн последова последств последств последств последств последств послед послед послед послезавтр послеобеден послеобеден послеобеден пословиц пословиц послуж послуж послуж послуж послуж послуша послуша послуша послуша послуша послуша послуша послуша послуша послуша послуша послуша послушан послушан послушан послуша послуша послушн послушн послушн послушн послушн послушн послушн послыша послыша послыша послыша послыша послышат послыш послыш посматрива посматрив посме посмеет посмеет посмеива посмеива посмел посмел посмет посмеш посмеют посмея посмея посмирн посмотр посмотрел посмотрел посмотрел посмотрел посмотрет посмотр посмотр посмотр посмотр посмотр посмотр посмотр поснима пособ пособ посовест посовест посоветова посоветова посоветова посоветова посовет посовет посолидн посол посоловел посольств посох поспа поспа поспева поспева поспева поспеет поспел поспет поспеша поспеша поспеш поспеш поспеш поспеш поспеш поспеш поспешн 
поспешн поспешн поспешн поспешн поспеш посп посп посплетнича поспор посред посредин посредник посредник посредств посредств поссор поссор поссор пост постав постав постав постав постав постав постав постав поставл поставлен поставлен поставлен поставля поставц постав поставьт постав постара постара постара постара постара постара постара постаревш постарел постарел постарел постарет постарш постел постел постельк постельк постельн постел постел постепен постепен постепен постепен постепен постиг постига постигнет постигнеш постигнут постила постил постич постла постла постла постла постн постн постн пост постойт постольк пост посторон посторон посторон посторон посторон посторон посторон посторон посторон посторон посторон посторонн посторон постоя постоя постоя постоя постоял постоя постоя постоя постоя постоя постоя постоя постоя постоя постоя постоянств постоя пострада пострада пострада постраща пострел постреленок пострич построек постро построен построен построж постро постро постро постро постройк постройк постройк постройк пост постукива постукиван постукива постукива поступа поступа поступа поступа поступа поступа поступ поступ поступ поступ поступ поступ поступ поступ поступк поступк поступк поступк поступк поступк поступк поступк поступк поступл поступок поступ поступ постуча постуча постуча постуча постуч постуч постыд постыд постыд постыдн постыл пост постыл постыл посуд посудинк посудинк посуд посуд посуд посуд посует посуществен посыла посыла посыла посыла посыла посыла посыла посыла посыла посыла посыла посылк посыльн посыпа посяга пот пот потаен потанчик потапыч потаска потаска потаскун потаскун поташ потащ потащ потащ потащ пот потекл потекл потекут потемневш потемневш потемн потемнел потемок потепл потер потер потер потер потерпел потерп потерт потерт потер потер потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потеря потесн потех потеш потеш потеш потешник 
потешн потира потира потиран потир потихоньк потиш поток поток поток поток потолк потолк потолков потолк потолку потолок потолоч потолстеет пот потом потомк потомк потомок потомств пот потонет потонул потонут потонут потораплива поторгова потороп потоскова потрат потраф потрач потреблен потребн потребн потребн потребн потребн потребова потребова потребова потребова потребова потреб потреб потреб потребуеш потреб потревож потрепа потрепа потрепа потрепа потрепещут потрет потрогив потрох потрох потруд потруд потрут потряс потряса потряса потряса потряса потряс потряс потрясен потрясен потрясл потрясл пот потуг потуп потуп потуп потуп потуп потуплен потупля потупл потуп потуп потускл потускнел потух потуха потухш потухш потухш потуш потуш потуш потчеван потчеван потчева потч потягива потягива потягив потянет потян потянул потянул потянул потянул поубав поубра поудал поужина поужина поужина поумн поумнича поутр поуча поучен поучительн поуч пофанфарон пофилософствова пофилософствова пофилософству пофилософствуеш пофорс похажив похва похва похвал похвал похвал похвал похвал похвал похвал похвальб похвальн похвальн похвальн похваста похваста похва похит похитител похища похищен похлопота похлопоч похлопыв похмел похмел похмел поход поход поход поход поход поход поход поход поход походимт поход поход поход походк походк походк походк походн поход поход похож похож похож похожден похож похож похож похож похож похож похож похож похож похож похозяйнича похолод похолодел похолодел похолодел похолодел похорон похорон похорон похорон похорон похорон похорон похорон похорон похорон похорон похорон похорон похорон похорон похорошел похот похотлив похудевш похудевш похудееш похудел похудел похудел похудел похуж поцелова поцелова поцелова поцелова поцелу поцелу поцелу поцелу поцел поцелу поцел поцел поцел поцелу поцелу почавка почат почащ почв почв почв почв почел поч поч почерк почерневш почернел почернел почерпа почерпнеш почерпнут почерпнут почеса почест почест почесыва почесыв 
почетн почетн почетн почетн почет почива почива почива почива почива почиет поч почин починива почин почин починк починков починк почин почист почита почита почита почита почита почитател почита почита почищ почищ почиющ почл почмока почт почт почтальон почтальон почтамт почт почтен почтен почтен почтен почтен почтен почтен почтен почтен почтен почтен почтен почтен почтен почт почт почтительн почтительн почтительн почтительн почтительн почтительн почтительн почтительн почт почтмейстер почт почтов почтов почтов почтов почтов почтов почт почт почт почувствова почувствова почувствова почувствова почувств почуд почуд почуд почуд почу пошалива пошар пошар пошатнет пошатнул пошатыва пошатыв пошевел пошевелива пошевел пошевел пошевел пошевел пошевел пошевел пошевел пошевел пошевел пошел пошепта пошепчут пошл пошл пошл пошл пошленьк пошленьк пошленьк пошлет пошлеш пошл пошл пошл пошл пошл пошл пошлост пошлост пошлост пошл пошл пошл пошл пошляк пошляк пошляк пошляческ пошт пошт пошут пошут пошут пощад пощад пощад пощад пощекота пощекота пощелка пощелкива пощупа поэз поэз поэз поэм поэм поэм поэт поэт поэт поэтическ поэтическ поэтическ поэтическ поэтическ поэтическ поэтическ поэтическ поэтичн поэтичн поэт поэт поэт по поют поющ появ появ появ появ появ появлен появлен появлен появлен появля появля появля появля появля поярч пояс пояс пояс поясн пояснен поясн пояс по прав прав прав прав прав прав правд правд правдив правд правдоподоб правдоподобн правдоподобн правд правд правд прав праведн праведн праведн прав правел прав прав правил правил прав правильн правильн правильн правильн правильн правильн правильн правильн правильн прав правительств правительств правител правлен правнук прав правовед прав прав прав прав православн православн православн православн правосуд прав прав прав прав прав прав прав прадед прадед прадедушк празд праздн праздник праздник праздник праздник праздник праздник праздник праздник праздник праздничн праздничн праздничн праздничн праздн праздн праздн 
праздн празднолюбц праздн праздн праздност праздност праздност праздн праздн праздн празелен практик практик практик практикова практик практическ практическ практическ практическ практическ практическ практическ праотц праотц прасков прасков прасков прах прах прах прах прачк прачк преавенантненьк преблагополучн пребудет пребуд пребыва пребыван пребыв превесел превозмога превозмоч превознос превознос превосходительств превосходительств превосходительств превосходн превосходн превосходн превосходн превосходн превосходн превосходн превосходств преврат преврат преврат преврат преврат превратн превраща превраща превраща превраща превраща превращен превыша превыша преглуп преград преград преград преград преград пред предава предава предава преда преда пред пред преда преда предан предан предан предан предан преда предан предан предан преда преда преда предан предан предаст предател предательств преда преда предварительн предварительн предварительн предвестник предвест предвечн предвеща предвзят предвзят предвидел предвидел предвидел предвидел предвиден предвиден предвидет предвид предвид предвид предвиж предводител предел предел предел предзнаменован предислов предк предлага предлага предлага предлага предлага предлага предлага предлага предлага предлага предлага предлага предлаг предлог предлог предлог предлог предлог предлог предложат предлож предложен предложен предложен предложен предложен предложен предложен предлож предлож предлож предлож предлож предлож предмест предмет предмет предмет предмет предмет предмет предмет предмет предмет предназнач предназначен предназначен преднамерен преднамерен пред предопределен предопределен предорог предостав предостав предоставля предоставля предостерега предостерега предостережен предостережен предостережен предостереч предосторожн предотвращен предохраня предписа предписан предписа предписа предписа предписыва предполага предполага предполага предполага предполага предполага предполага предполага предполага предполага 
предполага предполаг предположен предположен предположен предположен предполож предполож предполож предполож предполож предположительн предполож предпосла предпочел предпочита предпочит предпочл предпочтен предпочтет предпримет предпринима предпринима предпринима предприня предприят предприят предприят предприят предприят предприят предрассудк предрассудк предрассудк предрассудк предрассудк предрассудк предрассудок предрассудочн предрассудочн предрек председател председател председательств председател предсказа предсказан предсказыв предсмертн предсмертн представа представ представ представ представ представ представ представ представительниц представ представ представ представлен представлен представлен представл представля представля представля представля представля представля представля представля представля представля представля представля представля представля представл представ представьт представ предста предстанет предста предсто предстоя предстоя предстоя предстоя предстоя предстоя предстоя предстоя предстоя предубежден предубежден предубежден предуведом предуведомл предугада предугада предугадыва предуготовительн предузна предупред предупред предупред предупредительн предупред предупрежда предупрежда предупрежд предусмотрел предусмотрительн предчувств предчувств предчувств предчувств предчувств предчувств предчувствова предчувствова предчувствова предчувствова предчувствова предчувствова предчувствова предчувств предчувств предчувств предчувству предшественник предшествова предшествова предъявля предыдущ предыдущ предыдущ предыдущ предыдущ прежд преждевремен преждевремен прежн прежн прежн прежн прежн прежн прежн прежн прежн прежн прежн прежн президент презира презира презира презира презира презира презира презира презира презира презира презир презревш презрен презрен презрен презрен презрен презрен презрен презрен презрительн презрительн презрительн презрительн презрительн презрительн преимуществ преимуществ преимуществ преимуществен преимуществ 
преимуществ преимуществ преисподн преисполн преисполн преклон преклон преклон преклон преклон преклоня прекомическ прекослов прекрас прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрасн прекрат прекрат прекрат прекрат прекраща прекраща прел прелест прелестн прелестн прелестн прелестн прелестн прелест прелест прелом преломля прельст прельща прелюбопытн прем премудр премудрств премудр премудр пренаивн пренебрег пренебрега пренебрега пренебрег пренебрежен пренебрежен пренебрежен преоблада преоблада преобража преобраз преобраз преобразован преогромн преодолел преодолет преподаван препода препода препода препочтен препровожда препровож препьян препятств препятств препятств препятств препятств препятствова препятств прерва прерва прерва прерва прерва прерва прерва прер прерыва прерыва прерыва прерыва прерыва прерыва прерыва прерыва прерыва прерыв прерывист прерывист пресвят пресекл пресекл пресек пресекш пресин прескладн преследова преследова преследован преследова преслед преслед преслед пресмыка преспокойн пресс преставл престарел престарел престарел престол престра преступа преступа преступлен преступлен преступлен преступлен преступлен преступлен преступлен преступленьиц преступлен преступн преступн преступник преступник преступник преступник преступник преступник преступник преступник преступниц преступн преступн преступн преступн претендует претенд претенду претенз претенз претенз претенз претенз претерпел прет преткновен преткнул претрудн прет преувеличен преувеличен преувеличен преувеличен преувелич преувелич преувеличива преувеличива преувеличива преувелич преувелич преувелич преумножа преуспеян преуспеян прехитр прехорошеньк пречистенк прещ при прибав прибав прибав прибав прибав прибав прибав прибав прибав прибав прибавк прибавк прибавл прибавлен прибавлен прибавлен прибавл прибавл прибавля прибавля прибавля прибавля прибавля прибавля прибавля прибавля 
прибавочн прибав прибавьт прибега прибега прибега прибег прибегнул прибежа прибежа прибежа прибежа прибеж приберегл приберет прибереш приб приб приб прибил прибира прибира прибира прибира прибит приближа приближа приближа приближа приближа приближ приближен приблиз приблиз приблиз приблизительн приб прибор прибо прибра прибра прибра прибра прибрежн прибреж прибудет прибудут приб прибыва приб приб приб прибыт приб прибьет прибьют привал приватн приватн приведен привед приведет привед привед приведут привез привезет привез привезл привезл привезт привезт привез привезут привел привел привел привел привел привес привест привет приветлив приветлив приветн приветств приветств приветств приветств привеча привеш привиден привиден привиден прив привилег привилег прив привлек привлека привлека привлекательн привлека привлека привлекл привлеч привод привод привод привод привод привод привоз привоз привол привол привол привскоч привскоч привста привста привста привста привстанет привык привыка привыка привыкл привыкл привыкнет привыкнет привыкнеш привыкн привыкнут привыкш привыкш привычек привычк привычк привычк привычк привычк привычк привычк привычн привычн привычн привычн привычн привычн привяж привяжут привяза привяза привяза привяза привяза привязан привязан привязан привязан привяза привяза привязк привязчив привязыва пригвожд приглажива приглас приглас приглас приглас приглас приглас приглаша приглаша приглаша приглаша приглаша приглаша приглаша приглашал приглаша приглаша приглаша приглаш приглаш приглашен приглашен приглашен приглашен приглаш приглашен приглядет пригляд приглядыва приглядыва приглядыва приглядыв приглядыв приглянут пригнул пригнул пригнул пригнул приговарива приговарива приговарив приговарнва приговор приговор приговорен приговорен приговор приговорк приговор приговор пригод пригод пригод пригодн пригод пригорк пригорк пригорк пригорок пригорюн пригорюн приготов приготов приготов приготов приготов приготов приготов приготовл приготовлен приготовлен 
приготовлен приготовлен приготовлен приготовлен приготовл приготовл приготовля приготовля приготовля приготовля приготовля приготовл приготов пригрез пригроз придава придава придава придава придава придав придав придав придав придав придавл придавлив прида прида прид прида придан придан придаст придаток прида придач придвиг придвинул придвинул придворн прид придержа придержа придержа придержива придержива придержива придержива придержива придержива придержив придержив придет придет придет придеш прид придира придира придирк придирч прид придорожн прид придума придума придума придума придума придума придума придума придумыва придумыва придумыва придумыв придут придут прид приед приедет приедет приедеш приед приедут приезд приезд приезд приезд приезд приезжа приезжа приезжа приезжа приезжа приезжа приезжа приезжа приезжа приезжа приезж приезж приезж приезж приезж при прием прием прием прием приемлеш приемл приемн приемн приемн прием при прием приеха приеха приеха приеха приеха приеха прижа прижа прижа прижа прижа прижа прижат прижат прижива приживал приж прижима прижима прижима прижима прижима прижим прижим прижмеш прижм прижм призадума призва призван призван призван призван призва призва призна признава признава признава признава признава признава признава признава признав признав призна призна призна призна призна признак признак признак признак признак признак признак признак призна призна призна призна признан признан признан признан призна призна признан признательн признательн призна призна призна призна призовет призовеш призрак призрак призрак призрак призрак призрак призрак призрачн призрен призрен приз призыва призыва призывающ призыва призыв призыв призывн призывн приидет приид приид прииска приискан прииска приискива приищет прийт прикажет прикажет прикажеш прикаж приказ приказа приказа приказа приказа приказан приказан приказан приказан приказан приказан приказа приказа приказн приказчик приказчик приказчик приказчик приказыва приказыва приказыва 
приказыва приказыва приказыва приказыва приказыва прикаплива прикаса прикас прикидыва прикинеш прикинул прикинут прикладыва прикладыва прикладыв прикле приклон приключа приключен приключен прикова приковыва приколот прикомандирова прикосновен прикосновен прикоснул прикоснул прикоснул прикоснут прикрас прикрасн прикреп прикрут прикр прикрыва прикрыва прикрыва прикрыв прикр прикр прикр прикрыт прикрыт прикрыт прикрыт прикрыт прикрыт прикр прикуп прикус прилага прилад прилад прилажен прилажива прилажива приласка приласка приласка прилег прилега прилежан прилежн прилежн прилеп прилеп прилеп прилеплива прилепля прилет прилет прилеч прил прилив прилива прилив прилив прилив прилив прилип прилипчив прилипш прилич прилич прилич прилич приличн приличненьк приличн приличн приличн приличн приличн приличн приличн приличн приличн приличн прилож приложат прилож приложен прилож прилож прилож прилож прилож прилуков прильнет прильнув прильнул прильнул прильнут прильпн приляг приляжет приманива примачива примелька прим примен примен примен применя пример пример пример примерива примерив пример пример примерн пример пример пример примерчик примерчик пример примеря примеря примес примет примет примет примет примет примет примет примет примет приметн приметн приметн приметн приметн приметн приметн примет примет примет примеча примечательн примеча примешива примешива примешива прим примирен примирен примирительн примирительн примирительн примир примир прим примолв примолв примолк примолкл примолкл примочк примочк прим примут примут примыка примят принагнувш принадлежа принадлежа принадлежа принадлежа принадлежа принадлежа принадлежа принадлежа принадлежат принадлежа принадлежа принадлежа принадлеж принадлеж принадлежн принадлежн принадлежн принадлеж принаряд принес принесен принесен принесен принесен принесен принес принес принесет принесет принесеш принес принес принесл принесл принесл принест принес принесут прин принижа принижен принижен принижен принижен приниж приник приникл приникш 
принима принима принима принима принима принима принима принима принима принима принима принима принима принима принима принима принима принима принима принима приним приним принос принос принос принос приносим принос принос принос приношен принош принуд принуд принуд принуд принужда принужд принужд принужден принужден принужден принц принцип принцип принципиальн принцип принцип приня приня приня приня приня приня приня приня приня прин принят принят принят принят принят принят приня приня приобрел приобрел приобрест приобрест приобрета приобрета приобрета приобрета приобрета приобрета приобрета приобрета приобрет приобретен приобретен приобретен приобретен приобща приодел приостанов приостанов приотвор приотворен приотвор приотвор приоткрыт припа припада припад припад припадк припадк припадк припадк припадк припадк припадок припадочек припадочн припадочн припа припа припас припас припас припас припас припахива припев припеваюч припер приписа приписа приписыва приписыва приписыва приписыва приплюснут приподнима приподнима приподнима приподним приподним приподня приподня приподня приподня приподня приподня приподня приподня приподнят приподня приподня припомаж припомина припомина припомина припомина припомина припомина припомин припомн припомн припомн припомн припомн припомн припомн припомн припомн припомн припомн припрыгнул припрята припрята припрята припуска припута припухл припухш припухш припухш припухш припухш прираста приращен приревн природ природ природн природн природн природн природ природ природ прирожден прирос приросл приросл прируч присвата присватыва присвистнул присво прис приседа приседа приседа присед присел присел присест прискорб прискорб присла присла присла присла присла присла присла прислонен прислон прислон прислон прислон прислуг прислуг прислуг прислуг прислужива прислужива прислужниц прислуша прислуша прислуша прислушива прислушива прислушива прислушива прислушива прислушива прислушива прислушив присматрива присмиреет присмирел присмирел 
присмирел присмотр присмотр присн присн присн присн присн присн присовокуп присочин приспел приспособ приспособ приспособ приспособл приста пристав пристава пристава пристава приставан пристава пристав пристав приставл приставлен пристав приста приста приста приста приста пристальн пристальн пристальн пристальн пристальн пристальн пристальн пристальн пристанет пристан пристанищ пристан приста пристрастн пристро пристро пристро пристро пристройк пристукнул приступ приступ приступ приступ пристяжн присужд присужд присутствен присутствен присутствен присутствен присутствен присутствен присутств присутств присутств присутств присутствова присутствова присутствова присутствова присутствова присутств присутств присутств присыла присыла присыла присыла присыла присылк присягнул присяг присядет присяд присяд присядьт присяжн прита прита прита прита притащ притащ притащ притащ прит притвор притвор притвор притвор притвор притвор притвор притвор притвор притворн притворн притворств притворств притворщик притворя притворя притворя притворя притвор прит притихл притихл притихнет прит притон приторн приторн притрогива притуп притуплен притуш притягива притягив притязан притянут приудар приумножа приун приуч приуч приуч прихлопнут прихлынувш прихлынул приход приход приход приход приход приход приход приход приход приход приход приход приход приход приход приход приход приход приход приход приход приходя прихож прихож прихож прихож прихот прихот прихотлив прихотлив прихотлив прихрамыва прицелива прицеп прицеп прицеп причал причаст причастн причащен прич причеса причеса причеса причеса причеса причеса причеса причеса прическ прическ прическ прическ причесыва причесыва причешут причин причин причин причин причин причин причин причин причислен причисл причисля причита причита причита причитан причита причтет причуд причуд пришедш пришедш пришедш пришедш пришел пришел пришива пришив приш приш пришл пришл пришлет пришл пришл пришл пришл пришл пришл пришлют пришол пришпор прищелкива 
прищур прищур прищур прищур прищур приют приют приют приют приятел приятел приятел приятел приятельниц приятельниц приятельниц приятельск приятел приятел приятел приятел приятел прият приятн приятн приятн приятн приятн приятн приятн приятн приятн приятност приятн приятн приятн приятн приятн приятн приятн про пробавля пробавля пробалтыва пробега пробега пробега пробега пробег пробежа пробежа пробежа пробежа пробеж пробива пробива пробива пробива пробива проб проб проб проб пробира пробира пробира пробира пробир проб проб пробк проблеск проблеск пробова пробова пробова пробо пробок проболта проболта пробор пробормота пробормота пробормота пробор пробочк пробра пробра пробра проб пробудет пробуд пробуд пробуд пробуд пробуд пробуд пробужда пробужда пробужда пробужда пробужден пробужден пробужден пробу проб проб проб провал провалива провал провал провал прова провал провал провал проваля провал проведа провед провед проведен проведен проведен проведет провед провед провезут провел провел провер провер провесн провест проветр провиантск провиантск провиантск провидел провиден провиден провиден провид провиж провиз провиз провинциалк провинциальн провинциальн провинц провинц провод провод провод провод провод провод провод провод проводим провод провод провод провод провожа провожа провожа провожа провожа провожа провожа провожа провожа провожа провож провожден провож провозглас провоз провоз провоз проволок провор проворкова проворн проворн проворн проворн проворон проворств проворча проворча проворчат провра провспомина проглот проглот проглоч проглядел проглядыва проглядыва проглянул прогна прогна прогна прогнал прогна прогнева проговарива проговарива проговарив проговор проговор проговор проговор проговор проговор проговор проговор проговор проговор проголода прогон прогон прогон прогон прогоня прогон прогоня программ программ программ прогресс прогресс прогрессивн прогрессивн прогрессивн прогрессист прогрессист прогрессист прогресс прогресс прогулива прогулив 
прогулк прогулк прогулк прогулк прогулок прогуля прогуля прогуля прода продава продава продава продава продава продавец продав продавлен продавц продадут прода прода прода прода продаж продаж продажн продаж прода прода прода прода прода прода прода продаст прода продаш прода прода продева продева продева продев проделк проделк продержа продикт продикт продира продл продл продл продовольств продовольств продолговат продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолжа продолж продолж продолжен продолжен продолжен продолжен продолж продолж продолжительн продолжительн продолжительн продолжительн продолжительн продолж продра продра продувн продувн продувн продукт продукт продукт продума продума проедет проед проезд проезд проезжа проезжа проезжа проезжа проезжа проезж проезж проект проект проел проеха проеха проеха проеха прожда прожег прожектор прожж прожжет прожива прожива прожива прожива прожива прожива прожива прожива прожив проживет проживет проживеш прожив проживут прож прож прож прожит прожит прожиток прожит прож прожит прожорлив прожужж прозва прозва прозван прозвенел прозвуча прозвуча прозвуч проз прозира прозорл прозорлив прозор прозоров прозоров прозрачн прозрачн прозрачн прозрачн прозрачн прозрева прозрен прозрет прозяба прозяба проигра проигра проигра проигра проигра проигрыш произвед произвед произведен произведен произведен произведен произведен произведен произведен произвед произведет произвел произвел произвел произвел произвест производ производ производ производ производ производ производительн производ производ производ произвол произвол произвольн произнес произнесен произнес произнес произнесл произнест произнос произносим произнос произнос произн произнос произойдет произойт произошел произошл произошл произошл происход происход происход происход происход происход происход происход происходя происхожден происхожден происшедш происшедш 
происшеств происшеств происшеств происшеств происшеств пройд пройдемт пройден пройдет пройдет пройдет пройдеш пройд пройд пройдох пройдох пройд пройд пройдут пройд пройд пройт проказ проказ проказ проказ проказник прокарка прокат прокатимт прокат прокат проклина проклина проклина проклина проклина прокля проклянеш прокляст прокл проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проклят проконсул прокорм прокорм прокормл прокоф прокофьич прокофьич прокофьич прокрича прокрича прок прокурор пролегл пролежа пролежа пролежа пролежа пролеж пролез пролеза пролеза пролез пролезт прол пролета пролета пролетар пролетар пролетевш пролетевш пролетел пролетел пролет пролетк прол пролива пролива пролив проливн прол прол пролит пролит пролит пролог пролог проложат пролож пролож пролюб промалчива пром промах промахнул промах промедлен промедл промеж промежутк промежутк промежуток промежуточн промелькнет промелькнувш промелькнул промелькнул променя променя променя променя промете промозгл промозол промокш промолв промолв промолв промолв промолча промолча промоч промуч промысл промысл промыча промыч промышленник промышленник промышленник промышля промышля промышля промямл пронес пронесет пронесет пронесл пронесл пронесл пронесл пронес пронес пронза пронза пронза пронза пронзен пронзен пронзен пронз пронз пронз пронз пронзил пронзительн пронзительн пронзительн прониза пронизыва проник проника проника проника проникл проникнет проникнув проникнут проникнут проникнут проникнут проник проникш проница проницательн проницательн проницательн проницательн проница пронос пронос пронос пронос пронося пронюха пронюха пропаганд пропаганд пропагандирова пропагандн пропада пропада пропада пропада пропада пропада пропада пропада пропада пропада пропадет пропадет пропадеш пропад пропадут пропа пропа пропа пропа пропаст пропаст пропаст пропаст пропаст пропаст пропа пропа проп пропека пропел пропел 
пропел пропеллер пропет пропива проп прописа прописа прописыва пропита пропита пропита пропита проплака проплывет пропл пропл проповедова проповедова проповед проповед пропоет проползет пропуска пропуска пропуска пропуск пропуст пропуст пропуст пропуст пропуст пропуст пропуст пропущен пропылен пропьет пропят прорв прорва прорва проревел прорезыва прорепетирова прорех прорех прорех пророк пророк пророк пророн пророн пророн пророн пророчат пророческ пророческ пророческ пророчеств пророчеств пророч проруб прорыва прорыв прорыва прорыв просвата просвет просвет просветлевш просветлеет просветлел просветлен просвечива просвещен просвещен просвещен просвещен просвещен просвещен просвирн просеван просед просеет проселк проселок просид просидел просидел просидел просидет просид просижива просижива прос прос прос прос прос просил прос просим просипел прос прос просител просител просительн прос прос прос прос просия проскака проскакива просквоз проскользнет проскользнул проскользнул проскользнут проскрежета проскуча прослав прославлен прослед проследова прослез прослез прослез прослуж прослуж прослуж прослуша прослуша прослуша прослыша прослыша прослыша просмотрел просмотрет просмотр проснет проснет проснеш просн просн проснувш проснувш проснувш проснувш проснул проснул проснул просн проснут прос проспа проспа проспа проспект проспект проспект проспект просрочен просрочен просрочен просроч прост прост простаков прост простегива прост прост прост простенк простеньк простер простерл простерт простерт простерш прост прост прост прост прост прост прост прост простира простира простира простир прост прост простительн проститутк прост прост прост прост простоват простоват простоват простоват простоволос простоволос прост простодуш простодуш простодуш простодушн простодушн простодушн простодушн прост прост простокваш простокваш простолюдин прост простона простона простонародн простонарод простонет простор простор простор просторн просторн просторн просторн просторн простор простот 
простот простот простот простот простот прост простоя простоя простоя простра пространств пространств пространств пространствова прострел прострет простуд простуд простуд простуд простуд простуд простуд простуд простужива простуж проступа проступок простучат прост прост прост прост прост прост простын простын простын простын простын простын прост прост просунет просунул просунул просунут просуш просыпа просыпа просыпа просыпа просыпа просыпа просыпа просыпа просыпа просыпа просыпа просыпл просьб просьб просьб просьб просьбиц просьб просьб просьб про прос прос прос протанцева протанцева протанцева протащ протека протекл протекц протекш протер протерет протерпел протерт протесн протесн протесн протесня протесн протесн протест протестова протестова протестова протест протест протест протест прот против против противн противн противник противник противник противн противн противн противн противн противн противн противоестествен противоположн противоположн противоположн противоположн противоположн противоположн противоположн противоположн противоположн противопостав противореч противореч противореч противореч противореч противореч противореч противореч противореч противуположн против протискив протиснул проткнет протокол протолкнул протолкуеш протопоп протопопов протопопов протопта протор протягива протягива протягива протягива протягива протягива протягив протяж протяжн протяжн протянет протянеш протян протян протянув протянувш протянувш протянул протянул протянул протянул протянут протянут протянут протянут протянут протянут протянут протянут протянут протянут проулок профессор профессор профессор профессор профессорск профессор профессорш профил профил профил прохажива прохажива прохажива прохват прохватыва прохлад прохладн прохладн прохладн прохладн прохладн прохладн прохлад прохлад проход проход проход проход проход проход проход проход проход проход проходимц проход проход проход проход проходн проходн проход проход проход проходя прохож прохож прохож прохож прохож 
прохож прохож прохож прохож прохор прохрипел процвета процвета процвета процед процедур процежив процент процент процентн процент процентщиц процентщиц процентщиц процент процесс процесс процесс процесс процесс процесс процесс проч проч проч проч прочел прочест проч проч проч проч прочита прочита прочита прочита прочита прочита прочита прочита прочитыва проч прочл прочн прочн прочн прочност прочт прочтен прочтен прочтет прочтет прочтеш прочт прочт прочт прочт проч прошагнул прошедш прошедш прошедш прошедш прошедш прошедш прошедш прошел прошел прошепта прошепта прошепта прошибет прошипел прошл прошл прошл прошл прошлогодн прошлогодн прошлогодн прошлогодн прошл прошл прошл прошл прошл прошл прошл прошл прошля прошмыгнул прошмыгнут прош прошумевш прошумел проща проща проща проща проща проща проща прощальн прощальн прощан прощан прощан прощан проща проща проща проща проща прощ прощ прощ прощен прощен прощен прощ прощен прощипа прощ прояв прояв прояв проявлен проявлен проявлен проявлен проявля проявля проявля проявля проявля прояснел прояснел проясн проясн проясн проясня проясня проясня пруд пруд пруд пруд пруд прудов пруд пружин пружин пружин пружин пружин прусск прусск прыг прыга прыга прыга прыга прыга прыга прыга прыган прыга прыга прыг прыгнул прыгнут прыжк прыжк прыжок прынцесс прыска прыснет прыснул прыснут прыт прыт прядет пряд пряжк прям прямехоньк прям прям прям прямодуш прямодуш прямодушн прям прям прям прям прям прям прям прям прям пряничн прян прян пряност прянул прян прята прята прята прята прята прята прята пряч пряч прячет прячет прячеш прячут прячут прячущ пса псам психическ психоз психолог психологическ психологическ психологическ психологическ психологическ психологическ психологическ психологическ психолог психолог психолог психолог психопат псом псу псы птенц птиц птиц птиц птиц птиц птиц птиц птиц птичек птич птичк птичк птичк птичниц птич птич птич пуант публик публик публик публик публик публичн публичн публичн публичн публичн публичн публичн 
публичн пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуга пуг пуглив пуглив пуглив пуглив пуглив пуглив пуговиц пуговиц пуговиц пуговк пуд пуд пуд пудинг пуд пудр пудр пужлив пузат пузат пузырек пузыр пул пульс пульс пульхер пульхер пульхер пульхер пул пул пул пулярк пункт пункт пункт пункт пункт пункт пункт пункт пункт пунш пунш пуп пуританск пурпуров пуска пуска пуска пуска пуска пуска пуска пуска пуска пуска пуска пуска пуска пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пуст пустот пустот пустот пустот пустош пустош пуст пуст пуст пуст пуст пуст пустын пустын пустын пустын пустын пустын пустын пустын пустын пустын пустын пустын пустын пустыр пустыр пустыр пуст пуст пустяк пустяк пустяк пустяк пустяк пуст пустячк пустяшн пустяшн пустяшн пута пута пута путаниц путаниц пута пута пута пута пут путеводител путеводительн путеводн пут пут путешественник путешественник путешеств путешеств путешеств путешеств путешеств путешеств пут путилк путилк путилк путник путник путн путн пут пут пут пуф пух пух пухленьк пухл пухл пухов пухов пух пучат пучин пучин пучк пушечн пушист пушист пушк пушк пушкин пушкин пушкинск пушкин пушкиньянц пушк пушок пуща пущ пущен пущен пущ пхайл пчелк пчеловодств пчел пшениц пшеницын пшеницын пшеницын пшеницын пшеницын пшла пшол пыжиков пыл пыла пыла пыла пыла пыла пыла пыл пыл пыл пылинк пылинк пылк пылк пылк пыл пыл пыл пыльн пыльн пыльн пыльн пыльн пыльн пыльн пыльн пыл пыл пыта пыта пыт пыт пытк пытк пытк пытк пытлив пытлив пытлив пытлив пыток пыхт пышн пышн пышн пышн пышн пышн пышн пышн пышут пьан пьедеста пьем пьер пьес пьес пьес пьес пьес пьет пьет пьеш пью пьют пьющ пьющ пьющ пьян пьян пьян пьяненьк пьяненьк пьяненьк пьян пьяне пьяниц пьяниц пьяниц пьяниц пьяниц пьян пьян пьян пьян пьянств пьянств пьянствова пьянствова пьянств пьянств пьян пьян пьян пьян пьян пьян пэб пял пял пяст пятак пятак пятак пятак пятак пят пятачк 
пятачк пятачок пят пят пятер пят пятидесят пятидесятилетн пятидесятилетн пятидесятирублев пятидесят пятилетн пятилетн пятиминутн пятипроцентн пятипроцентн пятипроцентн пятипудов пятисот пятист пят пятиэтажн пятк пятк пятн пятнадцат пятнадцатилетн пятнадцат пятнадца пятн пятн пятн пятниц пятниц пятниц пятниц пятн пятн пятнышк пятнышк пят пят пят пят пяточк пят пят пят пят пятьдес пятьсот пят пят р ра раб раб раб раб раб раб работ работ работа работа работа работа работа работа работа работа работа работа работа работ работ работа работ работа работа работ работник работник работник работник работник работник работ работ работ рабоч рабоч рабоч рабоч рабоч рабск рабск рабск рабств раб равен равеннск равенств равенств равн равнин равнин равн равновес равновес равновес равнодуш равнодуш равнодуш равнодуш равнодуш равнодуш равнодушн равнодушн равнодушн равнодушн равнодушн равнодушн равнодушн равн равномерн равносильн равн равн равн равня равня равня равня равня рагулин рад рад радехонек рад радикальн радикальн радиус радищ рад радова радова радова радова радост радост радостн радостн радостн радостн радостн радостн радостн радостн радост радост радост радост рад радуеш радужн радужн радуш радуш радуш радушн радушн рад раду рад раз раз разбавля разбега разб разберет разбереш разбер разбер разберут разберут разбива разбива разбива разбива разб разб разб разб разб разб разбира разбира разбиран разбира разбир разб разбит разбит разбит разбит разбит разбит разбит разб разблаготвор разбогатевш разбогател разбогатет разб разбойник разбойник разбойник разбойник разбойник разбойник разбойнич разбойн разболел разболел разболта разболта разболта разбор разборч разбор разбран разбрасыва разбредут разбрел разброса разброса разброса разброса разброса разброса разброса разбуд разбуд разбуд разбуд разбуд разбуд разбуд разбуд развалива развал развал развал развал развал развалин развалин развалин разв развед разведыва развел развел развел разверза разверза развернув развернувш развернул 
развернул развернул развернут развернут развернут развернут развернут развертыва развертыв развесел развесел развесел развесел развест развешива развешива развешива развива развива развива развива развива развива развива развива развива разв разв разв разв развинт разв развит развит развит развит развит развит развит развит развит развит разв развлека развлека развлек развлечен развлеч развод развод развозим разворчат разврат разврат разврат разврат развратник развратник развратнича развратн развратн развратн разврат развращен развращен развяжеш развяж развяза развяза развяза развяза развяза развяз развязк развязк развязк развязк развязн развязн развязн развязн развязн развязн развязыва разгада разгада разгадк разгадк разгадк разгадк разгар разгиба разглад разглад разглажива разгляд разглядел разглядел разглядел разглядет разгляд разгляд разгляд разглядыва разглядыва разглядыва разглядыва разглядыва разглядыв разгнева разговарива разговарива разговарива разговарива разговарива разговарива разговарива разговарив разгове разговен разговор разговор разговор разговор разговор разговор разговор разговор разговор разговор разговорчив разговор разговор разгоня разгора разгоревш разгорел разгор разгорожен разгоряч разгоряч разгоряч разгул разгулива разгульн разгульн разгуля разгуля раздава раздава раздава раздава раздав раздав раздав раздавл раздавл раздавлен раздавлен разда разда разда разда разда разда раздар раздаст разда раздвига раздвига раздвиг раздвинув раздвинул раздвинул раздвинул раздвинут разд раздева раздева раздева раздева раздев раздевш раздел раздела раздела разделен раздел раздел раздел раздел раздел раздельн раздельн разделя разделя разделя разделя разделя разделя разделя разделя разден раздет раздет раздира раздол раздол раздор раздор раздосадова раздража раздража раздража раздража раздража раздража раздража раздража раздража раздража раздража раздраж раздраж раздраж раздражен раздражен раздражен раздражен раздражен раздражен раздражен раздражен раздражен 
раздражен раздражен раздражен раздраж раздраж раздраж раздраж раздраж раздражител раздражительн раздражительн раздражительн раздражительн раздражительн раздражительн раздражительн раздражительн раздражительн раздражительн раздражительн раздраж раздразн раздробл раздробл раздробля раздува раздува раздува раздув разду раздул раздума раздума раздумчив раздумыва раздумыва раздумыва раздумыв раздум раздум раздум раздут раздушен разев разжа разжалоб разжалоб разжалова разжига раззадор разик разин разинув разинул разинул разинут разинут разин раз разлаком разлаком разлаком разлег разл разлет разлет разл разлива разлива разлива разлив разлива разлив разливн разл разл разл разл разлит разлит разлит разлит различа различа различа различа различ различ различ различ различн различн различн различн различн разложен разложен разлож разложен разлома разлук разлук разлук разлуча разлуч разлуч разлуч разлюб разлюб разлюб разлюб разлюбл разлюб размазыва размара разм размах размах размахива размахива размахива размахива размахив размахнувш размахнул размах размашист разм разменива разменя разменя разменя разменя разменя разменя размер размер размер размер размерен размер размер размета размечта размина размина размнож размозж размолвк размуч размышлен размышлен размышлен размышля размышля размышля размышл размягч размягч разн разнеж разнеж разнесет разнесет разнесл разнесш разн разнима разниц разниц разниц разновеков разн разн разнокалиберн разноличн разнообраз разнообраз разнообраз разнообразн разнообразн разнообразн разнообразн разнообразн разнообразн разнообраз разнородн разнорядиц разнос разнос разнос разнос разност разносчик разносчик разнофасон разнохарактерн разноцветн разноцветн разн разн разн разн разн разн раз разобидел разобиж разобра разобра разобра разобра разобра разобра разобра разобьют разовьют разогна разогна разодет разодет разодет разодет разодра разодра разожжен разозл разозл разозл разойд разойт разольет разольет раз разомкнут разорва разорва разорва разорва 
разорва разорва разорва разорва разорва разорва разорва разорв разорен разор разор разор разор разор разоря разоря разостла разостла разочарова разочарован разочарован разочарова разочарова разочарова разочаровыва разочек разошл разошл разработа разработк разработыван разраз разраз разраз разраз разраз разраз разреж разреза разреша разреша разреша разреша разреша разреша разрешен разрешен разреш разрешен разреш разреш разреш разреш разреш разреш разреш разреш разреш разрознен разроня разросл разруша разруша разруша разруша разруша разруша разруша разруша разруш разрушен разрушен разрушен разрушен разруш разруш разрушител разрушительн разрушительн разрушител разруш разр разрыв разрыва разрыва разрыва разрыва разрыв разрыв разрыв разряд разряд разряд разряд разряд разряд разряжен разряжен раз разубед разубежда разубра разувер разувер разувер разуверя разудал разузнава разузна разузна разузнамш разузна разузна разукрашен разукрашен разукрашен разум разум разумеет разумеет разумееш разумел разумел разумел разумен разумихин разумихин разумихин разумихин разумихин разумн разумн разумн разумн разумн разумн разум разуч разъединен разъезд разъезд разъезд разъезжа разъезж разъеха разъеха разъеш разъяснен разъяснен разъяснен разъясн разъясн разъясн разъясн разъясн разъясня разыгра разыгра разыгра разыгра разыгра разыгрыва разыгрыва разыгрыва разыгрыва разыгрыва разыгрыва разыгрыва разыска разыска разыска разыска разыскива разыскиван разыскива разыскива разыщ разя ра райск райск райск рак раковин раковин рам рам рамк рамк рамк рам рамочк рамп рам ран ран ран раневск раневск ран ран ран ранен ранешеньк ран ран ран ран ран ран ран ран ран ран ран ран ран ран ран раньш ран рапорт раска раска раскаива раскаива раскаива раскаив раскален раскален раскален раскат раскат раскачнувш раская раскаян раскаян раскаян раскаян раская раскида раскинет раскинув раскинул раскинул раскинул раскинут раскладыва раскладыва раскладыва раскланива раскланя раскл расколдуеш расколот расколот раскольник 
раскольников раскольников раскольников раскольников раскольников раскольнич раскрасавиц раскрасневш раскрасневш раскраснел раскрича раскрич раскрыва раскрыва раскрыва раскрыва раскрыв раскр раскр раскр раскр раскр раскрыт раскрыт раскрыт раскр раскус раскус раскус распа распар распахива распахнувш распахнул распахнул распахнул распахнут распашк распечата распечата распечатыва распечатыва распечатыв распивочн распивочн распивочн распивочн распивочн распивошн распира расписа расписа расписан расписк расписок расписочк расписыва расписыва расплака расплат расплат расплат расплат расплат расплачут расплевыва расплеска расплест расплод расплыва расплыва расплыв распл расплюев распн распознава распозна располага располага располага располага располага располага располза расползл располнел располож расположен расположен расположен располож располож распомажен распоряд распоряд распоряд распорядительниц распорядительн распорядительн распоряд распоряд распорядок распоряд распоряжа распоряжа распоряжа распоряжа распоряжен распоряжен распоряжен распоряжен распоряжен распоряж расправ расправ расправ расправл расправ распредел распр распрода распростерл распростерл распростерт распростерш распространен распространен распростран распростран распростран распростран распростран распространя распространя распр распуга распуска распуска распуска распуска распуска распуст распуст распуст распуст распуст распуст распуст распута распутиц распухш распущен распущен распущ распыла распя распят распя рассад рассаж рассвет рассвет рассвет рассветет рассвет рассвиреп рассвирепел рассеет рассел рассерд рассерд рассерд рассерд рассерд рассерд рассерд рассерд рассерд рассерд рассерж рассержен рассерж рассеч рассея рассея рассеян рассея рассеян рассеян рассеян рассеян рассея рассея рассея рассея рассея расскажет расскажет расскажеш расскаж расскаж расскаж расскаж расскажут рассказ рассказ рассказа рассказа рассказа рассказа рассказ рассказ рассказа рассказа рассказа рассказ рассказ рассказ 
рассказ рассказ рассказчик рассказчиц рассказ рассказыва рассказыва рассказыва рассказыва рассказыва рассказыва рассказыва рассказыва рассказыва рассказыва расслаблен расслаблен расслаблен расследова расслуша расслуша расслыша расслыша расслыша расслыша рассматрива рассматрива рассматрива рассматрива рассматрива рассматрива рассматриван рассматрива рассматрива рассматрив рассмеет рассмешат рассмеш рассмеш рассмеш рассмеш рассмея рассмея рассмея рассмотр рассмотрен рассмотрет рассор расспрашива расспрашива расспрашива расспрашива расспрашива расспрашив расспрос расспрос расспрос расспрос расспрос расспрос расстава расстав расстав расстав расстав расставл расставля расста расста расста расста расста расстан расстановист расстановк расстановк расста расста расстегнул расстегнут расстелет расстила расстила расстила расстоян расстоян расстоян расстраива расстро расстро расстроен расстроен расстроен расстроен расстроен расстро расстро расстроива расстро расстро расстро расстро расстро расстро расстройств расстройств расстройств расступ рассуд рассуд рассуд рассуд рассуд рассуд рассуд рассудительн рассудительн рассудительн рассудительн рассуд рассудк рассудк рассудкин рассудк рассудок рассужда рассужда рассужда рассужда рассужда рассужда рассужда рассужд рассужден рассужден рассужден рассужден рассужден рассчита рассчита рассчита рассчита рассчита рассчита рассчитыва рассчитыва рассчитыва рассчитыва рассчитыва рассчитыва рассчитыва рассчитыва рассчитыв рассыпа рассыпа рассыпа рассыпа рассыпа рассыпчат расталкива расталкива растая раствор растворен раствор раствор раствор раствор раствор растен растен растерза растеря растеря растеря растеря растеря растеря растеря растеря растеря растеря растеря растет раст растлел растолка растолкова растолкова растолковыва растолк растолк растопа растопчет растопырен расторопн расторопн расточа расточа расточа расточ расточител расточительн растравля растревож растреклят растреклят растрепа растрепа растрепа растрога растрога растрога 
растрога растут растущ растущ растягива растянет растянул растянул расфранчен расфуфырен расхажива расхажива расхищен расхлебыва расхляба расход расход расход расход расход расходн расход расход расход расхохота расхохота расцвел расцвел расцвет расцвета расцвета расцветан расцветан расцвет расцветш расчеса расчеса расчесыва расчесыв расчет расчет расчет расчет расчетлив расчет расчет расчет расчет расчища расчувствова расшал расшата расшевел расшевел расшиб расшибан расшибет расшибет расширен расшир расширя расширя расшир расщедр расщедр рат рат раут рафаэлев рафаэлевск рафаэл рац рациональн рациональн рациональн рачител рачительн ра ра рвал рвал рвал рван рван рван рванул рванул рванул рванут рван рват рват рвен рвет рвет рвеш рвеш рву рдеет рдел рдеют рде реализм реализм реальн ребенк ребенк ребенк ребенк ребенок ребеночек ребер ребр ребр реб ребят ребятишек ребятишк ребятишк ребятишк ребяческ ребяческ ребячеств ребяч ребяч рев ревел реверанс рев ревизор ревматизм ревматизм ревнив ревнив ревнив ревнив ревнив ревнова ревнова ревнова ревност ревност ревност ревн ревну револьвер револьвер революц революцион революц рев ревущ регистраторш регистраторш регр регулирова регулярн редактор редакц редакц редакц редеет редет редк редк редк редк редк редк редк редк редк редк редкост редкост редьк редьк редьк реет реж режет реж реза реза реза резв резв резв резв резвост резв резв резв резв резед резед резед резиденц резинов резк резк резк резк резк резк резк резк резк резкост резн резн резок резон резон результат результат результат результат результат результат резц резч резюм рейнгольд рейт рек рек рек рекам рек рек рек рекомендац рекомендац рекомендац рекомендова рекомендова рекоменд рекоменд рекоменд рекоменду рекорд рек ректор рек религ религ религ религиозн рельефн рельс рельс рельс реляц реляц ремен ремесл ремесленник ремесленник ремеслен ремесл ремн ремн ренегат реникс рен рент реомюр репетитор реплик реп реп репутац репутац репутац репутац реп ресниц ресниц ресниц 
ресниц рессл рессорн рестора рестора ресторан ретив ретирова реторт ретроград ретроградн рефлекс реформ реформ рехнул рехнул рецепт рецепт рецепт реч реч реч реч речист речк речк речк речк речк речн реч реч реша реша реша реша реша реша реша реша реша реша реша решат реша реша реша реш реш реш решен решен решен решен решен решен решен решен реш решетк решетчат решетчат решетчат решетчат реш реш реш реш реш реш реш реш реш реш реш реш реш решим решим решим реш реш решител решительн решительн решительн решительн решительн решительн решительн решительн решительн решительн реш реш реш решотк реш реш рея ржав ржав ржав ржав ржан ржи риз риз рим рим римлян римск ринет ринул ринут ринут рис риск риск рискнул рискнул рискнул риск рисова рисова рисова рисова рисова рисован рисова рисов рис рис рисуеш рисунк рисунк рисунок рис рис рису ритм ритм ритмическ ритм риторик риторик рифм рифмова ро робел робел робел робенок роберт робеют робе робк робк робк робк робк робк робк робк робк робок робост робост робост робч ровесник ровн ровн ровнехоньк ровн ровн ровн ровн ровн ровн ровн ровня рог рог рог рог рогат рогат рог рогов рогож род род род род род роденьк род род род род род род родименьк родим родим родин родин родин родин родин родион родион родион родион родионыч родител родител родител родительск родительск родительск родительск родител родител род род родн родн родн родн родн родн родн родн родн родн родн родн родн родн родн родн род род родств родственник родственник родственник родственник родственник родственник родственниц родственниц родственниц родственниц родствен родствен родствен родствен родствен родствен родствен родств род родьк родьк родькин родэ род род род род ро роет роеш рож рож рожда рожда рожда рожда рожда рожда рожда рожда рожда рожда рожден рожден рожден рожден рожден рожден рождеств рождеств рождествен рождеств рождеств рождеств рож рож рожк рожок рож рож рож роз роз роза роз розн розн розн розовеет розовеют розове розов розов розов розов розов розов 
розов роз розоперст роз роз ро ро рок рок роков роков роков роков роков роков рокот рокотан рокот рокот рокот роланд рол рол рол ром рома рома роман роман романист романическ романическ романическ роман романович романович романович романовн романовн романовн романовн романовн романс романс романчик рома романыч романыч романыч романыч роме ром роня роня роня рон ропот ропот ропта ропта ропта ропщ рос рос рос росинк росист роскош роскошн роскошн роскошн роскошн роскошн роскошн роскошн роскошн роскош роскош росл росл росл росн росписн росс росс российск росс росс россияд россиянин рост рост рост ростовщик ростовщиц рост рост рос рот рош рощ рощ рощ рощ рощ рощ роют роют роял роял роял роял ро роя рт рта ртам ртом рту рты руб рубах рубах рубашек рубашечк рубашечн рубашк рубашк рубашк рубашк рубашк рубашк рубашонк рубеж рубеж рубец руб руб рубикон руб руб рубин рубин рубинштейн руб руб рубищ рубищ рубиян рубл рублев рубл рублик рубл рубл рубл рубл рубл рубл рубнут рубц руга руга руга руга руга руга руган ругательск ругательск ругательств ругательств ругательств руга руга руга руг руг ругнул руд рудник руж руж руж рук рук рука рукав рукав рукав рук рук рук рук рук руководител руководител руковод руководств руководств руководствова руководств руководству рук рукомесл рукомойник рукопис рукоп рукоплескан рукопожат рукопожат рук рук рул румя румян румянец румян румян румян румян румян румянц румянц румян румян рун рус русак русалк русалк русалк русалоч рус русл русск русск русск русск русск русск русск русск русск русск русск русск русск русс рус рус ру рус рутин рутин рутинер рутин рухляд рухнул рухнул рухнул рухнул рухнут руц руча руча руча руч рученьк ручищ ручк ручк ручк ручк ручк ручн ручн ручонк ручонк ручонк руч руч руч руч рушат руш руш рыб рыбак рыбак рыбач рыбинск рыб рыб рыб рыб рыда рыда рыда рыда рыда рыда рыдан рыдан рыдан рыдан рыдан рыда рыда рыда рыд рыж рыжеват рыж рыжик рыка рыл рыл рыл рыл рынк рынк рынк рынк рынок рысак рытвин рыт рыхл рыцар рыцар 
рыцар рычаг рычан рычан рыщет рыщ рьян рю рюмк рюмк рюмк рюмк рюмк рюмочк рюмочк рюш ряб ряб рябин ряб рябчик рябчик рябчик ряб ряд ряд ряд ряд ряд ряд ряд рядышк ряд ряжен ряжен рязанск с сабл сабл сабл сава саван савинов савич савич савишн саврас савраск савраск савраск сад сад сад садик садик садик сад сад сад сад сад сад сад садк сад садовник садовник садовник садов садовск садовск садов садов садов сад сад сад сад садя сажа сажа сажа сажа сажа сажа саж саж сажен сажен сажен саж сайк сайк саксон саксонск саксонск салазк салат сал сал сал салом саломе салон салон салон салон салоп салоп салоп салоп салоп салоп салфетк салфетк сальн сальн сальн сам сам сам сам сам сам сам самовар самовар самовар самовар самовар самоварчик самовлюблен самовольн сам самодержавн самодовольн самодур сам самозабвен самозабвен самозабвен сам самойл самоличн самолюб самолюбив самолюбив самолюбив самолюбив самолюбив самолюб самолюб самолюб сам самомал самомнен сам самонаслажден самонужн самоотвержен самоотвержен самоотвержен самопожертвован самосознан самосохранен самостоятельн самостоятельн самостоятельн самостоятельн самостоятельн самостоятельн самоубийств самоубийств самоубийств самоубийств самоубийц самоуверен самоуверен самоуверен самоуверен самоуверен самоунижен самоуправств сам сам сам сам сам сам сам сан сандал сан санк санкт сановник сапог сапог сапог сапог сапог сапог сапог сапог сапог сапожник сапожн сара сара сара сара саранч сарат сарафан сар сара сара сарказм сарказм сарказм саркастическ саркастическ саркофаг саркофаг сатанинск сатир сатир сатир сатир сатир сатир сафья сафьян сахар сахарн сахарниц сахарниц сахарн сахар сбавк сбавк сбалмошн сбега сбега сбега сбега сбега сбега сбега сбежа сбежа сбежа сбежа сбережен сбережен сбережет сбереч сбива сбива сбива сбива сбива сбива сбива сбива сбива сбива сбив сбивчив сбивчив сбивш сбивш сбивш сбил сбил сбил сбил сбил сбира сбира сбит сбит сбит сбит сбит сближа сближа сближен сближен сближен сблиз сбок сболтнул сборник сборник сборн 
сбрасыва сбрасыв сбред сбрил сбрил сброд сброс сброс сброс сброс сбро сбру сбру сбудет сбыва сбыва сбыва сбывш сбыл сбыл сбыт сбыт сбыт сва свадеб свадьб свадьб свадьб свадьб свадьб свалива свалива свал свал свал свал свал свал сваля свал свар свар свата свата сват сватовств сватовств све сведен сведен сведен сведен сведет свед сведущ свеж свеж свеж свеж свеж свеж свеж свеж свежеодет свежепросолен свежест свежест свежест свеж свеж свеж свеж свеж свеж свеж свезт свез свезут свел свел свергнут сверк сверка сверка сверка сверка сверкан сверкан сверка сверка сверка сверка сверкнет сверкнув сверкнувш сверкнувш сверкнул сверкнул сверкнул сверкнул сверкнут сверля свернет свернет свернул свернул свернул свернул свернут свернут свертк свертыва свертыва свертыва сверх сверх сверхъестествен сверхъестествен сверчок сверша сверша сверш сверш сверш сверш сверш сверш сверш свер свес свест свет свет света света свет светел светелк свет светик свет свет свет свет свет свет свет свет свет светл светл светл светл светлел светленьк светленьк светлиц светлиц светл светловыбрит светл светл светл светл светл светл светл светл светл светл светл светл светляк светов светозарн свет светопреставлен светск светск светск светск светск светск светск свет свет свет светя свеч свеч свеч свеч свеч свечк свечк свечк свечк свечк свечн свеч свеч свешива свива свидан свидан свидан свидан свидан свидан свидан свидетел свидетел свидетел свидетел свидетел свидетельниц свидетельск свидетельств свидетельств свидетельств свидетельствова свидетельствова свидетельств свидетельств свидетельств свидетельству свидетел свидетел свидетел свидригайл свидригайлов свидригайлов свидригайлов свидригайлов свидригайлов свидригайлов свинец свинкин свинств свинств свинтус свинцов свинцов свинц свин свин свин свирб свирел свирел свиреп свиреп свиреп свирепств свиса свист свист свистел свист свист свистк свистнет свистнул свисток свист свит свиток свит свищеш свобод свобод свобод свободн свободн свободн свободн свободн 
свободн свободн свободн свободн свободн свободн свободн свободн свобод свобод свод свод свод свод свод свод свод сводн свод сво своевольн своевол своевремен своевремен сво сво сво сво сво своз сво сво сво сво сво свойск свойств свойств свойств свойств свойств свойствен свойствен свойствен свойствен свойствен свойствен свойств сворачива сворот сворот свороч сво сво свысок свыш свяж свяж связа связа связа связа связа связа связа связа связа связа связ связ связк связн связыва связыва связыва связ связ связ связ свят свят святк святк свят свят свят свят святок свят свят свят свят свят святын святын святын свят священ священник священник священник священник священ священ священ священ священ сгиб сгиба сгинеш сгинул сгинут сгин сглажива сглаз сглаз сглуп сглуп сгнил сгниют сговарива сговарива сговор сговор сговор сговор сговор сговор сгон сгоня сгоня сгора сгора сгор сгорблен сгорблен сгорел сгорел сгорел сгорет сгор сгоряч сготов сгреб сгруппирова сгуб сгуб сгуб сгуб сгуща сгуща сгуща сгуща сдав сдав сдает сдает сдал сдан сдат сдач сдач сдач сдач сда сдают сдвига сдвинеш сдвинув сдвинул сдвинул сдвинул сдвинул сдвин сдвинут сдвинут сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сдела сделк сдержа сдержа сдержа сдержа сдержа сдержа сдержан сдержан сдержа сдержа сдержа сдержа сдержива сдержива сдержива сдержива сдержива сдержива сдержива сдержива сдержив сдержив сдерж сдернет сдернул сдерут сде сдобн сдруж сдует сдур се себ себ себятешен севастопол севастопол севастьяныч сев север север северн северн северн север севш сегидил сег сегодн сегодняшн сегодняшн сегодняшн сегодняшн сегодняшн сед седет седеют седе седл седл сед сед сед седок седок сед сед сед сед сед сед седьм седьм седьм седьм сеет сезон се сейчас сек секир секрет секрет секретар секретарш секретарш секретар секретар секретар секрет секретн секретн секрет секрет секрет сектант секунд секундант секунд секунд 
секут секуч секущ сел сел селадон сел сел селедк селедк селедк селен селен сел сел сел сел сельд сельск сельск сельтерск сельц селянк сем семафор семафор семг семейн семейн семейн семейн семейн семейн семейн семейн семейн семейств семейств семейств семейств семейств семейств сем сем семен семен семен семен семенович семенович семенович семенович семенович семеновн семеновн семеновн семеновн семеновн семен семен семеныч семеныч сем семидесят семидесят семик семик семилетн семинар семинарист семинарист семинарист семицветн семнадцат семнадца сем сем сем сем семьдес сем сем сем семьсот сем сем семьянин сем сен сен сенат сенат сенатор сенатор сенатск сенатск сенат сен сен сен сен сен сен сен сен сенова сенова сеновал сеновал сенокос сен сентенц сентенц сентенциозн сентенц сентенц сентиментальн сентябр сентябр сентябр сен сен сен септим сер серафим сер сервирова сервирова сергевн сергевн сергевн сергеевич сергеевн сергеевн сергеевн сергеевн сергеич серг сердечк сердечн сердечн сердечн сердечн сердечн сердечн сердечн сердечн серд серд серд серд серд сердит сердит серд сердит серд сердит серд серд сердобольн сердолик сердц сердц сердц сердц сердц сердц серд серебр серебр серебрист серебрист серебр серебр серебряк серебряков серебряков серебрян серебряник серебрян серебрян серебрян серебрян серебрян серебрян серег серед середин середин середин середин середин серед сережк сережк сер сереньк серж сер сер сероват сер сер сер сер серп сер серча сер сер сер сер сер серьг серьг серьезн серьезн серьезн серьезн серьезн серьезн серьезн серьезн серьезн серьезн серьезн серьезн серьезн серьезн сестер сестр сестр сестр сестрин сестрин сестриц сестриц сестриц сестриц сестриц сестриц сестр сестр сестр сест сет сетк сетк сетк сетова сеттинья сет сет сечет сеч сеч се сеют се сжав сжавш сжал сжал сжал сжал сжал сжал сжал сжал сжал сжал сжат сжат сжат сжат сжат сжат сжева сжег сжеч сжига сжима сжима сжима сжима сжима сжима сжима сжим сзад сибар сибарит сибир сибирк сибир сивиллин сиволап 
сигар сигарет сигар сигар сигар сига сигна сидевш сидевш сидевш сидевш сидевш сидел сидел сидел сидел сидел сиден сидет сид сид сид сид сид сид сид сид сидя сидя си си сиенск сиенск сиж сиз сиз сиз сикстинск сил сил сил сил сил силач сил сил сил сил сил сил сил сил силушк силуэт сил сильв сильн сильн сильн сильн сильн сильн сильн сильн сильн сильн сильн сильн сильн сильн сильн сильн сильн сил сим символ символ символизм символизм символизм символист символическ симеон симеонов симеонов сим симметрическ симметричн симпат симпат симпатич симпатичн симпатичн симпатичн симпатичн симпат симпат симплон симптом синев синев синев син син син син синеньк синеньк синеньк синеок синечерн синеющ син син син син син синодальн синодальн синоним синтаксис син синьор синьор син синяк син сипел сипен сипен сипет сипл сипл сипл сипл сипл сиренев сиренев сиренев сирен сирен сирен сирин сириус сироп сирот сирот сирот сиротк сиротлив сиротлив сиротск сиротск сиротск сирот сист систем систематическ систем сист систем сит ситх ситцев ситцев ситцев ситцев ситцев ситц сих си си сия сия сия сия сия сия сиян сиян сиян сиян сиян сиян сиян сиятельств сия сия сия сия сия сия сия сия скабрезн скаж скажет скажет скажеш скаж скаж скаж скажут сказ сказа сказа сказа сказа сказа сказа сказа сказал сказан сказан сказа сказа сказа сказа сказа сказа сказа сказа сказк сказк сказк сказк сказк сказк сказк сказок сказочник сказочн сказыва сказыва сказыва сказыва сказыва сказыва сказыва скака скака скакан скакан скакан скака скакнет скакнут скал скал скалдырничеств скал скалист скал скал скалк скал скал скал скал скальд скал скамеечк скамеечк скамеечк скам скамейк скамейк скамейк скамейк скам скам скам скам скам сканда скандалезн скандалезн скандализиру скандальн скандальн скандиру сканирова скаредн скаредн скаредн скатерт скатерт скатерт скат скач скачет скачк скачк скважин скважин сквер скверн скверн скверн скверн скверн скверн скверн скверност скверн скверн скверн скверн сквоз сквоз сквоз сквозн сквозняк 
сквоз сквоз сквозя скворц скворц скворц скелет скептик скидыва ски скинет скинут скиньт скит скита скита скита скитальц скитальц скитальц скитан скита скит скит склад склад склад складен складк складк складк складк складк складк складн складн складок складыва складыва складыв склеп склон склон склон склонен склон склон склон склон склон склон склон склон склон склон склон склон склон склонност склон склон склон склоня склоня склоня склоня склон склоня склоня склоня склон склуб склянк склянк склянк скобк скобк сковород сковород сковород сковородк сковород сковород сковыва ског ско сколот сколочен сколочен скол скольз скольз скольз скользк скользк скользк скользнет скользнул скользнул скользнул скользнут скольз скольз скользя скользя скольк скольк скольк ском скомка скомка скомка скомпрометирова сконфужен сконфужен сконфуж сконфуз сконфуз сконфуз сконфуз сконфуз сконфуз сконфуз сконча скоп скоп скоп скоплен скопл скорб скорбет скорб скорб скорбн скорбн скорбн скорбн скорбн скорб скорб скорб скор скор скореньк скорлуп скорлуп скорлуп скорняк скор скор скороговорк скороговорк скор скор скор скор скоропостижн скорч скор скор скор скос скот скот скотин скотинин скотин скотниц скотниц скотн скотоводств скотт скошен скош скрадыва скрас скребет скребл скрежет скрежет скрежета скрежещ скрежещущ скреп скреп скреп скрест скрест скрив скрив скрив скрив скрижа скрижал скрип скрипач скрипел скрипел скрип скрип скрипк скрипк скрипк скрипк скрипк скрипк скрипнул скрипок скрип скрип скрипуч скрипуч скрипуч скрип скро скро скроет скроет скром скромн скромн скромн скромност скромн скромн скромн скромн скрут скруч скрыв скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыва скрыв скрыв скрыл скрыл скрыл скрыл скрыл скрыл скрып скрыт скрыт скрытн скрытн скрыт скрыт скрыт скрыт скрыт скрыт скрыт скрыт скрыт скрыт скрюч скрюч скряг скуд скудн скудн скудн скудн скудн скует скук скук скук скук скук скул скулист скульптор скуп скуп скупа 
скупа скуп скупост скупост скупц скуп скуча скуча скуча скуча скуча скуча скуча скуча скуча скуча скуч скуч скучен скучищ скучн скучн скучн скучн скучн скучн скучн скучн скучн скучн скучн скучн скучн скуша ску слаб слаб слаб слаб слабеет слабел слабеньк слабеньк слабеньк слабе слаб слаб слаб слаб слаб слабонервн слабосил слабост слабост слабост слабост слаб слаб слаб слаб слаб слаб слаб слав слав слав слав славн славненьк славн славн славн славн славн славн славн слав слав слав славяноф славянск славянск слага слага слад слад сладк сладк сладк сладк сладк сладк сладк сладк сладк сладок сладостн сладостн сладостн сладостн сладостраст сладостраст сладострастн сладострастн сладост сладчайш сладчайш слаж слажен сласт сласт слат слащ слев слег слегк слегл след след след след след след след след след след след след след следова следова следова следова следовател следовател следовател следовательн следовател следовател следовател следова следствен следствен следств следств следств след след след след след след след след следу след след след следя слеж слез слез слез слез слез слезинк слезинк слезл слезн слез слезш слез слезьм слеп слеп слепнут слеп слеп слеп слеп слепот слепот слеп слеп слеп слепя слесарн слесар слесар слета слета слета слетел слетел слетел слетет слет слив слива слива слива слива слив сливк сливк сливок сливочк сливш слил слил слипа слит слит слит слич слишк слиян слов слов слов слов словар слов слов словесн словесн словечк словечк словечк словечк словн слов слов словоохотлив словоохотлив словоохотн слов словц слог слог слог слог сло сло слож слож слож сложен сложен сложен сложен сложен сложен слож слож слож слож слож слож слож слож сложил слож слож слож сложн сложн сложн сложн сложн слоист слоист слома слома слома слома слома слома слома слом слом слом слом слонов слоня сло сло слуг слуг слуг слуг слуг слуг служак служак служанк служанк служанк служанк служат служа служа служб служб служб служб служб служб служебн служен служен служ служ служ служ служ 
служив служ служ служ служ служ служ служ служ служ служ слух слух слух слух слух случ случа случа случа случа случа случа случайн случайн случайн случайн случайн случайн случайн случайн случайн случайн случайн случайн случайн случайн случайн случайн случа случа случа случа случа случ случа случа случ случ случ случ случ случ случ случ случ случ слуша слуша слуша слуша слуша слуша слуша слуша слуша слуша слуша слуша слуша слуша слуша слуша слушател слушател слушател слушательниц слушател слушател слуша слуша слуша слуша слуша слуш слыв слыл слыл слыха слыха слыха слыха слыха слыха слыха слыхива слыхива слыхива слых слыш слыша слыша слыша слыша слыша слыша слыша слышамш слыша слыша слыша слышат слышат слыша слыш слыш слыш слыш слыш слыш слышн слышн слышн слышн слышн слышн слышн слыш слыш слюб сляг слякот смаза смаза смаза смаза смазлив смазн смак смастер сматыва смахнет смахнул смахнул смеет смеет смеет смеет смееш смееш смежен смеж смежн сме сме смейт смейт смека смекнул смел смел смел смел смел смел смел смел смелост смелост смел смел смел смел смел смел смен смен смен смен смен смен смен смен смен сменя сменя сменя сменя сменя сменя сменя смерд смер смерка смерка смеркл смерт смертельн смертельн смертельн смертельн смерт смерт смертн смертн смертн смертн смертн смертн смертн смертоубийств смерт смерт смерч смест сме смета смета сметан сметан смета смет сметлив смет смет смет смех смех смех смех смех смеша смеша смеша смеша смеша смеша смеша смешен смешива смешива смешива смешив смеш смеш смеш смешл смешлив смешлив смешлив смешн смешн смешн смешн смешн смешн смешн смешн смешн смешн смешн смешон сме сме смеют смеют смеющ смеющ смеющ смеющ сме смея смея смея смея сме смея смигнув смир смирен смирен смирен смирен смирен смирен смир смирительн смир смир смирн смирн смирн смирн смирн смир смиря смиря смиря смог смогл смогл смог смоет смож сможет смол смоленск смолист смолк смолка смолка смолка смолкл смолкнет смол смолол смолча смольн смор смор сморка сморка смородин 
смородин смородин смородин смородин смородиновк смородинов смородинов смороз сморщен сморщ сморщ сморщ смотр смотревш смотревш смотревш смотревш смотрел смотрел смотрел смотрел смотрел смотрет смотр смотр смотр смотр смотрител смотр смотр смотр смотр смотр смоч смочен смоч смо смрад смрадн смрадн смрад смугл смугловат смугл смугл смугл смугл смугл смугл смугл смут смут смут смут смут смут смутн смутн смутн смутн смутн смутн смутн смутн смут смуща смуща смуща смуща смуща смуща смуща смуща смуща смуща смуща смуща смуща смущ смущ смущ смущен смущен смущен смущен смущен смущен смущен смущен смущен смущен смыва смыва смыв смыка смык смысл смысл смысл смысл смысл смысл смысл смысл смысл смысл смыт смыт смычк смычк смычок смягча смягча смягч смягчен смягч смягч смягч смял смят смятен смятен смятен смятен смят смят смят сна снабжа снадоб снам снам снаруж снаст снах снача сне снег снег снег снегов снегов снегов снегов снег снег снеда снеда снед снежинок снежк снежк снежк снежн снежн снежн снежн снежн снежн снежн снежн снежн снес снес снесеш снес снес снесл снест снизойт снизошл сниз снил снил снил снил снима снима снима снима снима снима снима снима снима сним снимет сним сним снисходител снисходительн снисходительн снисходительн снисходительн снисходительн снисходительн снисходительн снисходительн снисходительн снисходительн снисхожден снисхожден снисхожден снит снит снов снов снова снова сновиден сновидц сном сноп сноп сноровк снос снос сносн сносн сносн сносн сношен сношен сношен сну снурк снурк снурок снуют снующ сны снюха снял снял снял снят снят снят снят снят со собак собак собак собак собак собак собак собач собачк собачонк собачонк собачонок собач собач собач соберет соберет собер собер соберут собеседник собеседник собеседник собеседник собира собира собира собира собира собира собира собира собира собира собира собирательн собирател собира собира собира собира собир соблазнен соблазнител соблазнительн соблазнительн соблазнительн соблазнительн соблазнительн 
соблазнительн соблазнительн соблазнительн соблазнител соблазн соблазня соблазня соблазня соблазн соблюда соблюда соблюден соблюст соб соболезнован соболезну собор собор собор собор соб собра собра собра собра собра собра собра собра собра собра собра собран собран собран собран собран собрат собра собра собствен собствен собствен собствен собствен собствен собствен собственноручн собственноручн собствен собствен собствен собствен собствен собствен собствен событ событ событ событ событ событ событ собьет собьеш собьеш собьют сов сова сова соверша соверша соверша соверша соверша соверша соверша соверша соверша соверша совершен совершен совершен совершен совершен совершен совершен совершеннолет совершен совершен совершен совершен совершен совершен совершен соверш совершенств совершенств совершенству соверш соверш соверш соверш соверш соверш совест совест совест совестлив совестн совест совест совет совет совет совет советник советник советник советник советник совет советова советова советова советова советова советова совет совет совет совет совет совет совеща совещан совеща совин совлада совладет совлечет совмеща совокупн совокупн совпада совпада совпаден совпаден совпаден совпа совра совра совра совра современ современ современ современ современ совреш совс совьет согб согбен соглас соглас соглас соглас соглас соглас соглас соглас соглас соглас соглас соглас согласн согласн согласн согласн согласн соглас соглаша соглаша соглаша соглаша соглаша соглаша соглаша соглашен соглашен соглаш соглядата согна согнет согнув согнувш согрева согрева согрел согрет согрет согрет согреш согреш согр содержа содержан содержан содержан содержан содержател содержа содерж сод содра содрога содрога содроган содроган содрогнет содрогнул содрогнут сод соединен соединен соедин соедин соедин соедин соедин соединя сожалел сожален сожален сожален сожален сожален сожалет сожал сожале сожгл сожж сожж сожжен сожжен сожж сожител сожительниц сожител сожмет созва созва созвезд созвуч создава 
создава созда созда созда созда создан создан создан создан созда созда созда созда созда созда созда созда созда создан создан создательниц созда созда созда созерцан созерцан созерцан созерцательн созерца созна сознава сознава сознава сознава сознава сознав созна созна созна созна созна созна созна созна сознан сознан сознан сознан сознан сознан сознан сознан сознательн сознательн сознательн сознательн сознательн сознательн созна созна созна созна созна созрева созрева созревш созреет созрел созрел созрел созрел созрет созыва сойд сойдет сойдет сойдеш сойд сойд сойдут сойд сойт сойт сок сок сок сокол соколин сокол сокол сократ сокраща сокращен сокращен сокращ сокровен сокровен сокровен сокровищ сокровищ сокровищ сокр сокруша сокрушен сокрыт солга солга солг солдат солдат солдат солдатк солдат солдатск солдатск солдатск солдат солен солен солен солен солен солен солен солжет сол солидн солидн солидн солидн солидн солидн солидн солидн солидн солнечн солнечн солнечн солнечн солнечн солнечн солнц солнц солнц солнц солнышк солов солов солов соловьин соловьин соловьин соловьин солов солом солом соломен соломинк солон солонин солонин солонк солонк солонк сол сольнес сольнес сол сольют сом сомкнет сомкн сомкнут сомкнут сомнева сомнева сомнева сомнева сомнева сомнев сомнен сомнен сомнен сомнен сомнен сомнен сомнител сомнительн сомнительн сомнительн сон сон сон сонетк сонечк сонечк сонечк сонечкин сонечкин сонечкин сонечк сонечк сон сонин сонлив сонлив сонм сонм сон сонник сон сон сон сон сон сон сон сон сон сон сон сон сон сон сонюшк сон сообража сообража сообража сообража сообража сообража соображ соображен соображен соображен соображен соображен сообраз сообраз сообраз сообраз сообраз сообраз сообраз сообраз сообразн сообразн сообразн сообраз сообща сообща сообща сообща сообща сообща сообщ сообщен сообщ сообществ сообщ сообщ сообщ сообщ сообщ сообщ сообщ сообщительн сообщительн сообщ сообщ соответствен соответствова соответств соответств соперник соперник соперник 
соперниц сопернича сопет соп сопл сопляк сопостав сопоставлен сопоставлен сопоставля соприкаса соприкосновен соприкосновен сопричастн сопровожда сопровожда сопровожда сопровожда сопровожда сопровожда сопровожда сопровожда сопровожда сопровожда сопровожда сопровожден сопротивлен сопротивлен сопротивля сопротивля сопротивля сопряжен сопутник сопутствова сопутств соп сор соразмеря соратник сорва сорва сорва сорва сорва сорва сорва сорванец сорва сорва сорвет сорвет сор сор сорин сорин сорин сорин сор сорн сорок сорок сорокалетн сороков сороков сор соромник сорт сортировок сорт сор соса сосед сосед сосед сосед соседн соседн соседн соседн соседств соседств соседств сосед сосед сос сосет сос соскандал соскоч соскоч соскуч соскуч соскуч соскуч сосла сослов сослуживец сослуживц сослуживц сослуживц сосновк сосновк сосновк соснов сосн соснул соснут сосн сосредоточен сосредоточен сосредоточен сосредоточен сосредоточен сосредоточен сосредоточен сосредоточ сосредоточива сосредоточива сосредоточив сосредоточ сосредоточ сосредоточ сосредоточ сосредоточ сосредоточ соста состав состав состав состав состав состав состав состав составл составля составля составля составля составля составля составля составля состав состар состар состар состар состар состар состо состо состо состо состо сост состоя состоя состоя состоя состоя состоя состоя состоян состоян состоян состоян состоян состоян состоя состоя сострада сострадан сострадан сострадан состр состряпа состряпа состяза сосуд сосуд сосульк сосчита сосчита сосчита сосчита сосчита сосчита сот сот сотвор сотка соткут сотн сотн сотр сотряса сотряса сотряс сотрясен сот соус соусник соус соус соф соф соф софистик соф софочк софочк софочк соф соф соф соф соф сохнут сохранен сохранен сохран сохран сохран сохран сохран сохран сохран сохран сохран сохран сохран сохран сохраня сохраня сохраня сохраня сохраня сохраня сохраня сохран сох социалист социальн социальн социальн социальн социальн социальн соцьялизм сочета сочета сочин сочинен сочинен 
сочинен сочинен сочинен сочин сочинил сочинител сочинител сочинител сочинител сочин сочиня сочиня сочиня сочиня сочл сочл сочн сочн сочт сочтет сочт сочувствен сочувств сочувств сочувств сочувствова сочувствова сочувств сочувств сочувству сошедш сошел сошел сошл сошл сошлет сошл сошл сошл сошл сошлют сошьет сош сощур сощур спавш спавш спада спада спад спадет спазм спал спал спал спален спал спал спал спальн спальн спальн спальн спальн спальн спан спан спарж спарж спарж спас спаса спаса спаса спас спас спасен спасен спасен спасен спасен спас спасен спасен спасет спас спасиб спас спасительн спасительниц спасительн спасительн спасительн спасительн спас спаст спаст спас спасш спат спая спб спектакл спектакл спекулянт спел спеленут спел спел сперв сперед сперл спермацетн сперт спесив спет специальн специальн спеш спешат спеша спеш спеш спеш спеш спеш спеш спеш спеш спеш спи спим спин спин спинк спинк спинк спинк спин спин спин спин спин спин спира спира спирал спиридоныч спиридоныч спирт спирт спирт спирт спирт списк списк список спит спит спит спит спит спиц спиц спич спичек спичк спичк спиш сплав сплел сплел сплест сплет сплетен сплетен сплетн сплетн сплетник сплетн сплетут сплин сплошн сплош сплю спляш сподручниц сподряд спо споемт споет спо спойт споко спок спокойн спокойн спокойн спокойн спокойн спокойн спокойн спокойн спокойн спокойн спокойн спокойств спокойств спокойств спокойств спокойств сполетск сполз сполза сполза сполз сполн спор спор спор спор спор спор спор спор спор спор спор спорн спор спор спор спор спор спор способ способ способ способ способн способн способн способн способн способн способн способн способн способн способн способн способн способн способн способн способ способствова способствова способствова способствова способств способ споспешествован спотка споткнул споткнул спотык спохват спохват спо справ справедлив справедлив справедлив справедлив справедлив справедлив справедлив справедлив справедлив справедлив справедлив справ справ справ справ 
справк справля справля справля справля справля справок справочк справьт справ спрашива спрашива спрашива спрашива спрашива спрашива спрашива спрашива спрашива спрашива спрашива спрашив спро спровад спрос спрос спрос спрос спрос спрос спрос спрос спрос спрос спрос спросон спрос спрош спрут спрыгн спрыгнувш спрыгнул спрята спрята спрята спрята спрята спрята спрята спрята спрята спрята спрята спрята спрята спрячет спряч спрячьт спугнет спугнув спугнул спугнут спуск спуска спуска спуска спуска спуска спуска спуска спуска спуска спуск спуск спуст спуст спуст спуст спуст спуст спуст спуст спуст спута спута спута спутник спутник спутник спутник спущ спущ спьян спьян спят спят спящ спящ спящ спящ спящ спящ спящ сравн сравнен сравнен сравнен сравнива сравн сравнительн сравн сража сража сражен сражен сраз срам срам срам срам срам срам срам сребреник сребрист сребрист сребрист сред сред сред сред средиземн средин средин средн средн средн средн средн средн средн сред средств средств средств средств средств средств средств средств сред сред сред среза среза среза среза сродн срок срок срок срок срок срок сруб сруб срыва срыва срыва срыв срыв сряд ссад ссин ссор ссор ссор ссор ссор ссор ссор ссор ссор ссор ссор ссор ссыла ссыла ссыла ссыл ссылк ссылк ссылк ссылк ссылк ссыльн ссыльн ссыпа ста став става став став став став став став став ставк ставл ставн ставн ставн ставш став ставьт став став став стад стад стадвадцатипятирублев стад ста стака стака стакан стакан стакан стакан стаканчик стаканчик стаканчик стаканчик стаканчик стака стакнул стал стал стал сталкива сталкива сталкива сталкива сталкив стал стал стал стальн стальн стальн стальн стальн стал стам стан стан стан стан станет станет станет станеш станислав станк станов станов станов станов станов станов станов станов становитц станов станов становл станов станов станов стан стан станут станц станц станц станц стан станьт стар стар стара стара стара стара стара старан старан старан старан стара стара стара стар стар стар 
старе старе стареньк старец стареющ стареющ старик старик старик старик старик старик старик стар старин старин старин старин старин старин старин старин старин старин старин старин старин старин стар старичк старичк старичк старичок старичонк стар стар стар стар стар стар старост старост старост старост старост старост старост стар старух старух старух старух старухин старухин старухин старухин старухин старухин старухин старух старух старушеч старушеч старушк старушк старушк старушк старушк старушк старушонк старушонк старушонк старушонк старушонок стар старц старц старц старческ старческ старческ старш старш старш старшеньк старш старш старш старш старш стар стар стар стар стар стар стаскива стат статейк статейк статейк стат стат статистик статн статочн статск статск статск статск стат стат стату стат стат стат стат стат стат стат сташн стащ стащ стащ стащ ста ста ствол ствол стебл стеган стегнул стез стека стекл стекл стекл стекл стекл стекл стекл стекля стекля стекля стекля стекля стеклярус стелет стелющ стел стемнел стен стен стен стен стенан стен стен стенк стенк стенк стен стен стен стен стен степанид степанид степан степен степен степен степен степ степн степн степн степн степн степн степ степ степ степ стер стерв стерегл стерегл стерегут стережет стереотипн стереотипн стерет стереч стерляж стерпел стерт стерт стеснен стеснен стеснен стеснен стеснен стесн стесн стесн стеснительн стесн стесня стесня стесня стесня стесня стесня стесн стесн сти стил стилистическ стил стимер стира стира стира стира стира стирк стискива стиснув стиснул стиснул стих стих стиха стих стих стих стих стих стих стихийн стих стих стихл стих стихотворен стихотворен стихотворц стлал стлал стлив сто стог стог сто сто сто сто сто сто сто сто стоическ сто сто стойк стойк стойл стойт стол стол стол стол столб столб столбик столбняк столбняк столб столбов стол столет столет столетн столечк столик столик столик столик столик столик столик столиц столиц столиц столиц столичн столкновен 
столкнувш столкнул столкнул столкнул столкова стол столов столов столов столов столов стол столп столп столп столп столп стол стол столыпин стол стольк стольк стольк столяр стон стон стона стона стон стон стонет стон стон стонут стон стоп стопта стопта стоп сторгова сторож сторож сторожев сторож сторож сторож сторож сторон сторон сторон сторон сторон сторон сторонк сторонк сторон сторон сторон сторублев сторублев стотысячн стотысячн стошн сто сто стоя стоя стоя стоя стоя стоя стоя стоя стоя стоя стоя сто стоя стояч стояч стоя стоя страв страда страда страда страда страда страда страда страдальн страдальческ страдальческ страдан страдан страдан страдан страдан страдан страдан страдан страдан страданьиц страдан страдан страда страда страда страда страд страж страж страж страж страж стран стран стран стран стран страниц страниц страниц страниц страниц страниц стран стран странник странник стран стран стран стран стран стран странност странност странност стран стран стран стран стран стран стран стран стран странств странствова странству стран страст страст страстишек страст страстн страстн страстн страстн страстн страстн страстн страстн страстн страстн страстн страстн страстн страст страст страст страст страусов страусов страх страх страх страх страх страх страх страх страш страш страш страшн страшн страшн страшн страшн страшн страшн страшн страшн страшн страшн страшн страшн страшн страшн страшн страща страща стрекоз стрекоз стрекочет стрел стрел стрел стрелк стрел стрелочник стрел стрельб стрельб стреля стреля стреля стреля стреля стреля стремгла стрем стрем стремительн стремительн стрем стрем стремлен стремлен стремлен стремлен стремлен стрем стрем стрем стрижен стрижен стриндберг стрич строг строг строг строг строг строг строг строг строг строг строг строг строг строг строгост строгост строгост строгост строг строг строев строев стро стро строж строител стро стро стро стройк стройк стройк стройн стройн стройн стройн стройн строк строк строк строк строк строптив 
строч строчк строчк строчк стро стро строя стру стру стру стру стру струйк струйк струн струн струн струн струн струн струн струс струс струс струс струс струс стру стру стру струя струя стрясл стряхива стряхнул стряхнут сту студенист студен студент студент студент студентик студент студент студент студенческ студенческ студенческ студенческ студен стуж стуж стуж стуж стук стук стука стука стука стукан стукан стука стук стукнет стукнеш стукнув стукнувш стукнул стукнул стукнул стукнул стукнут стук стук стул стул стул стул стул стул стул стул стул ступа ступа ступа ступа ступа ступен ступен ступен ступеньк ступеньк ступеньк ступеньк ступеньк ступен ступ ступ ступ ступ ступ ступк ступк стуч стуча стуча стуча стуча стуча стуча стуча стуч стучат стучат стуча стуча стуч стуч стуч стуч стуч стуч стушева стушевыва стыд стыд стыд стыд стыд стыд стыд стыд стыд стыдл стыдлив стыдлив стыдлив стыдлив стыдлив стыдлив стыдлив стыдлив стыдлив стыдлив стыдн стыдн стыд стыд стыд стыд стыдя стыдя стыж стыж стыл стынет стяг стяг стягива стягива стяжа стянул стянут стянут су суббот суббот суббот субъект субъект субъект сугроб сугроб сугуб сугуб суд суд судак сударын судар суд судеб судебн судебн судебн судебн судейск судейск судейск суд суд суд суд суд суд судк суд судопроизводств судопроизводств судорог судорог судорог судорог судорог судорожн судорожн судорожн судорожн суд судыр судьб судьб судьб судьбинск судьб судьб судьб судьб суд суд суд суд суевер суевер суеверн суеверн суеверн сует сует сует сует сует сует сует суетлив суетлив суетлив сует сует сует сует суеш суеш сужд сужден сужден сужден сужден сужден сужден сужд сужд сужив суж суздальц суз су сукн сукн сукон сул сул сул султа сул суля сумароков сумасброд сумасброд сумасшедн сумасшедш сумасшедш сумасшедш сумасшедш сумасшедш сумасшедш сумасшедш сумасшедш сумасшедш сумасшеств сумасшеств сумасшеств сумасшеств сумасшествова сумасшеств суматох суматох суматох суматох сумеет сумееш сумел сумел сумел сумерк сумерк сум сумеют сумк 
сумк сумлен сумлен сумлен сумм сумм сумм сумм сумм сумочк сумрак сумрак сумрак сумрач сумрачн сумрачн сумрачн сундук сундук сундук сундук сундук сундук сундучок сунет сунув сунул сунул сунут суп суп суп супруг супруг супруг супруг супруг супруг супружеск супружеск супружеск супружеств супружеств супружеств суп сургуч сургуч сургуч сурдин сурм сур суров суров суров суров суров суров суров суров суров суров суров суров суров суров сурок сусальн сусл сутк сутк суток суточн суточн суточн сутуловат сут сутяжничеств суфлер сух сухар сухар сухарик сухарик сухар сухар сухар сух сух сух сух сух сух сух сух сухост сух сучок суч сушен сушен сушен суш суш сушильн суш сущ сущ существ существ существ существен существен существен существ существова существова существован существован существован существован существован существова существ существ существу существ существует существ существ существ существ существ сущност сущност сфер сфер сфер сфер сфер сфер сфер сфер сфинкс сфинкс схват схват схват схват схват схват схват схват схват схват схватк схватыва схватыва схватыва схватыв схват схвач схвач схимник схитр схитр схлебнув сход сход сход сход сход сход сход сход сход сход сход сход сход сходк сходк сходк сходн сходн сход сходств сходств сходствова сходств сход сход сход сход сход схож схож схож схоластик схорон схорон схорон схорон схорон сцен сцен сцен сцен сцен сцен сцеп сцеп сцеплен счаст счаст счаст счаст счаст счастл счастлив счастлив счастлив счастлив счастлив счастливец счастлив счастлив счастлив счастлив счастлив счастлив счастливц счастлив счастлив счастлив счастлив счастлив счастлив счаст счаст счаст счаст счаст счел счест счет счет счет счет счет счет счет счет счет счита счита счита счита счита счита счита счита счита счита счита счита счита счита счита счита счита счит сшедш сшиб сшил сшит сшит съеда съеда съеда съед съедет съед съеж съеж съезд съезд съезд съезд съезд съезд съезжа съезжа съезжа съезжа съезжал съезжа съезжа съезжа съезж съел съел съем съест съестн 
съестн съестн съестн съест съеха съеха съеха съеш сыворотк сыгра сыгра сыгра сыгра сыгра сыгра сыгра сыгра сызмалетств сызнов сын сын сын сын сынк сын сыновн сыновн сын сын сын сыпа сыпа сыпа сыплет сыплющ сыпн сыр сыр сыр сыр сыр сыр сыр сырост сырост сырост сыр сыр сыр сыр сыска сыска сыска сыскн сыт сыт сыт сытост сыт сыт сыт сычих сычуг сыщ сыщет сыщеш сыщ сыщик сыщик сыщност сыщ сыщут сь сю сюд сюжет сюрприз сюрпризик сюрприз сюртук сюртук сюртук сюртучишк сюртучок сяд сядет сяд сядут сяд сядьт сяк сяк сям т та табак табакерк табакерк табакерк табак табак табачн табачн табачок таблиц таблиц таблиц табун тавариществ таверн тает таз та та та та та та та таин таинств таинствен таинствен таинствен таинствен таинствен таинствен таинствен таинствен таинствен таинствен таинств та тайк тайн тайн тайн тайн тайн тайник тайн тайн тайн тайн тайн тайн тайн тайн тайн тайн тайн тайн тайн тайт так так так такж так так так так так так таков таков таков таков таковск таков таков таков так так так так так так таксомотор такт такт тактик такт так так талант талант талант талантл талантлив талантлив талантлив талантлив талантлив талантлив талант талант талант талер тал тал тал тал тал тал тал тальк тальм тальмочк тальм тал там там тамар тамошн тамошн тамошн танец танц танц танц танц танц танцева танцева танцевальн танцева танц танц танцу танц тар тарака таракан таракан таракан тарака тарантас тарант тарантьев тарантьев тарантьев тарантьев тарар тарас тарас тараск тараск таращ таращат таращ тарелк тарелк тарелк тарелк тарелк тарелк тарелк тарелок таска таска таска таскал таска таска тас татар татарв татарин татарск татарск татарск татарск тат татья тафт тащ тащ тащ тащ тащ тащ тащ тащ тащ тащ та тают тающ та тая тая таян та та твар твар твар тверд тверд твердеют тверд тверд тверд тверд тверд тверд тверд тверд тверд тверд тверд тверд твердост твердост твердост тверд тверд тверд тверд тверд тверез тверж твер тво тво тво тво тво тво тво тво тво тво тво творен творец твор твор твор 
творим твор твор творог творц творческ творческ творческ творческ творческ творчеств твор тво тво те театр театр театр теб теб текл текл текл текст текст текут текущ текущ тел тел тел телег телег телег телег телегин телегин телеграмм телеграмм телеграмм телеграф телеграф телеграф телеграфирова телеграфирова телеграфир телеграфистк телеграфн телеграфн телеграфн телег тележк тележк тележк тележн телемак телесн телефон телефон тел тел тел тельц телят телятин телятин телятин телятин тем тем тем темен темен тем темляк темн темн темневш темневш темн темн темнел темненьк темнеют темнеющ темнеющ темн темновлас темн темн темн темнолик темн темнот темнот темнот темнот темн темн темн темн темн темн темн темн температур тем тем тем тен тен тенист тенист тенист тенист тенист тенор тен тен теодор теоретик теоретическ теор теор теор теорийк теор теор теор теор тепел теперешн теперешн теперешн теперешн теперешн теперешн теперешн теперешн теперешн теперешн теперич тепер тепл тепл тепл тепл тепл тепл тепл теплиц тепл тепл тепл тепл тепл теплот теплот теплот тепл тепл тепл тепл тепл тер тереб теребьев теребьев тереб тер терет терза терза терза терза терза терза терза терза терза терзан терзан терза терза терз термин терпевш терпел терпел терпелив терпелив терпелив терпен терпен терпен терпен терпет терп терп терп терп терпл терп террас террас террас террас террас террас теря теря теря теря теря теря теря теря теря теря теря теря теря теря теря тер тесемк тесемочк тес тесн тесн тесн тесн тесн тесн тесн тесн тесн теснот тесн тесн тесн тесн тесн тесн тесня тесов тест тест тестов тесьм тесьм тет тетеньк тетив тетив тетк тетк тетк тетк тетк теток тетрад тетрад тетрадк тетрадк тетрадк тетрадк тетрадк тетрад тетрад тетушк тетушк тет тех техник технолог технолог течен течен течен течен течет тешат тешат теш теш теш теш тещ тещ тиатр тигр тик тил тимофеевн тип типограф типограф тип тирад тирад тира тиран тирол тирольск тиск тиск тит тита титулярн титулярн тиф тиф тих тих тих тих тих тих тих 
тих тих тих тих тих тих тихоновн тихоньк тихоньк тих тих тициа тиш тиш тишин тишин тишин тишин тишин тишин тиш тка ткал ткан ткан ткан ткет ткнул тко тлеет тлеющ то тоб тоб товар товар товарищ товарищ товарищ товарищ товарищ товарищ товарищ товариществ товарищ товарищ товарн товар товар товар товар тогд тогдашн тогдашн тогдашн тогдашн тогдашн тог тож то токар токм толк толка толка толка толка толкал толк толка толка толк толка толк толк толкл толкл толкнув толкнул толкнул толкнут толк толкова толкова толкован толкова толков толков толков толкотн толк толк толкует толк толкуч толкуч толкуч толк толк толоч толп толп толп толп толп толп толп толп толп толп толп толп толп толп толст толст толст толстеет толстеньк толстеньк толстоват толст толстогуб толст толстот толст толст толст толст толст толст толстяк толч толчен толчет толчк толчк толчк толчок толщин тольк том том том том том томик том том том том том том том томительн томительн том том том том том томлен томлен томлен томлен томлен томлен томлен томл томн томн томн томн томноок томн томн томн томн томн том том том том том томя тон тон тон тоненьк тоненьк тоненьк тоненьк тоненьк тоненьк тоненьк тоненьк тонет тонк тонк тонк тонк тонк тонк тонк тонк тонк тонк тонкост тонкост тонкост тонк тон тонок тон тон тонул тонул тонут тончайш тончайш тончайш тончайш тон тон топ топа топ топ топ топ топлив топнув топнул топнул тополев тополев топол топол топол топол топор топор топор топор топорн топор топор топорщ топот топочет топта топта топчеш топч топчут топ топ торг торг торгова торгова торгова торгова торгова торговк торговк торговл торговл торгов торгов торгов торговц торговц торгов торг торг торг торжеств торжеств торжеств торжествен торжествен торжествен торжествен торжествен торжествен торжествен торжествен торжествен торжествен торжеств торжествова торжествова торжеств торжеств торжеств торжеств торжествующ торжеств торжеству тороват тороп тороп тороп тороп тороп тороп тороп тороп тороп тороп тороплив тороплив 
тороплив тороплив тороплив тороплив тороплив тороплив торопл тороп тороп тороп торт торт торф торча торча торча торча торча торчат торч торчк тоск тосканск тосканск тоск тоск тосклив тосклив тосклив тосклив тосклив тоскова тоскова тоскова тоск тоск тоск тоску тоск тоскуеш тоск тоск тоск тоск тоску тот тотчас точ точек точ точ точ точк точк точк точк точк точк точн точн точнехоньк точн точн точн точн точност точност точност точн точн точн точ тошн тошн тошнот тощ тощ тощ тощ то тра трав трав трав трав трав травк травк травл трав трав трав трагед трагед трагед трагик трагик трагическ трагическ трагическ трагическ трагическ трактат трактир трактир трактир трактир трактиришк трактиришк трактирн трактирн трактирн трактир трактир трактир трам трамва транспарант трат трат трат трат трат трат трат трат трат трат трат траур траур траур траурн траурн траурн траурн траурн траурн траурн трахтир трач требова требова требова требова требова требован требован требован требован требован требова треб требуем треб треб требуеш треб треб требух треб треб треб треб требу тревог тревог тревог тревог тревог тревог тревог тревог тревог тревож тревож тревожат тревож тревож тревож тревож тревож тревож тревож тревож тревож тревож тревож тревожн тревожн тревожн тревожн тревожн тревожн тревожн тревожн тревожн тревож тревож тревож тревожьт треволнен треволнен трезв трезв трезв трезв трезвон трезв трезв трезор треклят треклят трельяж трем тремяст тренька трепа трепан трепан трепа трепет трепет трепета трепета трепета трепета трепета трепета трепетан трепетан трепет трепетн трепетн трепетн трепет трепет трепещ трепещет трепещут трепещущ трепещущ трепещущ трепещущ трепещущ трепещущ трепещущ трепл треплев треплев треплев треплет треплют треплют трепл треск треска треск треск трескотн трескотн трескуч трет трет третн трет трет трет третьегодичн третьегодичн третьегодн трет трет трет трет трет трет трет трет трефов трех трехлетн трехлетн трехлетн трехрублев трехрублев трехрыбн трехсот трехст 
трехударн трехэтажн трещ треща треща треща треща трещат треща трещин трещин трещин трещ три трибун трибун тригорин тригорин тригорин тригорин тридцат тридцатипятилетн тридцатирублев тридца тридцат трижд тризн тринадцат тринадцатилетн тринадцат тринадца трист триумф трихин трога трога трога трога трога трога трога трога трога трогательн трогательн трогательн трогательн трога трога трога трог тро троек троекратн троекратн троекратн троекратн трож тро тро тро троиц троицын тройк тройк тройк тройн тронет тронет тронеш тронул тронул тронут тронут тронут тронут тронут трон троньт троп троп тропинк тропинк тропинк тропинк троп троп троп тросточк трост трост троттуар троттуар тротуар тротуар тротуар тротуар тротуар тротуар тротуар трофим трофимов трофимович трофимов трофимов трофимыч трофимыч троюродн тррреклят труб труб труб труб труб трубк трубк трубк трубк трубк трубк трубн трубн трубочист трубочк труб труб труд труд труд труд труд труд труд труд труд труд труд труд трудн трудн трудн трудн трудн трудн трудност трудност трудн трудн трудн трудн трудн труд трудов трудов трудов трудов трудолюбив трудолюбив трудолюбив трудолюб трудолюб труд труд труд труд труд труженическ труж труп трупн труп труп трус трус трус трус трусишк трусл труслив труслив труслив трусоват трусост трын трюфел тряпк тряпк тряпк тряпк тряпок тряп тряп трясет трясл трясл трясл тряс трясут тря тряс тряхнул тс тсс тся ту туалет туалет туалетн туалет туг туг туд туж туж туз тузенб тузенбах тузенбах тузенбах туз туз туз туловищ туловищ тулон тулон тулуп тулуп тульск тума тума туман туман туман туман туман туман тума тума тума тума тума тума тума тума тума туман туман тума туман тумб тумб тунеядств туп тупеет туп тупик тупиц туп туп туп тупост тупоум тупоумн туп туп тур тур турген тургенев тургеневск турецк турецк турецк турецк турист турк тускл тускл тускл тускл тускл тускл тускл тускнел тут туфл туфл туфл туфл тухл тухл туч туч туч туч туч туч туч тучк тучк тучк тучн туч туш тушат туш туш тш тщательн 
тщательн тщеслав тщеслав тщеслав тщеслав тщеславн тщет тщетн тщетн ты ты тыка тыкв тысяч тысяч тысяч тысяч тысяч тысяч тысячелет тысячелет тысячелетн тысяч тысячн тысяч тысяч тысящ тычет тычут тьма тьме тьмо тьму тьмы тьфу тюк тюлев тюмен тюменев тюремн тюремн тюремн тюрьм тюрьм тюрьм тюрьм тюрьм тюрьм тютчев тюфяк тягл тяг тягост тягостн тягостн тягостн тягостн тягост тяготеют тягот тягот тягот тягот тягот тягот тяготя тяг тяжб тяжб тяжебн тяжел тяжел тяжел тяжел тяжел тяжеленьк тяжел тяжел тяжеловес тяжел тяжел тяжел тяжел тяжел тяжел тяжел тяжел тяжел тяжел тяжел тяжест тяжест тяжк тяжк тяжк тяжк тяжк тяжк тянет тянет тянул тянул тянул тянул тянул тянут тянут тяп у у уайльд убав убав убаюка убаюкива убаюкива убега убега убега убега убег убег убед убед убед убедительн убедительн убедительн убедительн убед убед убед убед убежа убежа убежа убежа убежда убежд убежд убежд убежден убежден убежден убежден убежден убежден убежден убежден убежден убежден убежден убежден убежд убежден убеж убеж уб уб убережеш уберет убереч убереш убер убер уб убива убива убива убива убива убива убива убивал убива убива убив убивец убивств убийств убийств убийств убийств убийств убийц убийц убийц убийц убийц уб уб уб убира убира убира убира убира убира убиран убира убира убира убир уб убит убит убит убит убит убит убит убит убит убит убит уб убог убог убор убор убор уборк уборк уборк уб убоя убра убра убра убра убра убра убра убра убра убра убра убранств убранств убра уб убытк убытк убытк убытк убыток уб убьет убьет убьеш убьеш уб уважа уважа уважа уважа уважа уважа уважа уважа уважа уважа уважа уважа уважа уважа уважа уважа уважен уважен уважен уважен увар увед увед уведом уведом уведом уведомл уведомлен уведомля уведомля увез увез увез увезл увезл увезт увез увезш увековеч увел увел увеличива увеличива увеличива увелич увелич увелич увелич увенча увенча увер увер уверен уверен уверен уверен уверен уверен уверен уверен уверен уверен увер уверен увер увер увер уверительн увер увер уверова 
уверова уверова увертел увертк увертк увертыва увер увер уверя уверя уверя уверя уверя уверя уверя уверя увер увеселен увеселительн увеселительн увеселительн увеселя увесист увест увест увечн увещев увида увида увида увида увида увида увид увидел увидел увидел увидет увид увид увид увид увид увид увид увид увиж увиж увит увлек увлека увлека увлека увлека увлекательн увлека увлека увлека увлек увлекл увлекл увлекл увлек увлек увлеч увлечен увлечен увлечен увлечен увлечен увлечен увлеч увлеч увлеч увод увод увоз увол увол увольня увольт увраж ув увяда увяда увяд увядш увядш увяз увяза увязнув увязыва увя увя угада угада угада угада угада угада угада угада угада угада угада угадыва угадыва угадыва угадыва угадыва угадыва угадыва угадыв угар угарн угас угаса угасл угасл угасл угаснет угаснувш угасш угл угл угл угл угл угловат угловат угловат угловат углов угл угл углуб углуб углуб углуб углубл углублен углублен угл угл угнета угнета угнет уговарива уговарива уговарива уговарив уговор уговор уговор уговор уговор уговор уговор уговор угод угод угодлив угодн угожден угожден угол уголк уголк уголк уголк уголк уголовн уголовн уголок угол угол угольк угольн угол угорел угорел угор угост угоща угоща угоща угощен угощен угощен угощ угрожа угрожа угрожа угрожа угрожа угрожа угроз угроз угроз угроз угроз угроз угрыза угрызен угрызен угрюм угрюм угрюм угрюм угрюм угрюм угрюм угрюм угрюмств угрюм угрюм угрюм угрюм угр удава удава удав удав удав удав уда уда удален уда удал удал удал удал уда уда удал удаля удаля удаля удаля удар удар удар удар удар ударен удар удар удар удар удар удар удар удар удар удар удар удар удар удар удар удар удар удаст удач удачн удачн удвоен удвоен удвоен удвоива удво удво удво удво удел удержа удержа удержа удержа удержа удержа удержан удержа удержа удержива удержива удержива удержива удержива удержива удержива удержив удержив удерж удерж удерж удерж удив удив удив удив удив удив удив удив удив удивительн удивительн удивительн удивительн удивительн 
удивительн удивительн удивительн удивительн удив удивл удивлен удивлен удивлен удивлен удивлен удивлен удивлен удивлен удивлен удивлен удивлен удивлен удивлен удивл удивля удивля удивля удивля удивля удивля удивля удивля удивля удивля удивл удив удив уд уд уд удобн удобн удобн удобн удобн удобопонятн удобств удобств удовлетвор удовлетвор удовлетворен удовлетворен удовлетворен удовлетворен удовлетворен удовлетворен удовлетворен удовлетвор удовлетвор удовлетворительн удовлетворительн удовлетворительн удовлетворительн удовлетворительн удовлетворительн удовлетворительн удовлетвор удовлетвор удовлетвор удовольств удовольств удовольств удовольств удовольств удовольствова удостаива удостоверен удостовер удостовер удостоив удосто удосто удосто удочк удочк удочк удра удручен удручен удушлив удушлив удушлив удуш удуш уд уед уедет уедет уедеш уединен уединен уединен уединен уединен уединен уединен уединен уедин уедин уед уедут уезд уезд уезд уезд уездн уездн уездн уездн уездн уезд уезжа уезжа уезжа уезжа уезжа уезжа уезжа уезжа уезжа уезжа уезжа уезжа уезжа уезж уеха уеха уеха уеха уж ужален ужал ужал ужас ужас ужаса ужаса ужас ужас ужаса ужас ужас ужас ужасн ужасн ужасн ужасн ужасн ужасн ужасн ужасн ужасн ужасн ужасн ужасн ужаснувш ужаснул ужаснул ужасн ужасн ужасн ужасн ужасн ужас ужас ужас ужас уж ужел ужел ужива ужива ужин ужин ужина ужина ужина ужина ужина ужина ужина ужин ужин ужин уж уз узаконен уздц узел узелк узелок узеньк узеньк узеньк узк узк узк узк узк узк узк узл узл узл узл узл узна узнава узнава узнава узнава узнав узна узна узна узна узна узна узна узна узна узна узна узна узна узор узор узор узорн узорн узорн узорн узор узор узор узост узр узр уз уйд уйдемт уйдет уйдет уйдеш уйд уйд уйд уйдут уйм уйм уйт укажет укажет укаж укаж указа указа указа указа указан указан указан указан указан указа указательн указательн указа указк указыва указыва указыва указыва указыва указыва указыв укачив уклад укладк укладк укладк укладк укладк укладк укладок укладыва укладыва 
укладыва укладыва укладыва уклон уклон уклончив уклон уклоня уклоня уклоня уклоня уклоня уклон укокош укор укор укорен укоризнен укоризн укоризн укор укор укоря укоря укоря укоря укоря укоря укоря украден украден украдк укра укра украс украст укр украша украшен украшен украшен укреп укреп укреп укрепля укрепля укрепля укроет укрощ укрыва укр укр уксус уксус уксус укус укус улад улад улад улад улад улан улегл улегл улег улегш улета улета улета улета улетевш улетел улетет улет улет улет улизнет улизнул улизнут улик улик улик улик улик улисс улиц улиц улиц улиц улиц улиц улиц улиц улича уличк уличн уличн уличн уличн уличн уличн уличн уличн уличн уличн улов улов уловим улов улов уловк уловк уловля улож улож улож улож улож улуч улучшен улучшен улучшен улучш улыба улыба улыба улыба улыба улыба улыба улыба улыба улыба улыба улыба улыба улыба улыб улыбк улыбк улыбк улыбк улыбк улыбк улыбк улыбк улыбнет улыбнет улыбнеш улыбнувш улыбнул улыбнул улыбнул улыбн улыбнут улыбок улыбочк ум ум умалива умаливан умалчива умаля умая умбр умбрск ум умевш уме умеет умеет умееш ум умел умел умел умельч ум умен умен уменьшен уменьшен уменьш умен умер умерен умерен умерен умерен умерен умерен умерен умерет умерл умерл умерл умертв умертв умерш умерш умерш умерш умерш умерш умерш умерш умет умеща ум умеют уме умилен умилен умилен умилен умиля умира умира умира умира умира умира умира умира умира умира умира умир умир умн умн умн умн умн умненьк умненьк умник умник умник умниц умниц умниц умнича умничан умнича умн умн умн умн умн умн умн умн умн ум умозрен умозрительн умозрительн умозрительн умол умолк умолк умолка умолка умолка умолка умолк умолкл умолкш умолот умолот умолоч умолча умолч умол умоля умоля умоля умоля умоля умоля умоляющ умоля умоля умол ум умопомешательств умопомешательств умор умор ум умр умрет умрет умреш умр умр умр умрут умствен умствен умствен умствен умствен ум умча умча умч ум умыва умыва умыва умывальн умыва ум ум умысел умысл умысл умысл ум умышлен умышлен умышлен 
унес унесен унес унесет унес унесл унесл унест унесут университет университет университет университетск университетск университетск университет университет унижа унижа унижа униж унижен унижен унижен унижен унижен униж унижен унижен униз унизительн унизительн унизительн унизыва унима унима унима унима унима унима уничижен уничижен уничтожа уничтож уничтож уничтожен уничтож уничтож уничтож уничтож уничтож уничтож уничтож уничтож унос унос унос унос унос унос унося унтер ун уныл ун уныл уныл уныл унын унын унын унын уня уня упа упа упа упада упада упадет упадет упад упадк упадк упадок упад упадут упа упа упа упа упасл упаст упер уперл уперл упер упеч упива упив упира упира упира упира упира упир упир уписа уписа уп уплат уплат уплат уплат уплат уплат уплат уплывеш упл упован упова уподоблен упо упоен упоительн упоко упок уполномоч уполномоч упомина упомина упомина упомина упомина упомина упомина упомн упомн упомянет упомян упомян упомянул упомянул упомянул упомянут упомянут упомянут упор упорн упорн упорн упорн упорн упорн упорн упорн упорств упорств употеб употреб употреб употреб употреблен употреблен употреблен употребл употребля употребля употребля употребля употребл управ управ управител управ управ управлен управлен управлен управл управля управля управля управля управля управля управля управля управл управ управ упраздн упрашива упрашива упрашива упрашиван упрашиван упрашива упрашив упрек упрек упрека упрека упрека упрека упрека упрек упрек упрекнеш упрекнул упрекнул упрекнут упрек упрек упрет упрет упрос упрос упрос упроч упроч упруг упруг упруг упрыга упрямец упрям упрям упрямств упрямств упрямств упрям упрям упрята упуска упуска упуска упуска упущен упущен упущен упущ упыр упьет ур урага уразумел уразумен урезон урн урод урод урод уродлив уродлив урод урожа урожа урожден урок урок урок урок урок уронен уронен урон урон урон урон урон урон урон урон урочн урывк урывочк ус усадеб усад усад усад усад усадьб усадьб усадьб усадьб усажива усажива усажива усажива 
усажива усажива усажив усажив ус усат усахар усач усач усач усво усво усел усел усел усерд усерд усерд усердн усердн усердн усердн усидел усидел усидет усид усид усик усилен усилен усилен усилен усилен усилен усилива усилива усилива усилива усилив усил усил усил усил усил усил усил усил усил усил ускольза ускольза ускольз ускользнет ускользнул ускользнут ускорен ускоря ускор услад услад услад услажда услажда усла услед услов услов услов услов услов услов условн условн услуг услуг услуг услуг услужлив услужлив услыха услыха услыха услыха услыха услыш услыша услыша услыша услыша услышат услыша услыш услыш услыш услыш услыш услыш усмеха усмех усмехнет усмехнувш усмехнул усмехнул усмехнул усмешк усмешк усмешк усмешк усмотрет усмотр уснет уснеш усн усн уснул уснул уснут уснут ус усовещива усомн усопш усопш усопш усп успева успева успева успев успевш успевш успе успеет успеет успееш успел успел успел успен успен успет успех успех успех успех успех успех успех успех усп успеют успокаива успокаива успоко успокоен успокоен успоко успокоива успокоива успокоива успокоива успокоива успоко успоко успоко успоко успоко успоко успоко успоко успоко успокоительн успокоительн успокоительн успокоительн успокоительн успокоительн успоко успоко успоко успоко успок успок успокойт успок успок успоко уст уст устава устава устава устава устава устав устав устав устав устав устав устав устав устав уставлен устав устав уста уста уста устав уста уста устал уста уста устал устал устал устал устал устал устал устал устал устал устал устанет устанеш установ установ установ установ установител установител установ установ установл установлен установл устан устанут устаревш устареет устарел уст уста устила устла устла усто усто устоя устраива устраива устраива устраива устран устран устраня устраня устран устран устрем устрем устрем устрем устремл устремл устремлен устремлен устремлен устремлен устремл устремля устремля устремля устриц устро устро устроен устроен устроен устро устро устро устро устро 
устро устро устро устро устро устро устройств устройств устройств устройств устройств устр устро уступа уступа уступа уступа уступа уступ уступ уступ уступ уступ уступ уступчив устыд усумн усчита ус усыпа усыпа усып усып усыплен усыпля усядет усяд утайк утаскив утащ утащ утверд утверд утвердительн утвержда утвержда утвержда утвержда утвержд утвержд утекл утер утер утерпел утерпел утерпет утечет утеша утеша утеша утеша утеша утеша утеша утешен утешен утешен утешен утешен утешен утешен утешен утеш утеш утешительн утешительн утеш утеш утеш утеш утира утира утира утира утир утиха утиха утиха утихл утихл утихл утихнет утк уткнув утл утол утол утом утом утом утом утом утомл утомл утомлен утомлен утомлен утомлен утомлен утомлен утомлен утомлен утомлен утомлен утомл утомл утомл утомля утомля утомля утонувш утонувш утонул утонул утонут утонут утончен утончен утончен утопа утопа утопа утопа утопа утоп утоп утоп утоп утоп утоп утоп утопленниц утопленниц утопленниц утороплен утороплен утр утр утр утрат утрат утрат утрат утрат утрат утрат утрачен утрачен утреет утрен утрен утрен утрен утрен утренник утрен утрен утрен утречк утр утр утр утр утружда утружден утучня утыка уф ух ух ухажива ухажива ухажива ухажива ухаживан ухват ухват ухват ухват ухват ухват ухватк ух ух ухитр ухитр ухитря ухитря ухищрен ухлопа ухлопа ухлопа ухмыля ухмыл ухн ух уход уход уход уход уход уход уход уход уход уход уход уход уход уход уход уходя уходя ухож ух ух уцелевш уцелевш уцелел уцелел уцелел уцеп уцеп уцеп уцеп уч участвова участвова участвова участв участ участ участ участ участ участ участк участник участник участниц участок участ участ учат учат учащен учащен учебн учебн учен учен учен ученик ученик ученик ученическ учен учен учен учен учен учен учен учен учен учен учен учен учетвер уч уч уч уч уч уч уч уч уч учин уч уч учител учител учител учительниц учительниц учител учител уч уч уч уч уч учнет учрежда учтив учтив уч уч уш уш ушат уш уш ушел уш ушиб ушиб ушибет ушибл ушибл ушиблен ушибл ушк 
ушл ушл ушл ущел ущерб ущербн ущипнет ущипнул у уют уют уютн уютн уют уязв уязв уязв уязвл уязвлен уязвля уясн уясн уясн ф фабрик фабрик фабрик фабрик фабрик фабричн фабричн фабричн фаддеевн фаддеевн фазис факел факел факт факт факт факт факт факт фактическ фактическ фактическ факт факт факт факультет факультет факультет фаланстер фаланстер фаланстер фалд фальш фальшив фальшив фальшив фальшив фальшив фальшив фальшив фальшив фальшив фальшив фальшив фальшив фамил фамил фамил фамильн фамильн фамильн фамильяр фамильярн фамильярн фамильярн фамильярн фамильярн фанатизм фанатизм фант фантазерк фантаз фантаз фантаз фантаз фантаз фантаз фантаз фантаст фантастич фантастическ фантастическ фантастическ фантастическ фантастическ фантастическ фантастичн фантастичн фанфарон фанфаронишк фанфаронств фанфарон фартук фартук фартучек фарфор фарфоров фарфоров фарширова фасон фат фат фатальн фатер фатер фатер фатеришк фатер фатер фат фат фат фат фаун фауст фауст феврал феврал феврал федерац федор федор федоровн федоровн федоровн федос федот федотик федотик федотик фед федяев фейерверк фейерверк фельдшер феном феноменальн ферапонт ферапонт ферапонт ферапонт ферм фермер фермуар ферм ферул феск фестон фестон фестон фет фефел фи фиалк фигляр фигур фигур фигур фигурк фигурк фигур фигур фигур физиологическ физиолог физиолог физионом физионом физионом физионом физионом физическ физическ физическ физическ физическ физическ физическ фил филин филипп филипп филипп филиппик филипп филипп филистер философ философ философистик философ философ философ философск философск философствова философствова философству философств философствует философств философств философ фильк фим финанс финансист финансов финик финифтян финлянд финлянд финлянд финск финск фиолетов фирс фирс фирс фирс фистул флакон фламандк фланг фланелев фланер флегматическ флейт флер флеров флер флигел флигел флигел флигел флобер флобер флорентийск флорентийск флоренц флоренц флоренц флор флот фокус фокус фом фомин фомич фомич фомич фомич 
фомич фомич фон фонар фонар фонар фонар фонарик фонарн фонар фонар фонд фонд фон фонта фонтан фонтан фон форел форел форел форм форм формалистик формальн формальн форм форм форм форм формен формен формен формен формирова формирова формир форм форм формулирова формулярн форм фортепиан фортепиа фортепья фортепья форточек форточк форточк фортун фортун фотограф фотограф фотограф фотограф фра фраз фраз фраз фраз фраз фразер фраз фраз фрак фрак фрак фрак франс франт франт франт франт франт франц францевн францевн францевн франц франциск франц француженк француженк француженок француз француз француз француз французск французск французск французск французск французск французск французск французск француз фрегат фреск фри фривольн фрукт фрукт фу фу фунт фунт фунт фуражечк фуражк фуражк фуражк фуражк фуражк фуражк фурор фур футляр футляр футляр футляр футляр футляр футуризм футуризм футурист фуфайк фуфайк фырка фырка фыркнул фыркнул фье фьезол х ха хажива халат халат халат халат халат хам хандр хандр хандр хандр хандр ханж ханск хаос хаос хаос характер характер характер характеристик характеристик характеристик характерн характерн характерн характерн характерн характер характер характер хар хар харка харлам харламов харчев харчевн харчевн харчевн харчевн харьк харьков харьков харьковск харькоев хат хвал хвал хвал хвал хвалим хвал хвал хвал хвал хвал хвал хваста хваста хваста хвастлив хвастун хвата хвата хвата хвата хвата хвата хвата хвата хвата хвата хват хват хват хват хват хват хват хват хват хват хвора хвора хвора хвор хвост хвостат хвост хе хен херасков херес херес херувим херувим херувимов хижин хижин хижин химер химер хин хирург хитер хитон хитр хитр хитр хитр хитр хитр хитрец хитрец хитр хитр хитр хитр хитр хитр хитрост хитростн хитрост хитрост хитрост хитр хитр хитр хитр хихика хихика хихика хихикан хихикан хихика хихик хищник хищниц хищн хищн хлад хлад хлад хладн хладнокров хладнокров хладнокров хладнокровн хладнокровн хладн хладн хладн хлам хлам хлеб хлеб хлеб 
хлебник хлебник хлебнувш хлебн хлеб хлебц хлеста хлестнул хлещет хлещ хло хлоп хлопа хлопа хлопан хлопан хлопа хлопнул хлопов хлопот хлопота хлопота хлопота хлопота хлопот хлопот хлопота хлопот хлопотлив хлопотлив хлопотлив хлопотун хлопот хлопоч хлопочет хлопочет хлопоч хлопоч хлопочут хлоп хлороформ хлынет хлынувш хлынул хлынул хлынул хлынул хлынут хлын хлыст хлыст хлыстик хлыст хме хмелел хмел хмел хмельн хмельн хмел хмел хмур хмур хмур хмур хмур хмур хмурьт хмур хныка хнычет хнычеш хнычущ хо ход ход ходата ход ход ход ход ход ход ходил ход ход ход ход ход ход ходьб ходьб ход ход ходяч ходя хожал хожден хожден хожден хожден хож хоз хозя хозяев хозяев хозяин хозяин хозяин хозяин хозяйк хозяйк хозяйк хозяйкин хозяйкин хозяйкин хозяйкин хозяйк хозяйк хозяйнича хозяйнича хозяйнича хозяйск хозяйск хозяйск хозяйск хозяйск хозяйск хозяйск хозяйск хозяйств хозяйств хозяйствен хозяйствен хозяйствен хозяйствен хозяйствен хозяйствен хозяйств хозяйств хозяйств хозяюшк холер холер холм холм холм холм холм холмик холм холм холод холод холод холодеет холодел холод холодеющ холоде холодк холодн холодн холодн холодн холодн холодн холодн холодн холодн холодн холодн холодн холодн холодн холодн холодн холодок холод холод холод холоп холост холост холост холст холстин холстин холстин хомут хомут хор хор хорват хор хорек хоровод хоровод хоров хор хорон хорон хорон хорош хорош хорош хорош хорош хорош хорош хорошеньк хорошеньк хорошеньк хорошеньк хорошеньк хорошеньк хорошеньк хорошеньк хорошеньк хорош хорош хорош хорош хорош хорош хорош хор хорунжин хотевш хотевш хотел хотел хотел хотел хотел хотел хотет хот хот хот хот хот хохл хохл хохлушк хохот хохот хохота хохота хохота хохота хохот хохот хохоч хохочет хохочеш хохочут хочет хочет хочеш хоч хош храбр храбрец храбр храбр храброст храм храм хран хран хран хран хран хран хран хран хран хран храп храпен храпен храпет храп храп хребт хрен хрен хрен хрипел хрипен хрипет хрип хрипл хрипл хрипл хрип христ христ христианин христианск 
христианск христ христов христ христос христ хром хром хром хроник хроническ хрупк хрупк хрупк хруст хруст хруста хрустал хрустальн хрустальн хрустальн хрустальн хрустальн хрустальн хрустальн хрустел хруст хруст хрычовк худ худ худ худеньк худеньк худеньк худеньк худ худ художествен художествен художествен художествен художествен художествен художествен художеств художник художник художник художник худ худосочн худоща худощав худ худ худ хуж хулен хул хуторк хуторок хуторочк цапл цар царевн царевн царевн царевн цар цар цариц цариц цариц царск царск царск царск царств царствен царств царств царств царствова царствова царств царств царств цар царьград царьградск цар цар цвел цвел цвел цвел цвет цвет цвет цвет цвет цвет цветен цветет цветеш цветк цветк цветк цветк цветн цветник цветник цветник цветн цветн цветн цветн цвет цветок цветочк цветочк цветочк цвет цветут цветущ цветущ цвет цезар цел цел целебн цел цел цел целков целков целков целков целова целова целова целова целова целован целова целова цел цел цел цел цел целомудр целомудрен целомудрен целомудрен целомудрен целомудр целомудр целомудр целостн целост цел цел цел цел целуеш цел цел цел цел целу цел цел цел цел цел цел цел цельн цельн цельн цел цел цел цен цен цен цен цен цен цен цен цен цен цен центр центр центр цен цен цен цен цен цеп цепене цеп цепк цепля цепн цепочк цепочк цепочк цепочк цепочк цепочк цеп цеп церемон церемон церемон церемон церемон церемон церкв церкв церкв церковн церковн церков церков цехов цивилизова цикор цикор цилиндр цилиндр циммерма циммермановск цинизм цинизм циник циник циник циническ циническ циничн циничн цирк циркуляц цирф цирюльник цитат цит цифр цифр цифр цифр цицикар цокан цссс цугундер цуниг цусим цыганк цыганк цыганск цыганск цыганск цыганск цыганск цып цыпленк цыпленок цыпл цыплят цыплят цыплят цыпочк цыпочк цыпочк ча чад чад чад чадин чадр чад ча ча ча чаишк ча чайк чайк чайк чайк чайн чайник чайник чайник чайниц чайн чайн чайн чайн чайн чалм чалм чар чародейн чарод чар 
чар чар чар час час час час час часик часик час часов часовенк часовн часовн часовн часов часок час частеньк част част частн частн частн частн частност частн частн частн частн част част част част част част част част час час чахнет чахнут чахнут чахотк чахотк чахотк чахотк чахотк чахоточн чахоточн чахоточн чахоточн чахоточн чахоточн чахоточн чаш чаш чашек чашечк чаш чашк чашк чашк чашк чашк чаш чащ чащ чащ ча ча ча че чебар чебаров чебаров чебаров чебутыкин чебутыкин чебутыкин чебутыкин чебутыкин чег че чекмен чел челк челк челн челнок чел человек человек человек человек человек человек человек человечек человеческ человеческ человеческ человеческ человеческ человеческ человеческ человеческ человеческ человеческ человеческ человечеств человечеств человечеств человечк человечн человечн человеч чел чем чемода чемода чемодан чемодан чемода чем чепец чепрак чепух чепух чепух чепц чепц чепц чепц чепц чепц чепчик чепчик чепчик чепчик чер червов червон червон черв червяк чердак чердак черед чередова черед через черемух черемш черемш чер черенк череп череп черепах черепк черепк черепок череп череп чересчур черн черн черн чернеет чернеет черн чернел чернел черненьк чернеют черн черн черн чернил чернильниц чернильниц чернильниц чернильниц чернильниц черн чернобров черноволос черноглаз черн черн черноземн черн черн черн черноок чернорабоч чернот чернот чернот черн черн черн черн черн черн черн черн черпа черпа черпнут черств черств черств черт черт черт черт черт черт чертеж чертеж чертеж черт черт чертик черт черт черт черт чертовск чертог черт черточек черточк черточк черточк черт черт черт черт чеса чеса чеса чеснок честв чест чест чест чест честн честн честн честн честн честн честн честн честн честн честност честност честн честн честн честн честн честн честн честолюб честч чест чест чет чет четверг четверг четверг четвер четвертак четверт четверт четверт четверт четверт четверт четверт четверт четверт четверт четверт четверт четвер четк четкост четыр четыр четырест четырех 
четырехугольник четырехэтажн четырехэтажн четырнадцат четырнадцатилетн четырнадцатилетн четырнадца чехартм чехартм чехл чехл чех чеченск чешет чешет чешут чеш чи чиж чиж чик чикан чин чин чин чин чин чин чин чин чин чин чин чин чин чин чиновник чиновник чиновник чиновник чиновник чиновник чиновник чиновниц чиновниц чиновниц чиновничеств чиновнич чиновнич чиновн чиновн чиновн чиновн чин чин чин чирика чирика чисел числ числ числен числ числ числ числ чист чист чист чист чист чистеньк чистеньк чистеньк чист чист чист чист чистк чист чист чист чист чист чист чистоплот чистоплотн чистосердечн чистосердечн чистот чистот чистот чистот чистот чист чист чист чист чист чист чист чист чист чистюльк чит чита чита чита чита чита чита чита чита чита чита чита читал читальн читальн читател читател читател читател чита чита чита чит читк чит чихан чиха чихнул чихнут чищ чищ чищен чищен чищ член член член член член член член чмока чока чопорн чопорн чопорн чорт чорт чревовещательниц чред чрез чрезвычайн чрезвычайн чрезвычайн чрезвычайн чрезвычайн чрезвычайн чрезвычайн чрезвычайн чрезвычайн чрезмерн чрезмерн чт чтен чтен чтен чтен чтен чтим чтит чтит что чтоб чтоб что чту чу чубук чуб чув чувств чувств чувств чувств чувств чувств чувствительн чувствительн чувствительн чувствительн чувствительн чувствительн чувствительн чувствительн чувствительн чувств чувствова чувствова чувствова чувствова чувств чувств чувству чувств чувствует чувств чувствуеш чувств чувств чувств чувству чугун чугун чугун чугунк чугун чугун чугун чугун чуд чудак чудак чудак чудак чудак чудачеств чудачк чуд чуд чудес чудес чудес чудесн чудесн чудесн чудесн чудесн чудесн чудесн чудесн чудесн чудесн чуд чудищ чудищ чудн чудн чудн чудн чудн чудн чудн чуд чудовищ чудовищ чудовищ чудовищ чудовищ чудовищн чудовищн чудовищн чудовищн чудовищн чудовищн чуд чуд чуд чует чуеш чуж чужд чужд чужда чужда чужд чужд чужд чужд чужд чуж чуж чуж чуж чуж чуж чуж чуж чуж чуж чула чулан чула чулк чулк чулк чулк чулк чулок чулочк 
чулочн чум чумичк чумичк чурба чутк чутк чутк чутк чутк чуткост чутошн чут чухонец чухонк чухонк чухонц чухонц чучел чучел чуш чуют чу чуя чье чьег чье чьи чьим чьим чьих чья шабаш шаг шаг шага шага шага шага шаг шаган шаг шага шага шаг шаг шаг шагн шагнул шагнул шагнут шаг шаг шаг шажищ шайк шайк шакал шал шал шал шал шаловл шаловлив шаловлив шалост шалун шалун шал шал шал шал шал шама шампанск шампанск шампанск шампанск шамра шамраев шамраев шамшевк шанс шапк шапк шапк шапк шапочк шапочк шар шар шар шар шар шар шаркнул шаркнул шарлатанств шарлотт шарлотт шарлотт шарманк шарманк шарманк шарманк шарманщик шарманщик шарманщик шарманщик шармер шар шар шаровар шар шарфик шаршав шар шар шата шата шата шата шата шатан шата шата шата шат шатер шатк шатк шафер шахматов шахт швам швах швейцар швейцар швейцар швейцарск швейцарск швейцар швыря швыря швыр ше шевел шевел шевел шевел шевел шевел шевел шевел шевел шевел шевельнет шевельнувш шевельнул шевельнул шевельнул шевельнут шевел шевел ше ше ше шейн шекспир шекспир шел шелест шелест шелест шелест шелк шелк шелк шелковист шелков шелков шелков шелков шелков шелков шелков шелк шеловек шелом шелопаев шелохнут шельм шельм шельм шепнет шепнеш шепнул шепнул шепот шепот шепотлив шепот шепот шепта шепта шепта шепта шептан шепта шепч шепч шепчет шепчут шепчут шепчущ шерстк шерст шерстян шерстян шерстян шерстян шест шест шеств шестер шест шестидесят шестнадцат шестнадцатилетн шестнадцатилетн шестнадца шест шест шест шест шест шест шестьдес шестьсот ше ше ши шикан шил шиллер шиллер шиллеровск шил шил шил шинел шинел шипевш шипел шипен шип шипк шиповник шип шипя шир ширин ширин ширин ширин шир ширм ширм ширм ширм ширм широк широк широк широк широк широк широк широк широк широк широк широк широк широк широк широк шир шир шит шит шит шит шит шит шит шиш шиш шишк шкалик шкандал шкап шкап шкап шкатулк шкатулк шкатулк шкаф шкаф шкаф шкафик шкаф шкаф шкафчик шкаф школ школ школ школ школ школьник школьник шкур шла шлафрок шлафрок шлезвиг шле 
шлейф шлем шлепохвостк шлепохвостниц шлет шлеш шли шло шлю шлют шляеш шлял шлял шляп шляп шляп шляпк шляпк шляпк шляпк шляпник шляп шляп шляп шлярк шлят шмел шмыга шмыга шмыгнул шнурк шнурочек шныря шныря шопенгауэр шопот шопот шопот шопот шорка шоркнул шорох шосс шоссейн шоссейн шотландск шотландск шпаг шпал шпил шпилек шпильк шпор шпор шпор шпыня шта штаб штабн штабс штан штан штат штат штат штатск штатск штатск штатск штатск штатск штат штейн штиблет што штольн штольц штольц штольц штольц штольц штольц штопа штор штор штор штор штор штоф штоф штраф штрих штрих штрюк штук штук штукатурк штук штук штук штучк штучк штучк штык штык штык шуб шуб шубенк шуберт шубк шубк шубк шуб шуб шулер шулер шулер шум шум шума шум шумел шумет шум шумилов шумиловск шум шумн шумн шумн шумн шумн шумн шумн шум шум шум шумя шурин шурш шурш шут шут шут шут шут шут шут шут шут шут шутк шутк шутк шутк шутк шутк шутлив шутник шутовск шутовск шуточк шуточн шут шут шут шуч шушукан шхун шьет шьют щад щад щад щад щад ще щебет щебечут щеголеват щеголих щегольск щегольск щегольск щегольск щегольск щегольск щегольск щегольск щеголя щеголя щедр щедрин щедрост щедрот ще щек щек щек щек щек щек щекота щекота щекотл щекотливеньк щекотлив щекотлив щекотлив щек щел щел щелка щелкан щелкан щелка щелка щелкнул щелкнут щелк щелочк щелочк щелочк щелочк щелчк щел щемя щемя щемя щенк щенк щенк щенок щепоточк щербат щетин щетинист щетин щетк щетк щетк щетк щечк щечк щечк щи щин щипа щипа щиплет щипц щит щит щит щук щупа щупленьк щур ы э э эв эвон эв эг эгоизм эгоизм эгоизм эгоист эгоистк эгоистк эгоист эгофутуризм эдак эдак эдгар эдгар э эйфелев эк эк эк экзам экзаменова экзекутор экземпляр экземпляр экземпляр экзотическ эк эк экипаж экипаж экипаж экипаж экипаж эк эк эк эконом эконом экономическ экономическ экономическ экономическ экономическ эконом эконом экономк экономк экономк экс экспансивн экспансивн экспедиц экстраординарн эксцентрич эксцентрическ эксцентрическ эксцентрическ эксцентрическ эксцентричн 
элег электрическ электричеств электрон электрон электрон элемент элемент элемент эмал эмансипац эмансипац эмблем эмеритур эмс энерг энерг энергическ энергическ энергическ энергичн энерг энтузиазм энтузиазм энтузиазм энциклопедическ эп эпиграф эпидем эпизод эпизодическ эпизодическ эпизод эпикуреец эпилог эпилог эпилог эпилог эпитаф эпитет эполет эпоп эпох эпох эпох эпох эпох эрар эрланген эрмитаж эр эр эскамил эск эспаньолк эссенц эстет эстетик эстетик эстетик эстетическ эстетическ эстетическ эстрад эстрад эстрад эстрад эт этаж этаж этаж этаж этажерк этажерк этажерк этажерк этажерк этаж этаж этаж этак этак этак этак этак этак этак этак этак этак этак этак этак этак этак эт эт эт эт эт эт эт эт эт этот эт этт эт этюд эфемерн эфемерн эфес эфир эфирн эфир эффект эффект эх эх эшафот эшафот эшафот эшелон ю юбил юбк юбк юбк юбк юбк ювелир ювелирск ювелирск ювена юг юг юдол южн южн южн юл юл юмор юн юнкер юн юн юност юност юност юнош юнош юнош юнош юношеск юношеск юношеск юношествен юношеств юнош юнош юн юн юн юн юпитер юридистик юридическ юридическ юридическ юридическ юридическ юридическ юридическ юридическ юриспруденц юрист юрист юрист юр юркнул юродив юродив юродив юродив юрт юсуп юсупов юсупов юшин ющинск я яблок яблок яблок яблок яблок яблок яблон яблон яв яв яв яв яв яв яв яв яв яв явк явк явк явлен явлен явлен явлен явлен явлен явлен явл явля явля явля явля явля явля явля явля явля явл явн явн явн явн явн явн явствен явствен яв яв ягод ягод яд яд ядовит ядовит ядовит ядовит ядовит ядовит ядовит ядовит ядовит яд ядр ядр яд язв язв язв язв язв язв язв язвительн язвительн язвительн язвительн язвительн язв язык язык язык язык язык язык язык яиц яичк яичниц яичниц яичн яйц яйц яйц яйц як якоб як яков якор якор якор ямайск ямайск ям ямб ямб ямб ям ямк ямк ям ямочк ямск ям ямщик ямщик ям январ январ январ янтарн янтарн янтар ярк ярк ярк ярк ярк ярк ярк ярк ярк ярк ярк яркост яркост ярк ярлык ярлычк ярмарк ярмарк ярмарк ярмарк ярм яров ярок ярославл ярославск ярост яростн 
яростн ярост ярост ярч яр яс ясенев ясл ясн ясн ясн яснеет ясн ясн ясн ясн ясн ясност ясност ясн ясн ясн ясн ясн ясн яств ястреб ястреб ят яхонтов ячмен ячмен ячмен ячмен яш яш яш яш яшнев яш ящик ящик ящик ящик LucenePlusPlus-rel_3.0.4/src/test/testfiles/russian/testUTF8.txt000066400000000000000000000015031217574114600247350ustar00rootroot00000000000000Вместе с тем о силе электромагнитной энергии имели представление еще, скажем, жрецы Древнего Египта. Но знание это хранилось в тайне, в узком кругу посвященных. Всякий временной виток, принося с собой новые технологии, на самом деле раскрывает потаенное знание прежних веков. Мы уже говорили, что новая информация становится доступной широкому кругу пользователей только в тех случаях, когда сознание общества готово ее воспринять и воспользоваться ею. LucenePlusPlus-rel_3.0.4/src/test/testfiles/russian/wordsUTF8.txt000066400000000000000000032015031217574114600251210ustar00rootroot00000000000000а абиссинию абонемента абонировался абонируйся абонируюсь абрикосы август августа августе авдотье авдотьей авдотьи авдотью авдотья авенантненькая аверка аверкой авиатор авиаторов авиация авось авраама австрии австрийский автобиографию автографом автомобили автомобиль автор авторам авторитет авторитета авторитеты авторов автором ага агадир агамемнон агаповну агафье агафьей агафьи агафью агафья агента агонии агроном агрономии ад ада адам адамант адвокат адвокатом адвокатского адвокатской адвокатскую административный административных администратор адмиралы адом адрес адреса адресный адресовано адресом адресу адрианополя адриатической адская адский ажно азарта азартом азбуку азвольте азиатской азиатчина азии азию азраил азы аи ай айвазовского айда ак академии академию акации акацию акварелью аккомпанемент аккомпанировал аккорд аккорда аккордом аккуратнее аккуратно аккуратного аккуратный акмеизм аксельбантами аксинье аксиома акт акта акте актер актерам актером актеры активной актриса актрисой актрисы акулина акулине акулиной акулину 
акулины акурат акушером акцентом акцизе акции акций акциями акциях ал алая алгебра алгеброй алгебру алебастра алеко александр александра александринского александрович александровна александровне александровной александровну александровны александром александру алексевна алексеев алексеева алексеевич алексеевича алексеевичем алексееву алексеевым алексеевыми алексеич алексеичем алексей алексея алела алена алене алену алены аленькие алеши алешка алеют алея али аллах аллегории аллее аллеи аллею аллея аллеям алмаз алмаза алмазах алмазы алой алом алоэ алтарь алтарю алтаря алчущим алые алый алым алыми алых аль альбертыч альбом альбомами альмы альтшауер альянов амалией амалии амалию амалия амаль амбар амбара амбарах амбары амбиций амбицию америка американским америке америки америку аметист аминь амням ан анализ анализа анализировал анализирующей анализом анализу аналоем анамнясь ананас ананасы анастасий анатомии анатомию ангажемент ангары ангел ангела ангелины ангелов ангелом ангелочка ангелу ангелы ангельски ангельские ангельской англии английская английски английский английского английском английскому англичан англичанах англичане англичанин англию андрее андреев андреевич андреевна андреевне андреевной андреевну андреевны андреевым андреем андреич андреича андреичем андреичу андрей андрею андрея андрюша андрюшанчик андрюше андрюшей андрюши андрюшка андрюшке андрюшку андрюшу ане аней анекдот анекдоты анечка ани анисье анисьей анисьи аниська анисью анисья анна анне анной анну анны антверпен антип антипа антипка антипке антипу антон антонов антракт антракте анфилада анфиса анфисе анчоусы аню аня апатии апатичен апатически апатический апатическим апатию апатия апелляции апельсинничаешь апельсинов аплодирует аплодируя аполлона апоплексический апоплексию аппетита аппетитно аппетитом апраксина апреле апрель апрельских апреля аптека аптекарь аптеке аптеки аптеку аптечного апухтинские ар аравийской арапка арапку арбуз арбузы аргументом арена арендатором аренду аренды арене 
арену арены арест арестант арестанта арестантов арестантских арестантской арестанту арестовать арестовывают арестуйте арестуют аресты арий аринушка аристократическим аристократическими аристократических аристократического аристократическом аристократическою арифметика арифметике арифметику ариша арию аркадий аркадина аркадиной аркадию аркадия аркой аркою армейский армией армии армию армиями армяк армяками армяке армячишка аромата аромате ароматном ароматные ароматным ароматом ароматы артели артельных артельщик артельщика артельщиком артельщику артемий артемью артист артиста артистически артистической артистка артистке артисткой артистов артисты артуром арфа арфам арфе арфы архангел архангельск археолог архив архива архимедова архитектор архитектором архитектору архитектура аршин аршина аршине асессором асессорше аскет ассигнацию ассигнациями ассоциациях астма астрахани астраханские астров астрова астрову астровым астрономии ась атакует атласам атласная атласном атласным атласными атмосфере атмосферы атомах атомов атрибуты аттестат аттестате ау аудитории аукцион аукциона ауле аус афанасий афанасию афанасия афанасьев афанасьевич афанасьевича афанасьевичу афиша африке африки африку афросиньюшка афросиньюшки аффектацией ах ахали аханья ахают ахиллес ахиллесовской ахиллесу ахиллов ахматовой ахнул ахнула ахнули ахнуть ацтеки аэроплан аэроплана б ба баб баба бабенка бабками бабой бабок бабочка бабочки бабочку бабу бабушек бабушка бабушке бабушки бабушкин бабушкина бабушкиной бабушкой бабушку бабы бабье бабьей бабьи бабьим бабья багаж багрово багровой багровому багровые багровым багряный багряным бадья бает базар базарах базаре базилик баиньки бай байро байрон байрона бакалеева бакалееву бакенами бакенбард бакенбарда бакенбардами бакенбарду бакенбарды бал бала балаган балаганы балалайка балалайками балами балах бале балерины балета балете балканах балканы балке балкон балкона балконами балконах балконе балконом баллада баллотироваться баловал балованых баловать баловень 
баловника баловню баловство балочов балу балуй балующий бальзак бальтазару банальной бане бани банк банках банке банки банкирскую банкирша банков банковский банку баночку барабан барабанил барабанит барабанить бараков баран барана бараний баранина бараниной баранину баранины бараном бараньим барашками барашком барже барин барина барине барином барину барканом барках барки бармгерциге барон барона бароне бароном барону бароны барская барские барский барских барского барское барской барском барскую барство бартола бархат бархата бархате бархатную бархатные бархатный бархатных бархату бархатцами барчонка барчонком барчонок барщина барщину бары барыни барынин барыню барыня барыш барыше барышень барышне барышней барышни барышниной барышню барышня барышнями барышом бас баск баском басманной басни баснословных басом баста басурманов батальонных батальоны батареи батарейный батарейным батарею батарея батистовый батюшка батюшке батюшки батюшков батюшкова батюшкой бах бахрома бахроме бахромой бахрому бахромы бахуса бац башен башка башлыком башмаках башмаки башмачки башмачонки башне башни баюкает бдение бдительного беато беатриче бег бега бегавшая бегавшими бегает бегаешь бегай бегал бегала бегали бегать бегах бегаю бегают бегающих бегая беги бегите бегло беглый беглым беглых бегом беготне беготней беготни беготня бегство бегу бегуны бегут бегущего бегущий бегущую бед беда бедам беде беден бедная беднее беднейшее беднейшей беднейшие беднейшую беднел бедненького бедненькому бедно бедного бедное бедной бедном бедному бедности бедность бедностью бедною бедную бедны бедные бедный бедным бедных бедняжка бедняжки бедняжку бедняк бедняки бедовали бедой бедр бедрам бедро бедственное бедствий бедствия бедствиями бедствует беду беды бежавших бежал бежала бежало бежать бежим бежит бежишь без безбожная безбожник безбожника безбожникам безбожно безбожные безбольно безбородкин безбрежной безверие безвестные безвестный безвкусица безвкусно безвозвратно безвольно безвреден безвыездно 
безвыходная безвыходное безграмотно безграмотным безграничная безграничнейший безгранично безграничного безграничной безграничном безгрешным бездарно бездарности бездарность бездарных бездействии бездействию бездействия безделицу безделицы безделку бездн бездна бездне бездной бездну бездны бездомна бездомней бездомным бездонна бездонной бездонностью бездонную бездонным бездонных бездорожье бездорожью бездушном бездыханна бездыханном бездыханность безжалостно безжалостный беззаботен беззаботная беззаботно беззаботной беззаботность беззаботный беззаботным беззаботными беззаветно беззаветный беззакатного беззакатный беззащитная беззащитную беззащитные беззвездной беззвездный безземельная беззлобен беззубый беззубым безличное безличности безличный безлюдном безлюдность безлюдьи безмерно безмерными безмозглая безмолвен безмолвного безмолвное безмолвны безмолвные безмолвный безмолвным безмолвных безмолвствует безмужних безмятежно безмятежной безмятежность безмятежный безнадежная безнадежней безнадежно безнадежного безнадежной безнадежности безнадежность безнадежный безнаказанно безначальной безначальный безносым безнравственная безнравственно безнравственны безнравственный безнравственным безо безобидно безобидного безоблачного безоблачность безоблачный безобразен безобразие безобразию безобразия безобразна безобразная безобразнее безобразнейший безобразнейшим безобразник безобразничал безобразничать безобразно безобразного безобразное безобразной безобразном безобразною безобразную безобразные безобразный безобразным безобразными безобразных безобразью безопасным безответная безответного безответный безотрадно безотрадной безотрадным безотчетно безотчетной безотчетности безотчетным безошибочно безошибочною безполезны безрадостной безрадостность безразличный безрассудно безрассудное безрассудным безропотно безукоризненна безукоризненно безукоризненное безумен безумие безумна безумная безумней безумно безумного безумное безумной безумном безумную безумный безумным 
безумными безумных безумствовать безумствует безумца безумцы безумья безупречная безусловно безутешная безучастия безучастно безучастным безызвестно безыменные безысходна безысходней безысходность безысходные безысходным бей бейся бейте бекетов бекешах бел бела белая белее белеет белели беленькая беленькие беленький беленьким беленького белесым белея белизна белизной белизны белильню белинского белится беллетрист беллетристов беллетристом беллетристу белобрысые белобрысыми беловатой беловодовым белого белое белой белокурая белокуренькая белокуренькое белокурою белокурые белокурый белокурыми белом белому белоручка белоснежная белоснежной белою белую белые белый белым белыми белых бельведерского бельгию белье бельем бельмеса белья бензол берг бергом бердичеве берег берега береги берегись берегите берегитесь берегла береглась берегли берегов берегся берегу бередило бережет бережете бережешь бережно березками березку березняк березняка березовая березовую березы берем беременна берет берете берется беречь беречься берешь бери берите берлин берлога берлоге беру берусь берут берутся беря бес бесед беседах беседе беседка беседке беседки беседку беседовавшим беседовал беседовала беседой беседу беседует беседуют бесенок бесился бесится бесконечна бесконечная бесконечно бесконечного бесконечное бесконечной бесконечном бесконечному бесконечности бесконечность бесконечною бесконечную бесконечные бесконечный бесконечным бесконечных бескорыстно бескорыстное бескрайною бескровно бескровных бесноватыми беспамятная беспамятный беспамятстве беспамятство бесперечь беспечальное беспечнее беспечно беспечное беспечности беспечность беспечный беспечными бесплодная бесплодного бесплодные бесплодных бесповоротно бесподобно беспокоен беспокоил беспокоила беспокоилась беспокоили беспокоило беспокоился беспокоим беспокоит беспокоитесь беспокоить беспокоиться беспокоишься беспокой беспокойная беспокойнее беспокойно беспокойного беспокойное беспокойной беспокойны беспокойный беспокойным 
беспокойных беспокойства беспокойстве беспокойство беспокойством беспокойся беспокойте беспокойтесь беспокою беспокоюсь беспокоят бесполезная бесполезно бесполезное бесполезной бесполезности бесполезную бесполезны бесполезный бесполезных бесполый беспомощен беспомощно беспомощное беспомощном беспомощности беспомощность беспорядка беспорядках беспорядке беспорядков беспорядок беспорядочная беспорядочно беспорядочному беспорядочную беспошлинный беспощадно беспощадной беспредельная беспредельно беспредельной беспредельном беспредельную беспредметная беспрекословно беспременно беспрерывная беспрерывно беспрерывного беспрерывной беспрерывную беспрерывные беспрерывный беспрерывными беспрерывных беспрестанно беспримерно беспристрастно бесприютным беспутной бессвязные бессвязных бессердечие бессилен бессилие бессилии бессилия бессильно бессильного бессильное бессильною бессильны бессильный бессильных бессилья бесследно бессловесный бессловного бессменно бессмертной бессмертную бессмертный бессмертья бессмысленная бессмысленно бессмысленной бессмысленность бессмысленные бессмысленный бессмысленным бессмысленными бессмыслица бессмыслице бессмыслицей бессмыслицу бесснежная бессовестно бессознательная бессознательно бессознательной бессознательном бессознательным бессоницы бессонная бессонницы бессонной бессонные бессонный бессонных бесспорно бесспорное бесспорной бесстрастен бесстрастная бесстрастно бесстрастной бесстрашная бесстрашней бесстыдник бесстыдники бесстыдно бесстыдные бесстыдным бессчетно бестактно бестия бестолковую бестолковые бестолковых бестужеву бесхарактерности бесцветен бесцветно бесцветной бесцветны бесцветных бесцельная бесцельно бесцельный бесцельных бесценное бесценный бесцеремонно бесчеловечия бесчеловечно бесчестие бесчестит бесчестная бесчестном бесчестные бесчестным бесчестье бесчестья бесчинства бесчисленно бесчисленного бесчисленных бесчувствен бесчувственна бесчувственная бесчувственного бесчувственному бесчувственный бесчувственным бесчувствия 
бесшумно бетховен бешеная бешеного бешеной бешеном бешеною бешенства бешенстве бешенство бешенством бешеные бешеным библейские библиотек библиотека библиотекарем библиотеке библию библия бивал бивали бившего бивший биение биением биению биения бил била билась билет билета билетами билетах билетик билетика билетиками билетов билетом билету билеты били биллиард биллиарда биллиарде било билось бился биль бильярд бильярде бильярдной бильярдную бильярдный бинокле бинокль биография бирже биржи бис бисер бисером бисквитов бисквиты битв битве битву битвы битком битое биты битый бить битье биться бифштекс бифштекса бицепсы бич бичами бичурину бишь бла благ блага благим благих благо благовещение благовиднее благоволите благовония благовонной благовоспитанным благовоспитанных благоговеете благоговейно благоговейного благоговейным благоговела благоговение благоговением благоговеть благоговею благодарен благодари благодарил благодарила благодарили благодарим благодарить благодарна благодарная благодарно благодарном благодарности благодарностию благодарность благодарностью благодарю благодаря благодати благодать благодетелей благодетелем благодетели благодетель благодетельной благодетельные благодетельствовал благодетельствовать благодетельствует благодетеля благодеяние благодеяний благодеяния благодушном благолепие благонадежный благонамереннейший благонамеренного благонравной благонравны благополучно благоприобретенные благоприобретенным благоприятного благоприятное благоприятный благоприятствовавшие благоразумие благоразумия благоразумная благоразумнейшей благоразумно благоразумных благороден благородие благородная благороднее благороднейшая благороднейший благородно благородного благородное благородной благородном благородною благородны благородные благородный благородным благородных благородства благородстве благородство благородством благосклонно благосклонной благосклонный благословением благословениями благословенно благословенной благословенном благословенный благослови 
благословил благословила благословили благословит благословить благословишь благословлю благословляет благословляла благословлять благословляю благословляя благословясь благословят благосостояние благосостояния благости благость благотворители благотворительности благотворительность благотворны благотворный благоугодным благоусмотрение благоуханное благоуханный благоуханных благоуханье благоухающая благоухающею благочестиво блаженно блаженное блаженной блаженные блаженный блаженным блаженных блаженства блаженстве блаженство блаженствовал блаженствовали блаженствовать блаженством блаженствую блаженствуют блажной блажь блажью бланбеками бланбеки бледен бледна бледная бледневший бледнее бледнеет бледнела бледненькое бледнеть бледнеют бледнея бледно бледного бледное бледной бледном бледность бледночернильным бледную бледны бледные бледный бледным бледными бледных блеклыми блеск блеска блеске блеском блеснет блесну блеснувшим блеснул блеснула блеснули блеснуло блеснуть блестели блестело блестит блестками блестят блестящей блестящем блестящему блестящею блестящие блестящий блестящим блестящих блестящую блещет блещешь блещут блещущей блещущий блещущим блеющие ближайшее ближайшем ближайшие ближайший ближайшую ближе ближней ближний ближним близ близится близка близки близкие близким близкими близких близко близкого близкое близкой близком близкую близлежащие близок близорука близость близь блин блинах блинов блины блиставшего блиставшее блистал блистали блистало блистанье блистательной блистательным блистательных блистать блистают блистающий блистающими бло блок блонд блондах блондин блондинка блондинки блондины блонды блохи блудницу блуждает блуждаете блуждал блуждали блуждающий блуждая блузе блузу блю блюд блюда блюдами блюде блюдечками блюдечке блюдечко блюдо блюдом блюдце блюсти блюстителя бо бобах бобик бобика бобику бобов бобровых бобы бог бога богат богата богатая богатеет богатеют богато богатого богатой богатом богатому богатства богатство богатую богаты богатые 
богатый богатым богатыре богатырь богатырях богатых богач богаче богачи богачом богданович богданыч боге богема богемии богемскую боги богиня богом богомолье богомольно богомольной богомольные богородица богородицу богохульник богу бодр бодрая бодрился бодро бодрое бодрой бодром бодрости бодрость бодростью бодрствовал бодрствовали бодрствую бодрый бодрым бодрых бодягой боевой боем боец боже божества божественно божественной божественным божественных божество божие божией божиим божий божилась божится божию божия божьего божьей божьим божья боитесь боится боишься бой бойкая бойкие бойкий бойким бойко бойкого бойкое бойни бойся бойтесь бок бока бокал бокала бокалах бокале бокалу бокам боками боки бокля боков бокового боковой боковом боковую боком боку болван болваном болвану боле более болеет болезная болезней болезненная болезненно болезненного болезненное болезненной болезненном болезненность болезненную болезненные болезненный болезненным болезненных болезни болезнию болезнь болезнью болезнями болезнях болел болела болели болело болен болеть болеют боли болит болосы болот болота болотистым болотной болотные болотный болото болотом болтает болтается болтай болтал болталась болтали болтались болтался болтать болтаю болтают болтаются болтаясь болтлив болтлива болтливая болтливы болтовней болтовни болтовню болтовня болтун болтунишки болтушка боль больна больная больнее больней больниц больница больнице больницу больницы больно больного больное больной больном больному больною больную больны больные больным больными больных большая больше большего большеголовый большее большей большем большею большие большим большими большинстве большинство большинством больших большого большое большой большом большому большою большую болью болят бомбами бонжур бонне бор бореля борется борис бориса борисович борису бормотал бормотала бормотали бормотание бормотаний бормочет бормочешь боров борода бороде бородкой бородой бороду бороды бороздою боролась боролись боролось боролся бороться 
борт борта бортов борты борцов борьба борьбах борьбе борьбой борьбу борьбы борются борясь босая босенькая босиком боскете босой босоногих бостон босу босые ботаник ботвинью ботвинья ботинками ботинках ботинки ботиночки бочка бочки бочкой бочком бочонка бочонок бою боюсь боя боявшиеся боявшийся боязливее боязливо боязливый боязни боязнь боязнью боялась боялись боялся боясь боятся бояться браво бравое брак брака браке браки браком браку брал брала бралась брали брался бранда брани бранил бранила бранили бранились бранит браните бранится бранить браниться бранчив брань бранью бранюсь бранясь бранят браслет браслете браслетом браслеты брат брата брате братец братией братишка братом братской братства брату братца братце братцем братцу братцы брать братьев братьи браться братья братьям брачной брачные бревна брег бред бреда бреде бредил бредила бредит бредите бредить бредишь бредом бреду бредят бреется брежжит брежу брезгает брезгайте брезгливости брезговал брезжит брел брелок брелоков бременем бремени бремя бренно бренный бренчало бренчу бретер бригаде бригадой бригаду бриллиант бриллианта бриллиантовый брился брит бритая бритва бритвы бритой брить бриться бровей брови бровь бровью бровями бровях бродивших бродил бродила бродили бродит бродить бродишь бродяга бродяги бродягу бродяжничай бродяжничества бродят бродячие бронза бронзовую бросает бросаете бросаетесь бросается бросал бросала бросалась бросались бросало бросался бросать бросаю бросают бросаются бросающееся бросающую бросая бросаясь бросив бросившись бросил бросила бросилась бросили бросились бросило бросилось бросился бросим бросит бросится бросить броситься бросишь брось бросьте бросят бросятся брошен брошена брошенного брошенное брошенном брошенный брошено брошены брошка брошкой брошу брошусь брошь брошюр брошюру брошюры бррр брудершафт брусничной брут брызг брызгает брызги брызжут брызнув брызнул брызнула брызнули брызнут брюжжит брюзгами брюзгливая брюзгливо брюзгливою брюзгливые брюзжащие брюзжит брюках 
брюквой брюки брюнет брюнеты брюсову брюхе брюхо брюшком брякает брякну брякнул брякнула брякнуть бряцает бубен бубенчик бубенчики бубенчуками бубновый бубны бугорках бугорки буде будем будемте будет будете будешь буди будил будила будили будило будируя будит будить буднее будней будни будничное будничной будничном будничные будничными будничных будням буднях будок будоражишь будочник будочника будто буду будуар будуаров будут будучи будущая будущего будущее будущей будущем будущему будущие будущий будущим будущих будущности будущность будущую будь будьте будя будят буеракам бузиной буйная буйно буйного буйной буйном буйному буйный буйным буйных буйственных буйство буйством букашками буква буквально буквальное буквами буквой букву буквы букет букета букетами букетом букеты буки букинистами булавки булавку булавой булка булки булку булок булочек булочками булочник булочную бултых буль бульвар бульвара бульваре бульвару бульдоге бульон бумаг бумага бумагам бумагами бумагах бумаге бумаги бумагой бумагу бумажечки бумажка бумажками бумажке бумажки бумажку бумажник бумажнике бумажной бумажный бумажным бумбия бунтовать бунтующее бунчуки буран бурду буре бурей буржуазного бури буркалы бурмейстер бурно бурного бурное бурною бурнус бурнусик бурнусике бурный бурными бурных бурого бурой бурсак бурш бурша бурый бурь бурьян бурьяне бурьяном бурю буря бурями бурях бутончик бутошник бутылка бутылками бутылки бутылкой бутылкою бутылку бутылок бутылочки бутылочку бутылочным буфера буфет буфета буфете буфету буфетчик буфетчика буфеты буффон буха бухте бушевало бушует бушующих буян буянил буянила бы быв бываем бывает бываете бываешь бывал бывала бывали бывало бывалое бывалую бывалые бывалый бывалых бывать бываю бывают бывшая бывшего бывшее бывшей бывшем бывшему бывши бывшие бывший бывшим бывшими бывших бывшую был была былая были былинки было былое былой былые былым былых быльем быстрая быстрее быстрей быстрины быстро быстрого быстрой быстроте быстротой быстротою быстроту быстрые 
быстрый быстрым быт быта быте бытие бытийственных бытия бытность быту быть бытьи бытья бьемся бьет бьется бьешься бьюсь бьют бьются бьющаяся бьющимся бюджета бюджету бюргера бюргеров бюргером бюргерские бюргерскими бюро бюст бюста в вавиловка вагнера вагон вагона вагоне вагонов вагоном вагоны важная важнее важнейшие важнейшими важничал важно важного важное важной важном важному важности важностию важность важностью важную важны важные важный важным важных вазах вазы вакса вакханка вал валандался валентина валериановых валерию валетами вали валил валился валится валов вальдшнепа вальс вальса вальсе вальсишку вальтера валяется валялась валялись валялось валялся валять валяются вам вами вампир вампирственного ване ваней ванечки ванечкину ванечку вани ванили ваниль ванилью ванну ванька ваню ванюша ванюше ванюши ванюшу ваня варвара варваром варвару варе варений вареники вареников вареного вареной вареную варенц варенье варенью варенья вареньями вари вариаций варили варились варин варит варить варшава варшаве варшавой варшавский варшаву варьяции варьяциях варю варя варяга варят вас василий василиса василисе васильев васильева васильевич васильевна васильевну васильевны васильевский васильевского васильевском васильевскому васильевым васильем васильич васильичем васька ваське васьки ватага ватерлоо ватным ватой ваточной ваточном ватрушки ватрушку вату ваты вафля вахрушин вахрушина ваш ваша ваше вашего вашей вашем вашему вашею ваши вашим вашими ваших вашу вбегает вбежала вбежит вбивали вбил вблизи вбок ввела ввели ввергался ввергающий вверенный вверить ввернет вверну ввернул ввернуть вверх вверху вверяет ввести ввиду вводила вводить ввожу ввозят ввысь ввязался вглубь вглядевшись вгляделись вглядись вглядывается вглядывалась вглядывался вглядываться вглядываясь вдаваться вдавленного вдалась вдалеке вдали вдаль вдвое вдвоем вдвойне вдеваешь вдесятеро вдобавок вдова вдове вдовеет вдовец вдовий вдовиц вдовица вдовой вдову вдовы вдогонку вдоль вдохновение вдохновением 
вдохновении вдохновению вдохновенно вдохновенное вдохновенный вдохновенными вдохновенных вдохновенье вдохновеньем вдохнул вдохнуть вдребезги вдруг вдругорядь вдумайтесь вдуматься вдумчиво вдумчивое вдумывалась вдумывался вдумываясь ве ведаем ведает ведаете ведаешь ведай ведал ведать ведаю ведая ведению ведения веденной ведет ведешь веди ведите ведома ведомо ведомости ведомостях ведомству ведрами ведре ведро ведром веду ведут ведущая ведущей ведущие ведущим ведь ведьма ведьмой ведьмы ведя веером веет вежды вежлив вежливо вежливой вежливости вежливость вежливый вежливым везде везет везешь везла везти везу везувии везущих век века векам веками веках веке веки веков вековечная вековечные вековой вековою вековые векселей векселек вексель векселя веку вел вела велел велела велели велению велено велеть вели велик велика великана великанами великанов великаны великая велики великие великий великим великих великого великодушен великодушия великодушная великодушнее великодушнейший великодушно великодушного великодушны великодушные великодушный великодушных великое великой великолепий великолепна великолепная великолепнее великолепнейшие великолепние великолепно великолепного великолепное великолепной великолепную великолепные великолепный великолепных великом великому великонек великую велит велите величава величавого величавое величавой величайшая величайшего величайшее величайшей величайшем величайшему величайший величайшим величал величают величественная величественно величество величие величий величина величине величиной величины величье велишь велосипеды велось вельможа вельможе вельможей велю велят венгерский вене венерических венец венецейской венецианская венецианской венеция веник веника веничек веничком венка венками венке венки венком венок венца венце венцу венчаемся венчал венчались венчался венчальный венчаться венчают вены вер вера верблюды вербы вере веревках веревке веревки веревкой веревкою веревочку веревочное верен вереница вереницей вереницу веретена 
верил верила верили верит верите верится верить веришь верна верная вернее верней вернем вернет вернется вернешь вернешься верни вернись вернитесь верно верного верное верной верному верности верность верностью верною вернув вернувшись вернулась вернулось вернулся вернусь вернутся вернуть вернуться верную верны верные верный верным верных веровавший веровал веровала верований веровать верой вероломна вероломство вероломством верою вероятнее вероятно вероятности вероятность вероятны версия верст верстах версте верстовых версту версты вертел вертелся вертеп вертеры вертеть верти вертись вертит вертится вертишься вертлявая вертун вертя вертясь вертят веру верует веруете веруется веруешь веруй верую верующей верующие верующий верующим веруя верфям верх верхлева верхлеве верхлево верхлевский верхлевского верхнего верхнее верхней верхнем верхнею верхние верхний верхнюю верхняя верховий верховой верховскому верхом верху верхушки верхушку вершать вершина вершинах вершинин вершинина вершинину вершинины вершининым вершининых вершины вершков вершок веры верь верьте верю веря верят вес весах весел весела веселая веселее веселей веселея веселила веселимся веселиться весело веселого веселое веселой веселости веселость веселы веселые веселый веселым веселых веселье весельем весельчаком веселья весеннею весенние весенний весенняя весит весла весле весло весна весне весной весну весны весталку вести вестник весты весть вестью вестями весу весы весь весьма ветвей ветви ветвью ветер ветерка ветерком ветерок ветка ветках ветке ветки веткой ветку ветлы веток ветр ветра ветреная ветреность ветреный ветровые ветром ветру ветха ветхая ветхий ветхим ветхих ветхом ветчиной ветчины вечер вечера вечерам вечерах вечере вечереет вечерело вечерком вечерне вечернего вечернее вечерней вечернем вечерние вечерний вечерних вечеров вечерок вечером вечеру вечная вечно вечное вечной вечном вечному вечности вечность вечную вечные вечный вечным вечными вечных вешает вешал вешались вешаться вешаются веше 
веши вешней вешнею вешний вешняя вещам вещами вещания вещах вещей веществ вещественные вещи вещим вещих вещица вещицы вещь вещью веял веяло веянье веяньем взад взаимно взаимного взаимное взаимной взаимном взаимности взаимность взаимностью взаимною взаимные взаимными взаимных взаймы взамен взаперти взаправду взапуски взбегал взбежав взбежать взбесившийся взбесился взбесит взбесятся взбешенный взбирались взбираться взбираясь взбитые взбитыми взбороздив взбунтоваться взбунтуется взведет взведете взвел взвела взвесил взвесила взвешивает взвешивал взвешивая взвивается взвизги взвизгнул взвизгнула взвизгов взвизгом взвилась взвился взвод взводимых взводом взволновал взволновалась взволновало взволновался взволнован взволнована взволнованная взволнованно взволнованное взволнованный взволнованных взволнованы взволновать взволнует взвыл взгляд взгляда взглядами взгляде взглядов взглядом взгляду взгляды взглядывает взглядывал взглядывала взглядывали взглядываниями взглядывают взглядывая взглянет взглянешь взгляни взгляните взгляну взглянув взглянул взглянула взглянули взглянуть вздернет вздернута вздор вздоре вздорная вздорные вздорными вздорожают вздором вздору вздох вздохами вздохи вздохнет вздохни вздохну вздохнув вздохнул вздохнула вздохнули вздохнуть вздохом вздрагивавшие вздрагивает вздрагивал вздрагивала вздрагивали вздрагиваньям вздрагивают вздрагивающей вздрагивающими вздрагивая вздремнет вздремнул вздремнула вздрогнет вздрогнув вздрогнул вздрогнула вздрогнули вздрогнут вздули вздулись вздумавший вздумает вздумается вздумал вздумала вздумали вздумалось вздымается вздымаются вздыхает вздыхал вздыхала вздыхатель вздыхать вздыхают вздыхающем вздыхая взирая взламывать взлет взлетающего взлетел взлетит взлечу взломом взлохмачены взмахнет взмахни взмахнув взмахнул взмахом взметутся взнос взобраться взобьет взовьется взойдет взойду взойдя взойти взор взора взоре взоров взором взору взоры взошел взошла взошло взрослые взрослый взрослыми взрослых взрыв взрыва взрывов взрывом 
взрывы взъерошенный взывает взывал взывала взывают взывая взыграет взыскание взыскании взысканий взысканию взыскания взысканиях взыскательна взыскательнее взыскательному взыскать взыскивать взыскиваю взыщите взяв взявши взял взяла взялась взяли взялись взяло взялось взялся взятки взяток взяточник взяточника взяточников взяты взятые взятых взять взяться вив вид вида видавший видал видала видалась видали видались видано видать видаться видах виде видевший видевших видел видела видели виделись видело виделся видемши виден видена видение видением видений видения видениям видениями виденною виденный виденных виденье виденья видеть видеться видим видимая видимо видимое видимой видимом видимому видимою видимся видимый видимым видимых видит видите видится видишь видишься видна видная виднелась виднелись виднелось видно видного видною видны видов видоизменения видоизменяющаяся видом виду виды видывал видывали видя видят видятся видящий вижу вижусь визави визг визгливо визгливый визгом визжала визжаль визжат визжать визжит визит визита визитами визите визитик визитная визитную визитный визитных визитов визитом визиты вилами виланда вилась вилка вилки вилкой вилку виллу вилой вильне виляй виляя вин вина вине вини винили вините винительный винить винишь винные винным вино виноват виновата виноватая виноватее виновато виноватого виноватою виноваты виноватым виновен виновника виновником виновница виновным виновных виноград винограда виноградные виноградом вином винт винта винтами винтовой винтообразной винтообразный винты вину винца винчи вины виню винюсь виртуоз виртуозом висевшие висел висели висело висеть виси висит вискам висках виски виску висла вислой вислу виснет висок висячую висящие висящий висящих витиеватой витиеватым витое витринах вифании вифлеема вихор вихрастый вихре вихревое вихрем вихри вихров вихры вихрь вице вицмундир вицмундира вицмундире вишен вишеньем вишере вишневого вишневому вишневые вишневый вишни вишню вишня вишь виясь вкладывались включая вколачивали 
вколотить вконец вкопанная вкопанный вкось вкрадчиво вкрадывается вкралась вкралось вкривь вкруг вкус вкуса вкусе вкусил вкусит вкусная вкусно вкусное вкусный вкусом вкусу вкушать вл влагает влагала влаги влагой владевшие владеет владел владела владелец владельцу владетельного владеть владею владея владимир владимира владимирка владимирке владимирович владимиру владычества владычествовал владычествовать владычица владычицу влажна влажны влажные влажный властвовала властвовать властвует властей властелин властен власти властительно властному властный власть влачат влево влез влезал влезая влезет влезешь влезли влезть влезут влекли влекло влекома влекущая влекущий влетел влетела влетели влетит влечение влечению влечения влеченье влечет влечешь вливается вливалась вливают влил влиявшие влияло влияние влиянием влияний влиянию влияния вложением вложил вложили вложить влопался влюбилась влюбился влюбитесь влюбиться влюблен влюблена влюбленно влюбленного влюбленное влюбленной влюбленности влюбленность влюбленные влюбленный влюбленным влюбленными влюблены влюблюсь влюбляется влюблялись влюбляться влюбляются влюбляясь вместе вместо вмешалась вмешался вмещал внаем внакидку внакладочку вначале вне внезапная внезапно внезапного внезапное внезапной внезапном внезапному внезапною внезапную внезапные внезапный внезапным внезапными внезапных внемлю внемлют внес внесенные внеси внесла внесли внести внесу внеся внешне внешней внешний внешним внешних внешнюю внешняя вниз внизу вникаешь вникал вникать вникая вникните вникнул вникнуть внимай внимал внимание вниманием внимании внимания вниманье вниманью вниманья внимателен внимательнее внимательно внимательные внимательным внимать внимаю внимая внове вновь вносит вносите вносить вносишь внося внука внуки внутренне внутреннего внутреннее внутренней внутреннею внутренние внутренний внутренним внутренних внутренно внутренности внутреннюю внутренняя внутри внутрь внучата внучка внушает внушал внушало внушают внушению внушения внушенная 
внушенным внушив внушил внушите внушительнее внушительно внушительный внушительным внушить внятен внятно внятной внятным во вобла вовек вовлечен вовремя вовсе вогнать вод вода водворения водворилась водворилось водворить водворялся водворят воде водевиля водил водила водили водились водилось водит водится водить водицы водка водки водкой водку водной водовоз водоворот водоеме водой водопад водопады водочки водочку водою воду воды водяная водянистый водянистым водяного водяной водяную водяные водят воедино военного военное военной военною военную военные военный военным военных воет вожак вожди вождь вожжи воз воза возалкал возами возблагодарили возблагодарю возбудил возбудила возбудило возбудимы возбудить возбудят возбуждает возбуждаете возбуждал возбуждались возбуждало возбуждать возбуждая возбуждении возбуждения возбужденно возбужденное возбужденном возбужденный возведен возведении возвел возвела возвестили возвещено возврат возврата возвратившихся возвратила возвратилась возвратился возвратите возвратится возвратить возвратиться возвратишь возвратного возвратный возвратясь возвращавшийся возвращается возвращаешься возвращал возвращалась возвращались возвращалось возвращался возвращать возвращаться возвращаюсь возвращаются возвращающегося возвращаясь возвращение возвращении возвращения возвращенья возвращу возвысив возвысился возвысить возвышались возвышалось возвышался возвышать возвышаться возвышая возвышаясь возвышенная возвышенный возглаголет возглаголят возглас возгласы воздвиглась воздвигнутых воздел возделывал воздержание воздержнее воздусех воздух воздуха воздухе воздухом воздуху воздушном воздушные воздушный воздушных воздыханий возжаждешь воззвал воззвание воззрение воззрения воззреньях возила возили возились возился возить возиться возишься возле возлелеянную возложил возлюби возлюбила возлюбишь возлюбленного возлюбленной возлюбленные возлюбленный возлюблял возмездие возмездия возможна возможно возможности возможность возможную возможным возможных 
возмутившее возмутился возмутительно возмутителям возмутить возмутишь возмущает возмущающих возмущаясь возмущен возмущено вознаградить вознаграждать вознаграждая вознаграждение вознаграждением вознамерился возненавидел возненавидела возненавидит возненавидишь возненавижу вознесенском вознесены вознесу возни возник возникает возникал возникала возникать возникающей возникла возникло возникнет возникновения возникшего возносилась возносится вознося возню возня возобновились возобновить возобновляет возобновлялась возов возом возопил возражает возражал возражений возражения возразил возразила возразить возраст возраста возраставшего возраставшее возраставшим возрастало возрастающей возрастая возрасте возрастом возрасты возродить возрождалась возрождение возрождению возросла возросло возу возымел возыметь возьмем возьмет возьмете возьметесь возьмется возьмешь возьми возьмите возьму возьмут возьмутся возят воин воина воину воины воистину вой войдем войдет войдешь войди войдите войду войдут войдя войлок войлоками войлоке войн война войнами войне войницкая войницкии войницкий войницким войницкого войницкому войну войны войска войско войском войти вокзал вокзале вокзалу вокруг вол волге волгу воле волей волен волею воли волк волки волков волн волна волнами волнах волнение волнением волнении волнений волнения волнениями волненье волненьем волненья волновала волновалась волновали волновались волновало волновался волновать волноваться волной волною волну волнуем волнует волнуете волнуетесь волнуется волнуешь волнуешься волнуйся волнуйтесь волнуюсь волнуют волнуются волнующей волнующейся волнующуюся волнуясь волны воло волокита волокиты волоком волом волос волоса волосам волосами волосатые волосах волосенки волосики волоска волоски волости волосы волочит волчком волчок волчьим волшебная волшебника волшебниц волшебница волшебницы волшебного волшебной волшебные волшебств воль вольна вольно вольнодумные вольнодумцам вольное вольной вольном вольному вольною вольные вольный вольным 
вольтер волю воля вон вона вонзает вонзай вонзал вонзила вонзился вонзить вони вонь вонью вонючего вонючих воображает воображаешь воображал воображала воображали воображать воображаю воображают воображая воображение воображением воображении воображению воображения воображениями воображениях вообрази вообразил вообразила вообразили вообразилось вообразился вообразись вообразит вообразите вообразится вообразить вообще вооружался вооружаться вооружаясь вооружений вооруженная вооруженный вооружились вооружиться вопил вопила вопиющее вопиющим вопле воплей воплем вопли воплотился воплотить воплощался воплощение воплощенного вопль вопля воплям воплями вопреки вопрос вопроса вопросам вопросами вопросах вопросе вопросик вопросительно вопросительного вопросительные вопросительный вопросов вопросом вопросу вопросы вопрошаю вопрошающей вопрошающий вопрошающих вопьется вор вора ворвавшись ворвалась ворвался ворвется воробей воробушек воробушка воров вороваль воровать воровка воровки воровских ворожим вором ворон вороная воронку вороны воронье воронья ворот ворота воротам воротами воротах вороти воротившийся воротившись воротила воротилась воротились воротилось воротился воротись воротитесь воротится воротить воротиться воротишь воротишься воротник воротников воротником воротничках воротнички воротничок воротясь воротятся ворохом ворочает ворочается ворочаешься ворочал ворочала ворочались ворочало ворочалось ворочался ворочать ворочаться ворочающими ворочаясь ворочу ворочусь ворует воруете ворующего ворча ворчавшую ворчал ворчала ворчанье ворчаньем ворчать ворчит ворчишь ворчливые ворчу воры восемнадцати восемнадцать восемь восемьдесят восемьсот воск воскликнул воскликнула восклицал восклицала восклицание восклицанием восклицании восклицанию восклицания восклицаниями восклицанья восклицая восклоняясь воскрес воскресать воскресение воскресении воскресения воскресенье воскресенью воскресенья воскресеньям воскресил воскресила воскресить воскресла воскресло воскреснет воскреснув 
воскресную воскресный воскресных воскресший воскресят воскрешает воскрешало вослед воспаление воспаления воспаленно воспаленного воспаленный воспаленным воспаленных воспитаешь воспитался воспитан воспитана воспитание воспитанием воспитанию воспитания воспитанная воспитанные воспитанный воспитанных воспитаны воспитаться воспитывалась воспитывался воспитывать воспитывая воспламенившись воспламенился воспламенялся воспользовавшись воспользовались воспользовался воспользоваться воспользуется воспоминание воспоминанием воспоминании воспоминаний воспоминанию воспоминания воспоминаниями воспоминаниях воспоминанье воспоминанья воспоследует воспрещено восприимчив восприимчивая восприимчивом восприимчивость восприимчивый восприняла воспринятию воспроизводится воспротивиться воспрянуть восскорбел восстав восставал восставшего восстал восстали восстанет восстановил восстановила восстановили восстановить восстановлен восстановляло восстановляя восстанут восстанья востер восток востока восторг восторга восторгам восторгами восторгаться восторге восторги восторгом восторженная восторженно восторженного восторженное восторженность восторженностью восторженною восторженный восторженным восторженными восторженных восточной восточный востреньким востренькое востро востроногий вострые вострым вострыми восхвалить восхитительная восхищался восхищаться восхищение восхищенного восхищеньи восходил восходит восходить восходном восхождения восьмерка восьми восьмидесяти восьмидесятых восьмилетнего восьмого восьмой восьмом восьмому восьмую вот воткнул воткнут вотчине воцарилась воцарилось воцаряется вочеловечить вошедшая вошедшего вошедшему вошедший вошедшими вошедших вошедшую вошел вошла вошли вошло вошь воюет воющую воя вояж вояжа вояжем вояжирует впавшими впадавшего впадает впадал впадала впадая впадешь впал впала впали впасть впервой впервые вперегонку вперед впереди вперяя вперяясь впечатление впечатлении впечатлений впечатлению впечатления впечатлительное впечатлительностью впивался 
впиваясь впилась впился вплелись вплести вплетаю вплоть вповалку вполглаза вполголоса вползает вползало вполне вполовину вполуоборот впопад впопыхах впору впоследствии впотьмах вправду вправе вправо впредь впридачу впрок впрочем впрягают впряжена впрямь впускает впускала впускать впустить впустят впятеро вр враг врага врагами враги врагини врагов врагом врагу вражда враждебно враждебного враждебное враждебную враждебны враждебные враждебными враждебных враждой вражды вражеским вражеских вражий вражьей вражьим враздробь вразумите вразумихин врали вранье вранью врасплох врать врач врача врачах врачебному врачу вращает вращается вращалось вревской вред вреда вреден вредить вредна вредная вредно вредны вредные вредных вредят врезался врезывается врем времен времена временам временем времени временно временного временное временной временном временным временных времечко время врет врете врешь ври вроде врожденная врожденную врожденных врозь вросла вру врубелем врубель врубеля врут врученных вручил вручить врывавшихся врывается врывалось врываясь врыть вряд всажу все всеблагое всевдонимом всевозможные всевозможных всевышнего всевышнему всегда всегдашнее всегдашний всегдашним всегдашнюю всегдашняя всего вседержителя вседневная всеевропейской всезнание всезнании всезнающей всезнающие всей вселенная вселенной вселенской вселились вселился вселитературно вселить вселявшиеся вселяется всем всеми всемирной всемирную всемирный всему всенаполняющего всенародно всенижайший всеной всенощной всеобщего всеобщее всеобщей всеобщих всеобъемлемостью всеоружии всепоглощающий всесветными всесметающая всесторонней всеуслышание всех всецело всею вскакивает вскакивал вскакивала вскакивают вскакивая вскачь вскинув вскинул вскинулась вскинулся вскинутом вскинутые вскипев вскипевшего вскипел вскипело всклокоченный всклоченного вское всколосится всколыхнулись вскользь вскоре вскочив вскочившую вскочил вскочила вскочило вскочит вскочишь вскрикивает вскрикивала вскрикивания вскрикивают вскрикивая 
вскрикнет вскрикну вскрикнул вскрикнула вскрикнули вскрикнут вскрикнуть вскричал вскричала вскружить вскрывался вскрывать вскрылась вскрыть всласть вслед вследствие вслух вслушивается вслушивалась вслушивался вслушиваться вслушиваясь всматривается всматривался всматриваться всматриваясь всплескивая всплеснув всплеснул всплеснула всплеснулась всплывало всплывут всплыла вспоенная всполохнутый всполошив всполошился вспоминаем вспоминает вспоминается вспоминаешь вспоминай вспоминайте вспоминал вспоминала вспоминалась вспоминали вспоминать вспоминаю вспоминая вспомни вспомнив вспомнил вспомнила вспомнилась вспомнили вспомнились вспомнилось вспомнился вспомнит вспомните вспомнить вспомнишь вспомню вспомнят вспоможение вспоможении вспоможения вспорхнулся вспотевшее вспрыгивает вспрыгнул вспрянула вспугнул вспугнуть вспухла вспухшею вспылив вспылил вспыльчива вспыльчиво вспыльчивые вспылю вспыхивает вспыхивал вспыхивала вспыхивают вспыхнет вспыхнув вспыхнул вспыхнула вспыхнули вспыхнуло вспыхнуть вспышкам вспышки вспышку вспять встав вставай вставайте вставал вставала вставали вставать вставая вставил вставить вставлял вставши вставшие вставший встает встаете встаешь встал встала встали встанемте встанет встанешь встану встанут встань встаньте встарь встать встаю встают встревожен встревоженная встревоженно встревоженного встревоженную встревоженный встревоженным встревоженными встревожившись встревожилась встревожило встревожился встревожить встрепанный встрепенется встрепенулась встрепенулись встрепенулся встрепенутся встретив встретившегося встретивший встретившись встретил встретила встретили встретились встретилось встретился встретимся встретит встретите встретится встретить встретиться встретишь встретя встретятся встреч встреча встречавшемуся встречавшиеся встречаемся встречает встречается встречаешь встречал встречала встречалась встречали встречались встречалось встречался встречать встречаться встречах встречаю встречаюсь встречают встречаются встречаясь встрече 
встречен встреченных встречи встречного встречной встречному встречные встречный встречным встречных встречу встряхивает встряхну встряхнул встряхнулся встряхнуть встряхнуться вступает вступала вступаю вступают вступая вступив вступившая вступивши вступил вступила вступилась вступили вступился вступить вступление вступления всунув всунул всунуть всхлипнут всхлипывает всхлипывал всхлипывала всхлипывать всхлипывая всходила всходили всходить всю всюду вся всяк всякая всякие всякий всяким всякими всяких всякого всякое всякой всяком всякому всякою всяку всякую всячески втайне втащила втащили втеснилось втираться втихомолку втолковать втоптала вторая вторгается вторично вторичный вторник второго второе второй втором второпях второстепенную вторую вторые вторым вторых втрое втуне втыкается втягивает втягивание втягивать втягивающее втянет втянется втянув втянул вуали вуалью вулкана вулканических вулканической вход входа входе входи входившего входившим входил входила входили входило входит входите входить входя входят входящие входящий входящим входящих входящую вхожу вцепилась вцепился вцепится вчера вчерашнего вчерашнее вчерашней вчерашнем вчерашнему вчерашние вчерашний вчерашним вчерашних вчерашнюю вчерашняя вчерне вчетверо вчуже вшей вши въелась въелись въехал въехали въехать вы выбегает выбегали выбежавшего выбежал выбежала выбежали выбежит выбежишь выберет выберу выберут выбивался выбивая выбилась выбирает выбирай выбирать выбираться выбираю выбирают выбирая выбить выборам выборгской выборгскую выборе выборы выбрав выбрал выбрала выбрали выбрался выбран выбрана выбрасывает выбрасывать выбрать выбраться выбрился выбрит выбрито выбритый выбритыми выбросил выбросить выбрось выбросьте выброшенные выведает выведать выведения выведет выведешь выведи выведу выведут вывезенному вывезенным вывезти вывел вывела вывели вывело вывернул вывернула вывернулся вывернутыми вывернуться выверт вывеска вывескам вывески вывести выветриваемся выветрившегося вывешено вывод вывода выводил 
выводили выводило выводить выводов выводом выводят вывожу вывороти выворотив выворотил выгадаешь выгадать выгладит выглядишь выглядывавшую выглядывает выглядывал выглядывала выглядывали выглядывало выглядывая выглянет выглянув выглянула выглянуло выгнал выгнали выгнанная выгнать выгнутою выговаривает выговаривал выговаривала выговаривало выговаривалось выговаривать выговариваться выговаривая выговора выговорив выговорил выговорила выговорилось выговорить выговорят выгода выгодами выгодах выгоднее выгодно выгодное выгодной выгодном выгодную выгоду выгоды выгонишь выгоню выгоняют выгорит выгребла выдавай выдавалась выдавали выдавались выдавать выдавая выдавилась выдавливалась выдадите выдает выдаете выдал выдала выдалась выдали выданное выданы выдаст выдастся выдать выдают выдаются выдающеюся выдвинул выделанно выделанным выделка выделывать выделывая выдергивает выдержав выдержал выдержала выдержали выдержать выдерживает выдерживал выдерживаю выдерживают выдерживая выдержит выдержите выдержишь выдержу выдернул выдернула выдохся выдрал выдрессированная выдумав выдумаем выдумает выдумаешь выдумай выдумал выдумала выдумалась выдумали выдуманная выдумать выдумка выдумки выдумку выдумок выдумывал выдумывала выдумывать выдумываю выдумывают выдь выдьте выедем выеду выезда выездные выездных выезжаем выезжает выезжаете выезжал выезжала выезжать выезжают выехал выехала выехали выехать выждал выждать выжечь выжжены выжига выжидал выжидая выжил выжили выжимали выжмет вызвав вызвал вызвала вызвали вызвало вызвался вызванные вызвать выздоравливать выздоравливаю выздоравливая выздоровевший выздоровеешь выздоровел выздороветь выздоровление выздоровлению выздоровления выздоровлю вызов вызова вызовет вызывает вызывала вызывали вызывают вызывающее вызывающею вызывающий вызывающим вызывая выиграет выиграл выиграна выиграть вый выйдем выйдемте выйдет выйдешь выйди выйду выйдут выйдя выйти выказал выказана выказывает выказывал выказывались выказывалось выказывать выкатила выкатились 
выкладку выкладывал выковывал выколачивать выколоти выколочу выкопал выкопать выкрасили выкрикивал выкрикивала выкричишься выкупала выкупила выкупить выкупленное выкупленные выкуплю выкуренная выкуривает выкурить выл выла вылазки вылакать вылез вылезали вылезая вылепленными вылериановые вылетали вылетел вылетели вылетело вылетят вылечитесь вылечить вылечу вылил вылитую вылить выложи выложил выложила выложит выложить вылупила вылью вымазав выманивать выманила вымела вымерло выметают вымети вымолвила вымолвить вымочит вымоют вымпелами вымыв вымыл вымыла вымыли вымылся вымысла вымыслом вымыты вымытые вымытый вымытыми вымыть вымыться вынес вынесем вынесенного вынесет вынеси вынесла вынесли вынести вынесу вынет вынимает вынимала вынимать вынимая выносившие выносил выносила выносили выносились выносился выносимая выносит выносите выносить выношу вынув вынудила вынужденное вынул вынула вынули вынулось вынуто вынутым вынуть выпадает выпадал выпадала выпадало выпадении выпадет выпал выпала выпали выпало выпачкаешь выпачкался выпачкан выпачканного выпевала выпевать выпей выпейте выпивает выпивается выпивалось выпивательными выпивку выпивши выпил выпила выпили выпиливает выписали выписались выписать выписки выписывал выписывать выписываю выпит выпита выпито выпитого выпитое выпитою выпитым выпить выплакав выплакала выплакаться выплакивает выплатил выплатится выплачу выплеснет выплыли выполнит выправился выпрашивать выпроводил выпросила выпросили выпросит выпросить выпрошу выпрыгнули выпрыгнуть выпрямилась выпрямились выпрямлялась выпрямляя выпрямляясь выпукло выпуклостию выпуклою выпуклую выпуклые выпускает выпускать выпуская выпуске выпустив выпустил выпустила выпустили выпустит выпустить выпустят выпутаться выпутывался выпуча выпучив выпучил выпущен выпущенные выпущены выпущу выпытает выпытала выпытываешь выпытывала выпытывая выпьем выпьет выпьешь выпью выпьют выработал выработалась выработался выработанности выработанные выработать выработаться выработывается 
выработывались выработывалось выражавшими выражаете выражаетесь выражается выражал выражала выражалась выражали выражало выражалось выражался выражать выражаться выражая выражение выражением выражений выражению выражения выражениях выражено выраженье выраженьем выраженьице выразил выразила выразилась выразились выразило выразилось выразился выразительная выразительно выразительное выразительные выразится выразить выразиться вырастало вырастают вырастет вырасти вырвавшаяся вырвавшемся вырвавшею вырвавшихся вырвал вырвала вырвалась вырвали вырвались вырвалось вырвался вырванного вырвать вырваться вырвется вырву вырезай вырезана вырезать вырождение вырождением вырождения выронил выронила вырос выросла выросли выросло выросшей вырубить выругает выругать выручает выручена вырученные выручи выручил выручим выручит выручки вырывавшиеся вырывается вырывалась вырывали вырывались вырывалось вырываются вырывая высасывал высасывала высвободив высвободил высвободился высвобождаете выселить выселки выселков высечет высидел выскабливаешь выскажет выскажется выскажу выскажусь высказала высказалась высказались высказался высказанных высказать высказаться высказываете высказывается высказывала высказывались высказывало высказывалось высказывать высказываться высказываю высказываясь выскользало выскользнули выскочившего выскочил выскочила выскочит выскочить выскочке выскочки выскребывались выслали выслать выслежу выслужил выслужиться выслушав выслушает выслушаете выслушайте выслушал выслушала выслушать выслушивал выслушивали высматривал высматривать высматривая высморкается высморкался высмотреть высовывалась высок высокая высокенькая высокенькие высоки высокие высокий высоким высокими высоких высоко высокоблагородие высокого высокое высокой высоком высокомерием высокомерия высокомерно высокомерного высокомерные высокомерный высокомерным высокому высокопарные высокородие высокоуважаемая высокую высоте высоту высоты высох высохла высохли высохло высохшем высохшею высочайшей высочайшим 
выспался выспится высплюсь выспросил выставив выставил выставила выставилась выставили выставит выставлена выставлены выставлю выставляется выставлял выставлять выстоять выстрадав выстрадал выстрел выстрелив выстрелила выстрелить выстрелишь выстрелю выстриженными выстроен выстроить выступал выступать выступают выступившей выступившие выступившими выступил выступила выступили выступить выступление высунув высунулась высунуть высушила высушит высчитал высшего высшее высшей высшему высший высших высшую высылают высыпает высыпал высыпали высь вытараща вытаращены вытаращив вытаращил вытаращила вытаскивает вытаскивала вытаскивать вытаскивая вытащена вытащил вытащила вытащим вытащите вытащить вытащу вытер вытерла вытерпел вытерпела вытерпели вытерпеть вытерпит вытерпливающего вытерплю вытирает вытирать вытирая вытолкай выторговать выточен выточена выточенными выточилась выть вытягиваться вытягивая вытягушкин вытянет вытянув вытянул вытянулись выудить выученного выучи выучивал выучил выучила выучилась выучился выучиться выучку выучу выхватив выхватил выхватила выхлопотанной выхлопотать выхлопочет выход выхода выходе выходи выходившая выходившем выходившему выходивших выходил выходила выходили выходило выходим выходит выходите выходить выходишь выходками выходке выходкой выходку выходной выходов выходом выходу выходя выходят выходящего выходящее выходящие выхожу выцвело выцветшим выцветших вычеркнуть вычертил вычета вычетом вычинила вычислять вычистить вычитал вычитание вычитанные вычитают вычитывая вычищенную вычищенный вычищенных вычищены вышвырнул выше вышедшего вышедши вышедший вышедшую вышел вышесказанными вышеупомянутою вышиб вышивает вышиваньем вышивая вышине вышитыми вышла вышлем вышли вышло вышлю выщипала выяснения выяснилась выяснилось выясняться вьется вьюг вьюга вьюгам вьюге вьюги вьюгой вьюн вьются вьющимися вьющихся вяжет вяжутся вяз вязанки вязанная вязать вяземского вязка вязниковых вязнут вязь вялая вяло вялого вялой вялою вялый вялым вялыми вянут вячеславу 
г га гавана гавани гаванской гаврилович гавриловича гавриловичу гаврилыч гаврилыча гаврилычу гаги гадай гадал гадала гаданьи гадать гадают гадая гаденькая гадки гадкий гадко гадкое гадкой гадкую гадов гадок гадостны гадость гадят гаев гаева гаеве гаеву гаже газели газет газета газетам газетах газете газетку газетной газетную газетный газетой газету газеты газом газы гайдамаков галатея галдеж галдели галерее галереей галереи галерею галерея галереях галилея галицийских галка галла галлереи галлы галлюсинацию галлюцинации галстук галстука галстуке галстучек галчат гам гамбринусе гамлет гамлета гамлетовского гамом гаму ганц гарантировали гари гармонией гармонии гармоника гармонике гармоники гармоникой гармонировавшие гармонировал гармонировало гармонического гармония гарнитур гарнитуром гарпагон гаруса гарью гаси гасил гасли гасло гаснет гасну гаснул гаснут гаснуть гаснущая гастроном гастрономической гастрономическом гатей гатчинских гвалт гвоздем гвозди гвоздиками гвоздичку гвоздочками гвоздь гвоздями гг где ге гедрюкт гей гелиотропом гельд геморрой генерал генерала генералишка генералишку генералов генералом генералу генералы генеральские генеральский гениальной гениальности гениальные гениальный гениальным гением гении гений генриет генуя географической географию география геологические геометра геометрия георгий герани геранью геранями герасимович герасимовича герасимовичу герасимыч герасимычу гербовая гербовой гербовыми гердера геркулан геркулесовыми германии германию германские героев героем героинь герой геройски геройства герою героя герр гертруды герца герцом гершеля гете гиацинты гибели гибель гибельно гибельной гибельный гибелью гибки гибли гибнет гибнешь гибну гибнут гибнуть гибок гигантской гиганты гимн гимна гимназии гимназист гимназистами гимназическом гимназию гимнастика гимнастикой гимнастику гимны гиппиус гирлянды гиря гитара гитаре гитарные гитарой гитару глав глава главе главная главнейшая главнейшего главнейшие главнейшим главнейших главного 
главное главной главном главною главную главные главный главным главных главой главу главы глагол глади гладиатор гладил гладила гладит гладить гладкие гладкий гладко гладковыбритым гладкой гладкую гладь гладьте глаженье глаз глаза глазам глазами глазах глазела глазенками глазеночками глазеющих глазея глазищи глазками глазки глазок глазом глазу глас гласила гласное гласности гласом гласят глашатаи глин глиной глину глины глиняной глиняный глиняными глиссе глобус гложет гложут глотает глотками глотков глотку глотнул глоток глохнут глубже глуби глубина глубине глубиной глубину глубины глубок глубока глубокая глубоки глубокие глубокий глубоким глубоких глубоко глубокого глубокое глубокой глубоком глубокомысленно глубокомысленное глубокую глубочайшего глубочайшее глубочайшем глубочайшие глубь глумитесь глуп глупа глупая глупее глупей глупейшего глупейшею глупейшие глупейшими глупенькие глупенькое глупо глуповат глуповатый глупого глупое глупой глупом глупому глупопоспешной глупостей глупости глупостию глупость глупостью глупостям глупостях глупою глупую глупца глупы глупые глупый глупым глупыми глупых глух глуха глухари глухая глухи глухие глухим глухими глухих глухо глухого глухое глухой глухом глухонемая глухота глухую глуше глуши глушит глушь глыба глыбам глядевшему глядел глядела глядели глядеть гляди глядись глядит глядите глядится глядишь глядь глядя глядясь глядят глядящий гляжу гляжусь глянет глянец глянув глянул глянула глянцевитой гм гнал гнала гнать гнев гнева гневается гневайся гневаться гневно гневной гневность гневный гневными гневом гневу гнедой гнезд гнезда гнездится гнездо гнет гнета гнетет гнетете гнетом гнешь гниения гниет гниешь гнилого гнилое гнилой гнилом гнилую гнилыми гниль гнилью гнусавой гнусно гнусное гнусном гнусному гнусность гнусные гнусный гнусным гнусными гнусных го говеть говор говоренного говорено говори говорившего говорившие говоривших говорившую говорил говорила говорили говорило говорилось говориль говорим говорит говорите 
говорится говорить говориться говоришь говорю говоря говорят говорящ говорящее говорящей говорящие говорящий говорящим говорящих говядина говядиной говядину говядины гоголевский гоголя год года годам годами годах годен годика година годину годится годичный годишься годной годные годов годовой годовщиной годовым годом годочек году годы годятся голая голгофу голи голиков голландская голландских голландцем голо голов голова головами головах голове головка головке головку головлей головного головной головном головною головную головные головных головой головокружений головою голову головушка головы голод голода голоден голодная голодно голодного голодное голодной голодному голодные голодный голодных голоду голой голос голоса голосами голосе голосил голосила голосистый голоском голосов голосок голосом голосу голосят голубая голубиная голубиную голубка голубоватый голубоватым голубое голубой голубом голубоокий голубою голубушка голубую голубчик голубые голубым голубыми голубых голубь голубятни голубятню голую голы голые голый голым голыми голых голь гольштейн гом гомера гомеридами гомозится гомозиться гонга гондол гондоле гонение гонений гони гоним гонима гонимого гонимый гонимых гонит гоните гонорарий гончаров гонявшийся гоняется гоняла гонялся гонят гонять гоп гор гора горазд гораздо горам горами горах горбатая горбиться горд горда гордая горделиво гордилась гордились гордился гордится гордиться гордо гордого гордой гордом гордому гордости гордостию гордость гордостью гордою горды гордые гордый гордым гордыне гордых гордясь гордятся гордячка горе горевал горевать горевший горевшим горел горела горели горелки горело горем горемыка горение горением горенки горенье горестно горестное горестной горестью горестях гореть горечи горечь горечью горжусь гори горизонт горизонта горизонтально горизонте горизонтом горизонты гористую горит горке горла горланили горланит горле горло горловую горлом горлу горлышко горний горнист горниста горницу горницы горничная горничной горничных 
горном горный горным горными город города городам городах городе городите городишке городишко городишком городке городничего городничий городового городовой городовому городовые городок городом городская городского городское городской городском городскую городу городят горой гороскоп горох горохе гороховой гороховую гороховый горохом гороху горошек горошина горошком горсточку горстью гортани гортанные гору горчица горше горшками горшки горшков горшочки горы горькая горькие горький горьким горьких горько горькой горьком горькою горькую горю горюй горюнова горюновы горюновым горючей горючий горючими горя горят горяча горячась горячая горячего горячей горячему горячешных горячею горячившегося горячие горячий горячил горячились горячился горячим горячими горячись горячится горячих горячишься горячка горячке горячки горячку горячности горячо горячусь горячую горящее горящей горящий горящим горящими госпитале госпитальный госпитальных господ господа господам господах господи господин господина господине господином господину господним господня господняя господом господская господский господского господской господствовавшей господствовала господствующею господствующие господствующим господу господь госпожа госпожам госпоже госпожи госпожинок госпожой госпожу гостей гостем гостеприимном гостеприимный гости гостивший гостила гостиная гостинец гостиница гостинице гостиницу гостиницы гостиного гостиной гостиную гостинцу гостинцы гостиный гостиных гостит гостить гость гостья гостю гостя гостям гостями гостях государственная государственное государственный государственным государственных государыня государь государю гот готов готова готовая готовившиеся готовившийся готовил готовила готовилась готовили готовились готовился готовит готовитесь готовится готовить готовиться готовлюсь готовность готово готового готовое готовой готовом готовую готовы готовые готовый готовыми готовь готовясь готовят готовятся готовящегося гофкригсрат гр грабеж грабежа грабежах грабеже грабежей 
грабежом грабил грабит грабителя грабят гравюр гравюры град граденаплем градирен градуса градусник градусов градусы грады гражданин гражданина гражданские гражданский гражданских гражданской гражданском гражданскому грамматике грамматику грамота грамотеи грамотность грамотный грамотных грамоты гранат грандиозного грандиозное грандиозные грани гранит гранитные границ граница границей границею границу границы гранпасьянс графа графин графина графине графини графинина графином графинчик графов графский графского графской графскому графу графы грации грациозно грациозности грациозную грациозны грациозный грация гребенки гребенку гребень греет грез греза грезами грезах грезил грезилась грезилось грезит грезится грезить грезишь грезы грезятся гремел гремела гремело гремит гренадера гренадеры греть грех греха грехам грехе грехи грехов греховной греховные грехом гречневой гречонка грешен греши грешили грешить грешник грешница грешницу грешно грешной грешную грешные грешный грибами грибов грива гривен гривенник гривенниками гривенники гривнами гривне гривну гривны григория григорьевич григорьевича григорьевичем гримасой гримасу гриф гриша гриши гроб гроба гробам гробах гробе гробниц гробницей гробов гробовая гробовой гробовою гробовые гробом гробу гроз гроза грозе грозил грозила грозило грозим грозись грозит грозится грозишь грозная грозней грозно грозное грозной грозном грозному грозны грозные грозный грозным грозных грозовая грозовой грозовые грозовых грозой грозы грозя грозясь грозят грозящие гром грома громад громада громаднейшего громадного громадное громадной громадном громадную громадные громадный громадным громадных громам громами громит громкая громкий громким громкими громко громкое громком громовая громовой громовые громовым громоздить громоздкий громоздко громом громче громыханье громя грот грохнулась грохнулся грохольского грохот грохотом грош гроша грошей грошик грошовый грошовых гру груб груба грубая грубее грубеешь грубейшим грубить грубиян грубиянит грубо 
грубоват грубовато грубого грубое грубой грубом грубости грубостию грубость грубостью грубою грубые грубый грубым грубыми грубых грудами грудах груде груди грудная грудной грудные грудных грудой груду груды грудь грудью груз груза грузди грунт группа группами группы грустен грусти грустим грустит грустишь грустная грустнее грустно грустного грустное грустной грустном грустною грустную грустные грустный грустным грустных грусть грустью грустя грустят груше грушевой груши грущу грызет грызла грызло гряд грядами грядущего грядущее грядущей грядущем грядущие грядущий грядущим грядущих гряды грязи грязна грязная грязно грязное грязной грязном грязнотцой грязною грязную грязны грязные грязный грязными грязных грязня грязь грязью грянет грянул грянулась грянули грянуло губ губа губам губами губах губернатор губернатора губернаторе губернатором губернаторстве губернатору губернии губерний губернию губерния губерниям губернский губернским губернских губернского губернском губил губили губите губительнее губительный губительных губить губишь губка губки губкой губой губок губу губы губят гувернантка гувернантках гувернантки гувернанткой гудел гудело гудки гудут гуды гул гуле гулкий гулом гульбе гуляет гулякой гулял гуляла гуляли гулянье гуляньями гулять гуляю гуляют гуляющей гуляя гумаге гумагу гуманистический гуманитетом гуманная гуманно гуманного гуманном гуманности гуманные гуманный гуммиластик гунял гурьбой гусак гусаки гусар гусара гусарские гусарское гусей гуси гусиных густая густейшими густо густой густоту густою густые густым густыми гусыни гуся гущей гюго д да дав давай давайте давал давала давалася давали давался давать давая даве давеча давешнего давешнее давешней давешнему давешний давешних давешнюю давешняя давившее давившей давила давило давит давить давка давке давлением давней давненько давнишнее давнишней давнишние давнишний давнишних давнишнюю давно давным давняя давя давящим дагестана дадим дадите дадут дает даете дается даже дай дайте дал дала далась 
далее далей далек далека далекая далекие далекий далеким далеких далеко далекое далекой далеком далекую далече дали дались дало далось дался даль дальнего дальней дальнейшего дальнейшее дальнейшей дальнейшем дальнейшему дальнейшею дальнейшие дальнейших дальнему дальние дальний дальних дальный дальня дальняя дальше далью дам дама дамам дамами дамах даме дамой дамою дамские дамскую дамся даму дамы дана данного данное данному данную данным дано дант данта дантово дань дар дарами дарвина дареную дарил дарить дармоедка дармоедом даров дарований дарования даровщинку даром дарь дарье дарьи дарья дарят даст дать дач дача дачами дачах даче дачей дачи дачник дачники дачников дачном дачные дачу дашенька дашеньки дашеньку дашь даю дают два двадцати двадцатилетней двадцатирублевые двадцатисемилетний двадцатый двадцать двадцатью дважды две двенадцати двенадцатого двенадцатом двенадцатую двенадцатый двенадцать двенадцатью дверах дверей дверец двери дверными дверь дверьми дверью дверям дверями дверях двести двигавшимся двигает двигается двигал двигалась двигали двигались двигалось двигался двигатель двигателя двигать двигаться двигают двигая двигаясь движение движением движении движений движению движения движениях движенье движеньем движенья движеньях движет движется движешься движимости движимый движись движутся движущее движущееся движущиеся движущихся двинется двинулась двинули двинулись двинулся двинутся двинуть двинуться двое двоеверьем двои двоим двоих двойку двойник двойной двойные двойным двойных двор двора дворам дворах дворе дворец дворик дворне дворней дворни дворник дворника дворнике дворники дворников дворникова дворником дворнику дворницкой дворницкую дворню дворня дворняжка дворняжки дворов дворовый двором двору дворца дворце дворцовой дворцовым дворцы дворян дворяне дворянин дворянина дворянке дворянки дворянская дворянские дворянский дворянскими дворянского дворянской дворянском двугривенного двугривенные двугривенный двугривенных двукратного двуличного двум двумя 
двусмысленного двусмысленности двусмысленную двусмысленных двух двухкопеечника двухмачтовый двухминутной двухнедельное двухчасовое двухэтажном де дебаркадера дебатирован дебош дебютировала дев дева деваешь девал девам девать деваться деверь девизами девиц девица девицами девицах девице девицей девицу девицы девически девические девический девического девической девичий девичьего девичьей девичьем девичью девка девкам девке девки девкой девой девок девочек девочка девочкам девочками девочке девочки девочкой девочку девственна девственного девственное девственность девушек девушка девушке девушки девушкой девушку девчонка девчонки девчонку девчоночку девчурочка девы девяносто девяти девятнадцатый девятнадцать девятого девятом девятый девятым девять девятьсот деготь дегтем дед деда дедах дедов дедом деду дедушка дедушке дедушки деды дезертировать действенный действие действием действий действительная действительнее действительно действительной действительности действительность действительностью действительные действительный действительным действительных действия действиях действйтельности действовал действовало действовать действует действуют действующие действуя декабре декабрь декабрьский декабря декадент декадентах декадентский декадентское декан декламирует декламируете декламируя декларируя декорации декораций декорация дел дела делаем делает делаете делается делаешь делай делайте делал делала делалась делали делались делалось делаль делам делами делась делать делах делаю делаюсь делают делающим делая делаясь деле деления делец деликатен деликатная деликатнейшим деликатно деликатности деликатный деликатным деликатными делила делился делись делит делишкам делишки дело деловая деловитости деловитость делового деловой деловую деловые деловым деловых делом делось делу делывал дельное дельному дельный дельцам дельце дельцу дементьева демисезон демка демон демона демонизм демонов демонологии демоном демонским демонстрации денег денежного денежной денежные денежный 
денек денем дение денницу денно дену денщик денщиков денщику день деньгам деньгами деньгах деньги деньжонки денька деньские деньской департамент департамента департаментах департаменте депутата депутатами депутатов депутаты дер дергает дергайте дергала дергали дергать дергая дерев дерева дереве деревенели деревенская деревенские деревенский деревенскими деревенских деревенского деревенское деревенской деревень деревеньках деревне деревней деревни деревню деревня деревням дерево деревом дереву деревушку деревцо деревьев деревья деревьям деревьями деревьях деревянная деревянное деревянной деревянною деревянную деревянные деревянный деревянным деревянными деревянных дерем дерет держа державный держал держала держалась держали держалось держась держат держать держаться держи держим держись держит держите держится держишь держу дерзкие дерзкий дерзким дерзких дерзко дерзкого дерзкой дерзкую дерзок дерзостей дерзости дерзостно дерзостные дерзость дерзостью дери дериганов дернет дернул дернула дернули дернуло дернуть дерутся десертом дескать десницей деспот деспотизм деспотически деспотическому десяти десятилетнею десятилетнюю десятин десятине десятину десятирублевую десятирублевый десятка десятки десятков десятого десятое десятом десятую десятый десять детали детей дети детками детки деток деточек деточка деточки детская детски детские детский детским детскими детских детского детское детской детском детскому детскою детскую детства детстве детство деть детьми деться детям детях дешевле дешево дешевую дешевый деятелен деятели деятельная деятельно деятельности деятельность деятельностью деятельною деятельные деятельным джентльмен джентльмена джентльменничанье джентльменски джентльмены ди диалектики диапазона диван дивана диванам диване диванов диваном дивану диванчик диваны дивизионно дивились дивился дивись дивитесь дивиться дивлюсь дивная дивно дивное дивную дивные дивный дивным диво диву дивясь дикарь дикая дикие дикий диким дико диковина дикого дикое дикой диком 
дикости дикость дикою диктовала диктовать диктовку диктует дикую дилетантизма дилижанс динамит диораме диплом дипломат дипломатическим дипломаты директор директора директрисы дирижера диск диссертацией диссонанс дитюся дитя дифтерит дичей дичи дичившийся дичь диэта дланью длилась длились длилось длился длинная длиннее длиннейшим длинно длинноволосый длинного длинное длинной длинному длинною длинную длинны длинные длинный длинным длинными длинных длиной длину длится для длятся дмитриев дмитрием дмитрий дмитрия дна дне дневная дневник дневника дневного дневное дневной дневном дневному дневных дней днем дни днища дно дном дню дня дням днями днях до добавил добавила добежав доберусь добиваешься добивай добивались добивался добиваться добиваются добилась добился добирается добирались добирался добираясь добиться доблести доблестно доблестях добр добра добрался добраться добрая добре добрее добрей добрейший добрели добренький добреющий добро добровольно добровольную доброго добродетели добродетель добродетельна добродетельная добродетельно добродетельного добродетельной добродетелям добродетелями добродушием добродушнейшим добродушно добродушного добродушное добродушным доброе доброй добролюбов добром доброму добросовестная добросовестный доброте добротой доброту доброты доброю добру добрую добры добрые добрый добрым добрыми добрыни добрынин добрых добряк добудет добываемых добываете добывал добывали добывание добыванья добывать добываю добывают добывающим добывая добыл добытые добыть добычу добьется добьюсь добьют довариваете доварит доведенные доведенных доведены доведете доведите доведу довез довезем довезу довел довела довели доверенности доверенность доверенностью доверии доверилась доверителю довериться доверия доверчив доверчива доверчиво доверчивого доверчивой доверчивости доверчивость доверчивостью доверчивый довершению доверши довершили доверьтесь доверявшего доверял доверялась доверялся доверясь доверяю доверяя довести доводами доводил доводит доводить 
доводишь доводы доволен довольна довольно довольны довольный довольным довольных довольства довольство довольствовался довольством довоспиталась доврались довремся догадавшись догадается догадалась догадался догадаться догадаются догадка догадками догадках догадки догадок догадывалась догадывался догадываюсь догадываясь доглядит догматы догнав догнал догнала договариваешь договаривала договаривали договаривать договаривая договорил договорила договорилась договорились договорился договорить догонишь догонял догонять догоняют догоравшего догоравший догорает догорал догорала догоревшей догорел догорим доделал додумалась додумался додумывался доедаем доедали доедешь доеду доел доели доехали доехать дожа дождавшись дождалась дождался дождаться дождевом дождевою дождевые дождей дождем дождемся дождется дождешься дожди дождик дождичек дождливую дождливых дождь дождю дождя дождями дожей дожив доживает доживал доживать доживающая доживем дожидается дожидался дожидаться дожидаюсь дожидаются дожидаясь дожил дожила дожить дозваться дозволение дозволили дозвольте дозволю дозволяется дозвонишься доискался доискивался доискиваться доить доищусь дойдем дойдет дойдешь дойду дойдут дойдя доймут дойти дока докажет докажете докажу доказав доказал доказала доказали доказана доказано доказательств доказательства доказательство доказательством доказать доказывает доказываете доказывается доказывал доказывали доказывалось доказывать докам доканчивает доканчивай доклад доклада докладах докладную докладом докладу докладывает докладывал докладывать докликался доколе доконал доконало докончив докончил докончить доктор доктора доктором докторской доктору доктрин документ документа документам документы докучливого докучливых докучной докучным дол долг долга долгая долге долги долгие долгий долгим долгих долго долгов долговой долговом долговременной долговременную долговязо долгого долгогривых долгое долгожданный долгой долголетний долгом долгу доле долее долетал долетают долетели должен должна 
должником должно должностей должности должность должны должным должок доли долин долинам долинах долине долинный долины долог доложил доложили доложился доложить долой долу долы дольнего дольше долю доля дом дома домам домами домах домашнего домашнее домашней домашнем домашнему домашние домашний домашним домашними домашних домашнюю домашняя доме домелю домик домика домики домов домовитой домового домовой домовому домовый домой домом домоседа домоседы домохозяйка дому домчавшись домчался домчимся домы дон донельзя донес донесем донесение донесению донесет донеси донесла донесли донести донесу донжуаны донизу донна доннерветтеров доном донос доноса доносилась доносились доносился доносится доносить доносу доносы доносятся доношу донского донцем доныне допечет допивая допилась дописать дописывает дописывать допитый доплыл доподлинно дополнения дополняет дополняла допотопных допрашиваемого допрашивает допрашивайся допрашивал допрашивала допрашивались допрашивать допрашивающих допрос допроса допросах допросика допросим допросить допроситься допросишься допросов допросчика допросы допрошу допускает допускаете допускается допускай допускал допускали допускалось допускать допускаю допустил допустила допустили допустим допустит допустить допущу допытывались допьяна допятилась дорисовывать дорн дорна дорном дорну дорог дорога дорогам дорогами дорогах дорогая дороге дороги дорогие дорогим дорогих дорого дороговизне дорогое дорогой дорогом дорогою дорогу дорогую дородный дорожа дорожат дороже дорожил дорожили дорожит дорожите дорожка дорожке дорожки дорожкой дорожку дорожная дорожное дорожной дорожном дорожному дорожные дорожный дорожу досада досаде досадливо досадливого досадно досадный досадовал досадой досаду досадую досадуя досады досаждают досаждающее доселе доселева доска досказал досказала досказано досказать доскам доскачет доске доски досконально доскою доску доследить дослушал дослушала дослушали дослушивает дослушивать досмеивался досок доспех доставай доставал 
доставала доставало доставалось доставался доставать доставаться доставил доставили доставит доставить доставляет доставлял доставляло доставшейся доставшихся доставшуюся достает достается достал достала достали достались достало досталось достался достанет достанется достанешь достану достанут достанутся достань достатком достаточная достаточно достаточном достаточными достать достают достаются достающеюся достиг достигаешь достигал достигали достигало достигают достигли достигло достигнет достигнешь достигну достигнув достигнутого достигнуть достижении достичь достодолжного достоевский достоин достоинств достоинства достоинствами достоинстве достоинство достоинством достоинству достойная достойней достойнейшей достойно достойное достойной достойный достойным достойных достояньем достроил достроился дострочу доступ доступна доступной доступном доступны доступных достучался досуги досчитал дотащился дотащиться дотла дотоле дотолкалась дотрогиваясь дотронется дотронулся дотягивает дотянет дотянешь дотянул дотянуть доучился дохленьких дохнет дохнул дохнула дохнут дохнуть доход дохода доходил доходила доходили доходило доходит доходов доходу доходцу доходы доходя доходят доцента дочел дочерей дочери дочерью дочерях дочесть дочиста дочитав дочитала дочитать дочитывал дочитывали дочитывать дочка дочке дочки дочку дочла дочли дочь дошедшее дошедшие дошел дошила дошла дошли дошло дощечка дощечке дощечки дощечку драгоценнейшие драгоценнейший драгоценно драгоценное драгоценность драгоценные драгоценный драгоценным драгоценных драгунские драдедамовой драдедамовом драдедамовый дразни дразнил дразнила дразнили дразнит дразните дразнить дразнишь дразня дразнят драка драке драки дракон драку драл драли дрались дрался драма драмы дранье драньем драпированы драпировка драпируется драпируюсь драпри драть драться дребедень дребезжащему дребезжащим древ древен древесных древко древнее древней древнейших древнем древни древние древний древних древности дремал дремать дремлет дремлется 
дремлешь дремлю дремлют дремлющей дремлющим дремлющими дремлющую дремные дремота дремоте дремотные дремотой дремоту дремоты дрему дремучий дремучих дроби дробил дробились дробный дров дрова дровами дрогнет дрогнувшие дрогнувших дрогнул дрогнула дрогнули дрогнуло дрожа дрожавшее дрожавшей дрожавшею дрожавшие дрожавших дрожавшую дрожал дрожала дрожали дрожало дрожаньем дрожат дрожать дрожащая дрожащей дрожащею дрожащим дрожащими дрожащую дрожек дрожи дрожит дрожите дрожишь дрожки дрожу дрожь дрожью друг друга другая друге другие другим другими других другого другое другой другом другому другою другу другую дружб дружба дружбе дружбой дружбу дружбы дружелюбно дружелюбной дружески дружеский дружеским дружескими дружеских дружеского дружеской дружескому дружескую дружества дружественные дружественным дружил дружился дружить дружище дружке дружкой дружна дружно дружной дружном дружную дружочек друзей друзья друзьям друзьями дрыхнет дрыхнешь дрыхни дрыхнут дрыхнуть дряблое дряблый дряблым дряблыми дрязг дряни дрянная дряннейшего дрянно дрянного дрянное дрянной дрянном дрянные дрянным дрянных дрянцо дрянь дрянью дряхлом дряхлую дряхлый ду дуб дубе дубняк дубняком дубовые дубы дубье дуга дуги дугой дугообразны дует дуется дузе дуклиду дум дума думаем думает думаете думаешь думай думайте думал думала думали думалось думам думать думах думаю думают думающих думая думу думы дуне дуней дунечка дунечке дунечки дунечкино дунечкинова дунечкиной дунечкины дунечкиных дунечкой дунечку дуни дунул дунуло дуню дуня дуняша дуняше дуняшей дуняши дуняшу дупелей дуплет дуплетом дур дура дурак дурака дураками дураки дураков дураком дураку дурацкая дурацки дурацкие дурачила дурачится дурачить дурачке дурачки дурачок дуре дурен дурман дурмана дурмане дурна дурная дурно дурного дурное дурной дурною дурнушка дурную дурные дурочке дуррак дуру дурында дурь дуся дуться дух духа духами духах духе духи духов духовник духовно духовному духовным духом духота духоте духоту духоты духу дучяша душ душа 
душами душат душе душевная душевного душевное душевной душевном душевные душевный душевным душевных душегубец душегубства душегубцы душен душенька душеньку душечка души душил душистая душистою душистым душистыми душит душите душить душкин душкина душкину душна душно душное душный душным душных душой душонка душою душу дуэли дуэль дуют дщерь дым дыма дыме дымилась дымился дымится дымка дымно дымной дымные дымный дымных дымом дыму дымят дымящимся дыр дыра дырах дыре дырке дырочки дыру дыры дырявая дырявые дырявый дырявым дыхание дыхания дыханье дыша дышал дышала дышали дышало дышат дышать дыши дышит дышится дышишь дышлом дышу дьявол дьявола дьяволе дьяволом дьяволу дьявольская дьявольский дьякон дьячка дьячок дюжего дюжина дюжинная дюжину дюжины дюссотов дяде дядей дядек дядечка дяди дядин дядьки дядьшка дядьшки дядю дядя е евангелие евангелии евангелия евг евгений евгению евнух еврей еврейский еврейского евреями европе европейские европою европу евстигней евстигнея евших египет египетские египетскими египетского египетскую египта египте его егоровна егору еда едва еде едем едемте едет едете едешь единичное единичных единоверцев единовременного единогласно единогласных единого единой единому единородна единородную единства единственная единственно единственное единственной единственному единственною единственную единственные единственный единственным единый единым едите едко едкого едкой едкою еду едут едучи едущая едят ее ежегодно ежедневная ежедневно ежедневного ежедневное ежедневной ежедневном ежедневный ежедневным ежедневных ежели ежемесячного ежеминутно ежеминутным ежеминутных еженедельная еженочной ежечасные езди ездивший ездил ездила ездили ездит ездите ездить ездиют ездой езды ездят езжал езжали езжу ей екатерингоф екатерингофе екатеринина екатерининского ектеньи ел ела елагин еле елей елена елене елену елены елец ели елисаветграде елисейские елка елку еловая еловую ель ельце ем емеле ему енотовых епиходов епиходова епиходову епоходов ерани ерань еранью ермак 
ермака ермолай ермолках ермолке ерошил ерунду еруслан ерша ершей ершика если есмь ест естественная естественно естественного естественное естественной естественном естественному естественнонаучные естественности естественною естественную естественные естественный естественным естественных есть есь ет ефим ефимьюшка ефтова ефтом ефту ехав ехавшему ехал ехала ехали ехать ешь ешьте еще ею ж жаба жаворонка жаворонков жадная жадно жадности жадностию жадностью жадные жадный жадным жадными жадных жажда жаждал жаждало жаждет жаждой жажду жаждущего жаждущей жаждущим жаждущую жажды жакетку жал жала жалась жалей жалела жалели жалеть жалею жалеют жалея жали жались жалиться жалка жалкая жалкие жалкий жалким жалкими жалких жалко жалкого жалкое жалкой жалком жало жалоб жалобились жалобная жалобно жалобный жалобным жалобы жаловалась жаловались жаловался жалование жалованье жалованья жаловаться жалок жалости жалость жалостью жался жалуемся жалуетесь жалуется жалуйся жалуюсь жаль жан жандарма жандармы жанлис жар жара жаргон жаре жареная жареной жарила жарится жарища жаркая жаркие жаркий жарким жарких жарко жаркого жаркое жаркой жаркую жарок жаром жару жарче жги жгла жгли жгло жгут жгучая жгуче жгучего жгучее жгучие жгучий жгучим жгучих жгучую ждал ждала ждали ждало ждать ждем ждет ждете ждешь жди ждите жду ждут ждущие ждущую жды же жевал жеваное жевать жег жезл желаем желаемого желаемое желает желаете желаешь желай желал желала желали желаль желание желанием желании желаний желанию желания желаниям желанная желанное желанным желанье желанья желательно желать желаю желающего желая железа железная железней железнодорожных железной железном железною железную железные железный железным железными железо железом желоб желобом желт желтенький желтенькими желтеют желтизной желтизны желтоватая желтовато желтоватой желтоватые желтого желтое желтой желтом желтому желтою желтые желтый желтым желтыми желтых желудком желудок желудочка желчен желчи желчная желчно желчное желчный желчь жеманства 
жемчуг жемчуга жемчугами жемчуге жемчугом жемчугу жемчужин жемчужина жемчужной жемчужный жен жена женам женами женат женаты жене женив женился женись женитесь женится женить женитьба женитьбе женитьбу женитьбы жениться жених жениха женихами женихи женихов жениховское женихом жениху женишка женишок женишь женишься женой женская женски женские женский женских женского женское женской женскому женскою женскую женственной жену женщин женщина женщинам женщинами женщинах женщине женщиной женщиною женщину женщины жены женюсь женят женятся жерди жердями жернова жертв жертва жертвами жертвах жертвовать жертвой жертвочка жертву жертвует жертвую жертвы жест жеста жестами жесте жести жестки жесткие жесткий жестким жесткими жестко жесткой жесткости жесткость жесткую жесток жестокая жестокие жестокий жестоких жестоко жестокой жестоком жестокости жестокость жестом жесту жесты жестяной жестяном жетон жечь жженкой жжет жив жива живая живее живей живем живет живете живется живешь живи живите живишь живности живо живого живое живой живом живописец живописи живописно живописной живописных живописцу живопись живописью живости живость живостью животного животное животной животный животными животных животрепещущей животрепещущею живу живут живуча живучи живущего живущей живущий живущими живущих живущую живую жившим живших живы живые живым живыми живых живьем живя жид жиденькие жиденький жиденькой жидким жиду жиды жизней жизненная жизненного жизненное жизненной жизненную жизненные жизненный жизненных жизни жизнию жизнь жизнью жил жила жилам жилах жилет жилета жилете жилетка жилетке жилетку жилетный жилету жилец жили жилица жилицу жилища жилище жилками жилку жило жилы жилье жильца жильцам жильцах жильцов жильцом жильцу жильцы жилья жир жирна жирно жирного жирное жирной жирные жирный жирных жирок жиром жиру житейские житейских житейского житейское житейской жителей жители жительства жительстве жить житье житья жмет жмется жмешь жмут жнивью жнитво жнут жорж жоржа жребий жребия жрецы жрешь 
жуан жуаном жувеневских жужжала жужжаньем жужжат жужжащий жужжит жужжите жук жуков жуковский журавли журавлиным журавля журил журнал журнала журнале журналистом журналов журналы журнальной журнальце журчанье журчаньем журчит жутко жуткость жучка жующие з за заалели заалелся заарестовать забавах забавляет забавляло забавляюсь забавник забавника забавный забаву забалканский забастовки забвение забвению забвенье забвенья забегает забегаешь забегал забегала забегать забегаю забегают забегая забегу забедневшее забежали забежать забежит забелеет забелелась заберется заберите заберут забилась забился забирает забирается забирали забирался забираясь забит забита забитым забитых забить забияки заблаговременно заблагорассудила заблагорассудилось заблагорассудит заблестят заблистала заблистали заблудился заблудитесь заблуждение заблуждении заболевала заболеваю заболеете заболел заболела заболею заболтавшегося заболтался забор забора заборами заборе заборов забором заборы забот забота заботам заботами заботах заботе заботились заботилось заботился заботимся заботит заботится заботиться заботишься заботливая заботливо заботливое заботливой заботливости заботливостию заботливость заботливостью заботливую заботливы заботливый заботливым заботливыми заботой заботою заботу заботы заботясь забочусь забрал забрала забрали забрались забрасывал забрать забрежжит забрезжилось забрезжит забрел забродит забросил забросить заброшенная заброшенной забубенного забудем забудемся забудет забудется забудешь забуду забудут забудь забудьте забулдыга забуянила забыв забывает забываете забывай забывайте забывал забывала забывалась забывали забывался забывать забываться забываю забывают забывая забывчив забывчивости забывшему забывшие забывший забывшись забыл забыла забылась забыли забылись забылось забылся забыт забыто забытого забытое забытой забытом забытую забыты забытые забытый забытым забытых забыть забытье забытьи забыться забытья забьется забьются завадских завален завалена завалившись 
завалилось завалился завалинке завалиться завалявшиеся завалялось заваривать заведен заведение заведении заведений заведения заведениями заведениях заведет заведется заведи заведите заведовал заведовают заведу заведутся заведую завезти завезу завел завелась завелись завелся завернул завернулась завернута завернутою завернуты завернуть завернуться завертелись завертелось завертываться завертываясь завершила завершители завеса завесила завести завесу завет завета заветного заветное заветной заветный заветных заветов заветом заветы завещал завещала завещание завещании завещанию завещания завещанного завещанных завивала завивать завидели завидит завидно завидовать завидуешь завидую завидя завизжала завираюсь зависело зависит зависти завистлив завистливо завистливого завистливы зависть завистью зависящим зависящими завитой завитые завитых завладев завлек завлекал завлекала завлекли завлечен завод завода заводах заводе заводил заводила заводит заводится заводить заводские заводы завоет завожу завозилась заволакивало заволновалась заволокла заволокло завопил завопила завопит завороженный заворотила заворчал заврался завсегда завтра завтрак завтрака завтракает завтракал завтракать завтракают завтраком завтраму завтрашнем завтрашнему завтрашний завтрему завывавший завывал завываниями завыванье завяжется завяжу завяз завязалась завязались завязалось завязан завязанный завязанными завязнет загадал загадала загадать загадка загадками загадки загадкой загадку загадок загадочно загадочное загадочной загадочному загадочные загадочный загадочным загадывает загадываний загар загвоздка заглавие заглавием заглавия заглавном загладилось загладится загладить заглохло заглушает заглушал заглушала заглушат заглушенный заглушенных заглушило заглушит заглушить заглушон заглядевшись загляделась загляделся заглядывавшей заглядывает заглядывал заглядывала заглядывать заглядывая заглянет заглянете загляну заглянув заглянул заглянула заглянуть загнан загнанная загниют загнутым заговаривай 
заговаривал заговаривала заговаривался заговаривать заговариваю заговаривая заговелись заговенья заговор заговорил заговорила заговорили заговорило заговорит заговорить заговорщик заговорщиков заговорю заговорят заголосил загорается загораживаемый загораживают загораживая загорались загоралось загораются загордится загоревшаяся загоревшейся загоревшеюся загоревшимися загорелась загорели загорелись загорелой загорелось загорелую загорелые загорись загородившем загородил загородили загородных загорожена загороженные загорячился заготовил заготовлю заградил заградить заграничная заграничный заграничных загреб загремел загремели загромождена загромождены загрубевшими загрубели загрызет загрязнилось загрязнился загрязнить загубила загудели загулял задаваемые задаваемым задавайте задавал задавала задавали задавать задавая задавила задавит задавлен задавят задайте задал задала задало задам задаром задаст задатка задатки задатку задаток задать задач задача задачею задачи задачу задаю задвигались задвигаются задвижкой задвижку задвинет задворками задворки задебренные задевает задевающего задевая задевши задел задела задели задело заденет задержал задержали задержало задержать задерживаем задерживай задерживать задерживаю задержкам задержками задержу задернется задернул задетым задеть задираете задирательное задирать задней заднем задний задних задобрить задок задолжал задом задор задора задохнулся задразнили задремавший задремал задремали задремать задремлет задремлешь задрожав задрожал задрожала задрожали задрожало задрожит задул задумавшись задумает задумаетесь задумай задумайте задумал задумала задумалась задумались задумался задумана задуманного задуманное задуматься задумаюсь задумаются задумчив задумчивая задумчивее задумчиво задумчивое задумчивой задумчивом задумчивосгь задумчивости задумчивость задумчивостью задумчивую задумчивы задумчивые задумчивый задумчивым задумчивыми задумчивых задумывается задумывайтесь задумывалась задумывались задумывался задумываться 
задумываются задумываясь задурманился задушевный задушил задушила задушило задушить зады задымились задымится задыхавшаяся задыхается задыхаешься задыхалась задыхался задыхаться задыхаюсь задыхающимся задыхающуюся задыхаясь заедает заедать заедены заеду заезжая заела заем заемного заемное заемному заемные заехал заехали заехать зажав зажаждал зажал зажаты зажатый зажглись зажгло зажгут заждались зажег зажечь зажжена зажженный зажженных заживаются заживем заживет заживо зажигает зажигал зажигали зажигательное зажигать зажигая зажила зажили зажило зажимая зажирел зажиточном зажмет зажмешь зажмуренные зажмурив зажмуриваясь зажмурил зажмурился зажмуря зажужжала зажужжали зазвать зазвенел зазвенело зазвенят зазвучал зазвучала зазвучат зазвучит зазовет зазору зазубринки заиграет заиграешь заиграл заиграла заиграли заиграть заиграют заигрывал заикается заикаться заикаются заикаясь заикнется заикнитесь заикнулась заимодавец заинтересовала заинтересовали заинтересовало заинтересовался заинтересован заинтересованы заинтересовать заискать заискивала заискиванье заискиваю зайдем зайдет зайдете зайдешь зайди зайдите зайду зайдя займа займемся займет займется займешь займите займусь займут займутся зайти зайца зайцами закажу заказ заказал заказали заказанные заказать заказу заказывал заказывала заказывались заказывать закалена закаливая заканчивается заканчивать закапали закарабкался закат заката закатал закате закатившееся закатившись закатилась закатилось закатился закатим закатной закатном закатный закатных закатом закату закатывалось закачал закивала закинем закинув закинул закинула закинулось закинуто закинуть закипел закипела закипели закипело закипит заклад заклада закладе закладка закладной закладом закладу закладчик закладчика закладчиками закладчики закладчиков закладчицы заклады закладываемое закладывай закладывал закладывала закладывать закладывают заклинаю заключает заключаете заключается заключало заключалось заключать заключаю заключен заключение заключением 
заключении заключению заключения заключениям заключив заключил заключила заключили заключить заклятье заколдован заколдованных заколот заколотила заколотить заколют закон закона законам законно законного законное законном законному законности законность законною законную законный законным законными законных законов законодатели законодатель законом закону законфузившись законфузилась законфузились законфузился законфузиться законченный законченных закончился закончить законы закопал закоптелая закопченного закоренелою закоробившись закорузлую закорузлый закоснелость закоулки закоченели закоченелый закрадывается закрасить закраснелась закрепить закричал закричала закричали закричат закричать закричите закричу закровянилось закроет закроется закрой закройся закрою закроют закругленной закружилась закружился закружите закружитесь закружится закрутишься закрутят закручивая закрыв закрывает закрываешь закрывай закрывал закрывали закрывать закрывая закрывшись закрыл закрыла закрылась закрыли закрылись закрыт закрыта закрытое закрыты закрытый закрытыми закрытых закудахтал закулисную закупить закупка закупками закупкой закупоривать закупорилось закуражился закурив закуривает закуривать закурил закусив закусил закусить закуска закусками закуске закуски закуску закусывал закусывала закусывать закутав закутавшись закутал закуталась закутался закутанный закутанным закутаться зал зала залает залах залаяла зале залег залегла залежавшейся залежался залежаться залез залезал залезли залезть залепетала залепила залетает залетал залетела залетело залечить залив залива заливалась заливались заливаться заливаются заливаясь заливистый заливного заливные залившею залившись залила залилась залились залился залитая залито залитое залитые залитый залить залог залогом заложена заложенном заложенный заложив заложил заложила заложили заложит заложить заложу залой заломив заломившаяся заломлены залпом залу залучить залы зальдевших зальет зальется зальешь зальюсь замай замайте заманит заманить 
заманишь заманчивей заманчиво замараться замарашки замасленной замасленную замасленные замасленных замахал замахивается замахиваясь замахнется замахнула замахнулась замашками замашкой замедленного замедлил замедляет замедляй замелькал замелькали заменил заменились заменился заменить замену заменю заменя заменяет заменялась замер замереть замерзнет замерзнуть замерла замерли замерло замертво замести заместо заметен заметенной заметив заметил заметила заметили заметим заметит заметить заметишь заметку заметливы заметна заметная заметно заметного заметное заметной заметном заметною заметную заметны заметным заметов заметова заметовские заметову заметовым заметок заметь заметьте заметят замечаемый замечает замечаете замечается замечаешь замечай замечал замечала замечали замечалось замечание замечанием замечанию замечания замечаниями замечателен замечательная замечательнейшая замечательнейший замечательно замечательного замечательное замечательном замечательные замечательными замечать замечаю замечают замечая замечен замеченной замеченным замечтает замечтал замечталась замечтался замешается замешался замешан замешаны замешательство замешательством замешкали замешкались замешкался замигал замирает замирала замирало замиранием замирающие замирающий замирающим замирая замка замке замки замкну замкнут замкнутую замкнутых замкнуть замнется замогильный замок замолит замолк замолкла замолкли замолкло замолкнут замолкнуть замолчал замолчала замолчали замолчат замолчать замолчи замолчит замолчу заморишь заморские заморской заморскую замочились замочился замочной замочную замою замрет замри замрут замуж замужем замужества замужестве замужеству замужнюю замучаешься замучай замученные замучил замучила замучилась замучили замучились замучился замучит замучить замшевые замшевый замыкалась замыкалова замыкаю замыкая замыкаясь замысел замысла замыслишь замыслов замыслы замыть замышляла замышлялась замяла замялся замямлил замять занавес занавесей занавеска занавесками занавески 
занавеску занавесок занавесом занавесы занавесью занды занес занесен занесенной занесенными занести занимавшая занимаемая занимает занимаетесь занимается занимаешься занимай занимайся занимайтесь занимал занимала занималась занимали занимались занимало занимался занимательным занимать заниматься занимаю занимаюсь занимаются занимающаяся заново занозила заносил заносчивая заносчиво заносчивого заносчивость заносчивы заносчивым занося заношенная заношенного заношенные занявший занял заняла занялась занялись заняло занялся занят занята занятая занятие занятии занятий занятия занятиям занято занятого заняты занятые занятый занять заняться заодно заорал заочно западают западник западню запамятовал запамятовали запарился запас запаса запасе запасем запасными запасом запасы запах запахе запахло запахов запахом запачкала запачкался запачканная запачканное запачканные запачкаться запашке запевает запевали запевания запевая запекаться запеклась запеклись запекшейся запекшимися запекшихся запел запела запели запендю запенившимися запер заперев запереть запереться заперла заперлась заперли заперся заперт заперта запертая заперто запертой запертую заперты запертых запершись запечатал запечатанную запивает запивать запил запинаются запинаясь запинкой запираемы запирает запираете запирается запираешь запиралась запирали запирался запирать запираю запирают запираются записали записались записан записанными записано записаны записать записка записками записках записке записки запиской записку записная записной записную записок записочку записывает записывания записыванья записывать записываю записывают записывая запихивать запихивая запишем запиши запишу заплакал заплакала заплакали заплаканное заплаканной заплаканы заплакать заплатами заплатах заплати заплатил заплатила заплатили заплатим заплатит заплатите заплатить заплатишь заплатки заплаты заплачем заплачет заплачешь заплачу заплачут заплевавший заплеванному заплеванный заплесневела заплесть заплетены заплутался заплывали 
заплывшие заплыли заплясало запнулась запнулся заподозрили заподозрить запоем запоет запоешь запоздало запоздалый запоздалым запоздать запой заполнить заполняя заполонила заполонить запомнивши запомнил запомнили запомним запомните запомнить запомню запомнят запонка запонки запор запора запоре запорет запороши запорошит запоя заправить заправленную заправляет запрег запредельных запрет запретив запретил запретишь запреты запрещает запрещала запрещение запрещения запрещениям запрещено заприте запритесь запропастившуюся запропастились запросила запросится запросто запросят запружена запрыгает запрыгал запрыгала запрыгали запрягут запряженная запряженной запрячь запугала запуганной запуганным запугать запускает запускал запускать запустив запустил запутавшись запутает запутаешься запуталось запутанного запутанный запутываться запутывая запутываясь запушенным запущенностью запущенную запущенный запущенным запущенных запущено запылавшие запылали запыленное запыленной запыленному запыленную запыленные запылены запылилась запылилось запыхавшись запыхалась запьешь запью запястье запястья запятнаны зарабатывал заработает заработал заработались заработался заработать заработка заработком заражал заражалась заражались заражаются заражая зараженного зараженные зараженный зараз заразил заразила заразилась заразили заразителен заразительная заразительно заразительны заразы зарайский зарайских зарайского заранее зарапортовался зарастает зарделось заре зарева заревам заревая зареве заревел зарево заревое заревом заревою заревые зарежет зарежь зарезал зарезала зарезу зарей заречная заречной зарею заржавленным заржет зари зарницы зарницына зарницыной зародилась зародились зародыш зарождаются зарождение зарождения заройся зароненной заронился заросла заросший заросшими зароют зарубежной зарубить зарывшись зарыдала зарыдаю зарыдают зарылись зарыт зарыто зарыть зарыться зарычал зарычи зарю заря заряда зарядили заряжанье засади засаленной засаленном засаленный засасывает засверкав 
засверкавшими засверкал засверкали засверкало засветили засветились засветилось засветло засвидетельствовал засвидетельствовала засвидетельствован засвидетельствована засвидетельствование засвидетельствовано засвидетельствовать засевались заседание засеку засела заселен засели заселилось засело засиделся засидится засипел засияет засиял засияла засияли засияло заскакать заскрежетав заскрежетал заскрипит заслоненная заслонил заслонилась заслонилось заслонявший заслонялось заслуга заслуги заслуженная заслуженного заслуженной заслуженном заслуженным заслужил заслужу заслушивался заслыша заслышавший засматриваться засмеется засмеют засмеются засмеявшись засмеялась засмеялись засмеялся засмеяться засмотрится заснет засну заснувши заснул заснула заснули заснуть засовы засохла засохли засохло засохшею засохшим засохшими засохших заспав заспанная заспанные заспаны заспишься заставал заставала заставил заставили заставило заставит заставить заставленное заставлю заставлявших заставляет заставлял заставляла заставляли заставляю заставляют заставляя заставой заставу заставы заставят застал застала застали застанем застанет застанешь застану застарелой застать застегивался застегивание застегивать застегивая застегнут застегнутое застелют застенчив застенчива застенчиво застенчивости застенчивость застенчивы застиг застилает застойки застойкой застольных застонал застонет застрахованы застраховать застрелившиеся застрелил застрелился застрелись застрелитесь застрелится застрелиться застрелюсь застряло застрянет застукало заступаться заступила заступило заступитесь заступиться заступом застучал застучали застучало застучат застучит застывала застывало застывая застыдилась застыдился застыдят застыла застыли застынет засудят засуетилась засуетились засуетился засуетятся засунув засунул засунула засуха засухи засуху засученными засучились засыпает засыпал засыпала засыпанная засыпаю засыпают засыпи засыплет засыхавшая засядем затаенной затаившейся затаившиеся затаившимся затаил 
затараторила затасканной затасканную затвердил затворе затворена затвори затворив затворилась затворить затворничества затворю затворяет затворяется затворялась затворят затворяя затевать затеем затеет затеешь затеи затей затекает затем затемнен затепли затеплят затерлось затерся затертого затертый затерявшийся затерял затерянная затерянный затеряться затесало затеснились затеял затеяли затеялись затеями затеять затих затихать затихла затихли затихло затишье затишьев затишьях заткнет заткни заткнутые заткнутым затмевал затмение затмений зато затолпилось затопав затопал затопишь затопчет заторелым заторопившись заторопилась заторопился заторопясь затосковал затравленная затратили затревожилась затревожит затрепетал затрепетала затрепетали затрепещет затрется затрогивать затронет затронули затруднение затруднении затруднений затруднения затруднилась затруднительно затруднительное затрудняется затруднялась затруднялись затруднялся затрудняясь затряс затряслась затряслись затрясусь затуманила затуманилась затуманит затушено затушить затушу затыкать затылка затылке затылок затягивает затягивая затянувшийся затянула затянули затянулось затянут заунывную заурядным заутрене заутрени заученное заученную заученный заучены заучусь зафилософствовался захар захара захарка захарке захаров захарович захаровича захаровичем захаровых захаром захару захарыч захарыча захвалить захватив захватившие захватил захватила захватили захватило захватит захватить захватывает захватывало захватывают захваченный захворал захворала захихикал захлебнулся захлебываясь захлестнула захлестнуло захлопнула захлопнулась захлопывала захлопывая захмелевший захныкал заходил заходила заходили заходило заходит заходить заходя заходящее заходящим захождения захожу захотел захотела захотели захотелось захотеть захотите захотят захохотал захохотала захохотали захохочет захочет захочется захочешь захочу захрипел захромала захрустел зацелованный зацепите зацепиться зачало зачастую зачатия зачатками зачах зачахла 
зачахну зачем зачеркивает зачеркнет зачерпнула зачешется зачешутся зачислен зачитался зачитывался зачтено зашаталась зашаталось зашевелились зашевелит зашевелится зашевелят зашевелятся зашел зашептал зашептала зашептались зашивались зашипел зашипит зашить зашла зашлепал зашли зашло заштопала заштопать зашумит зашумят защелкал защемило защита защите защити защитит защитите защитить защититься защитник защиту защиты защищавших защищает защищал защищала защищался защищать заявил заявила заявить заявлениях заявлено заявляет заячья звал звала звали зваль звание званием звания званых званья звать звезд звезда звездам звездами звездах звезде звездной звездой звездочка звездою звезду звезды звена звенела звенели звенело звенит звено звену звеньев звенья звеня звенят звенящий зверей зверем звери звериного звериной звериную звериный зверок зверская зверские зверского зверском зверства зверь зверю зверя звон звоне звонил звонили звонит звонить звонка звонки звонкие звонкий звонким звонко звонкой звонком звонок звоном звонят звук звука звукам звуках звуки звуков звуком звуку звучал звучала звучали звучало звучат звучащий звучащим звучен звучит звучным звякание звякают звякнул зги зготовляй здание здания зданиях зданье зданья здесь здеся здешнего здешней здешнем здешнему здешнею здешние здешний здешним здешних здешнюю здоров здорова здоровается здоровалась здороваться здоровая здоровее здоровенькие здоровенького здоровехонек здорово здорового здоровой здоровому здоровы здоровые здоровый здоровым здоровыми здоровье здоровьем здоровью здоровья здравие здраво здравого здравое здравом здравому здравствовать здравствует здравствуй здравствуйте здравый здсеь зе зевавшему зевает зевак зевал зевать зевая зевес зевнет зевнул зевнуть зевотой зевоту зевоты зейн зейнрок зелена зеленая зеленеет зелененькими зелененькую зеленеют зелени зеленные зелено зеленоватые зеленоватым зеленого зеленой зеленом зеленую зеленщик зеленщика зеленщики зеленщику зелены зеленые зеленый зеленым зелеными 
зеленых зелень зеленью земель земле земледелие землей землетрясений землетрясению землею земли землю земля земляк земляка земляку землям землянику землянке земная земно земного земное земной земном земные земского земской земском земскую земстве земчуг зените зеницах зеркал зеркала зеркалах зеркале зеркало зеркалом зеркальной зеркальный зеркальце зеркальцем зерна зерно зигзагами зигзагом зигфрид зима зиме зимнее зимней зимнем зимние зимний зимних зимой зиму зимы зинаиду зияющие зла злаки златая златит злато златом златым златятся злая злее злейший злился злит злитесь злится злиться злишься зло злоба злобе злобен злобная злобно злобного злобное злобной злобною злобные злобный злобным злобных злобой злобою злобствуют злобу злобы зловещая зловещее зловещей зловещие зловещий зловещим зловили зловредную злодее злодеев злодей злодейка злодейскою злодейств злодейство злодея злодеяние злое злой злом злонамеренная злорадно злорадством злословил злословия злости злостных злость злостью злосчастный злоупотреблении злоупотреблений злоупотреблять злу злую злы злые злым злыми злых злюсь злющая злющей злясь змее змеей змеею змеи змеилась змеились змеиный змеится змей змею змея змеящихся знавал знавали знавшие знавший знаем знает знаете знаешь знай знайте знак знака знаками знаки знаков знаком знакома знакомая знакомиться знакомо знакомого знакомое знакомой знакомом знакомому знакомств знакомства знакомстве знакомство знакомству знакомую знакомы знакомые знакомый знакомым знакомыми знакомых знакомя знал знала знали зналь знамен знамена знаменательно знаменательного знаменательные знаменательный знаменательным знаменательных знамением знамений знаменит знаменитейший знаменитой знаменитости знаменитость знаменитый знамения знаменовало знамо знамя знание знанием знаний знания знаниями знаньем знанья знатно знатной знатность знатный знатных знатока знать знаться значат значение значением значении значения значенье значила значили значило значит значительная значительно значительное 
значительной значительном значительному значительную значительные значительный значительным значительных значить значишь значу знаю знают знающая знающему знающий знающими зная знобить зное зноем зной знойкову знойной знойною знойный зною зноя зов зова зовем зовет зовете зовешь зови зовом зову зовут зовущих зол зола золе золовка золой золота золотая золоте золотеет золотистые золотистый золотистым золотит золотится золотить золотника золото золотоглавый золотого золотое золотой золотокудрый золотом золотопромышленника золотопромышленность золотою золоту золотухой золотушный золотую золотые золотым золотыми золотых золу зонтик зонтика зонтиком зоргенфрею зори зоркая зорким зорко зоркого зорком зорче зорь зорю зосимов зосимова зосимову зосимовым зрак зрачки зреет зрели зрелище зрелого зрелой зрелости зрелость зрелостью зрелый зрелым зрение зрению зрения зренье зреют зримей зримый зрит зрителей зрители зритель зрителями зришь зря зуб зубам зубами зубах зубки зубов зубок зубоскалить зубчатой зубчатою зубчатый зубы зудят зурна зурны зыбкий зыбкой зыбком зыблется зятем и ибо ибсен ибсена иван ивана иване иванов иванове иванович ивановича ивановичу ивановна ивановне ивановной ивановну ивановны ивановой ивановскую иванову иваном иванонва ивану иваныч иваныча иванычем иванычу ига игла иглой иглу иглы игнатьевич игнатьевича игнатьевичем игнатьевна игнатьевны игнатьич игнатьича игнашка иго иголка иголочки игом игра играем играет играете играешь играешься играй играйте играл играла играли играло играль играми играть играх играю играют играючи играющем играющею играющих играя игре игривая игривее игривенькая игривого игривости игривостию игривую игривый игрой игрок игру игрушечку игрушки игры идеал идеала идеалам идеале идеален идеализм идеалист идеалов идеалом идеалу идеалы идеальная идеально идеальный идее идеей идеи идей идейка идейку идейных идем идемте идет идете идеть идешь идею идея иди идиллией идиллий идиот идиотка идиотством идите идола идолов идолом идолу идти иду 
идут идучи идущего идущей идущему идущие идущим идущими идущих идущую идя иезуитов иене иерей иероглифов иерусалим иерусалима ижицы из изб изба избавил избавились избавите избавить избавлен избавление избавленным избавлялись избавляя избавь избаловали избаловались избаловался избалован избалованы избах избе избегайте избегал избегала избегали избегать избегая избегнул избегнуть избежать изберет изберешь избил избитая избито избитой избитую избитых избрав избрал избрала избранной избранном избранные избранный избранных избранья избрать избу избудешь избушке избы избытка избытке избыток избыть изведав изведал изведала изведаны изведать изведешь извел изверг изверга извергнете извергнуть изверившись изверились известен известившись известие известием известий известия известиям известиями известка известке известковую известная известно известного известное известной известном известному известности известность известною известную известны известные известный известным известными известных известью известясь извечно извечной извещение извещены извив извиваясь извивом извилистой извинение извинения извини извинилась извинили извинился извините извинить извиниться извинюсь извиняешься извинялся извинят извиняться извиняюсь извиняются извиняя извиняясь извлек извлекал извлекли извлеку извлечь извне изводят извозчик извозчика извозчикам извозчике извозчики извозчиков извозчиком извозчику изволив изволил изволила изволили изволит изволите изволь извольте изволят изворачивался изворачиваться изворотиться изворотливости изворотов извращенном изгадил изгадили изгадит изгиба изгибе изгладилась изгладилось изгнанник изголовье изголовья изгонишь изгоняется изготовилась изготовить изготовлять издавать издавна издалека издали издало издание издании изданию издания изданьица издателей издатели издательской издательство издателям издать издевается издеваться изделия издерганы издержанного издержек издержка издержкам издержками издержке издержки издохнем издохнуть издрогнувший 
издрогшая издыхании издыхающей изжарил изжога иззяб излагаете излагал излагать излагающего излагая излер излечивать излив изливавшейся изливается изливали изливались излить излишек излишне излишней излишнею излишним излишнюю излияний излияния изловили изловчусь изложением изложения изложил изложили изложить изложу изломал изломала изломалась изломались изломан изломанной изломанными изломанных изломано изломаться излученье измайлов измалетства измельчал измен измена изменение изменению изменив изменившимся изменил изменилась изменили изменились изменилось изменился изменит изменится изменить измениться изменнику изменница изменю изменюсь изменяем изменяет изменяется изменяла изменят изменятся изменять изменяющий изменяя измерили измерит измерить измерял измеряла измеряли измеряя измокшее измокшем изморились измучена измученная измученное измученному измученный измучившееся измучившись измучил измучилась измучили измучились измучило измучился измучиться измучишь измучусь измята измятая измято измятую измятые измятый изнеженному изнеженные изнеженным изнемог изнемогает изнемогал изнемогала изнемогаю изнемогают изнемогая изнемогли изнеможение изнеможении изнеможения изнемочь износишь изношенная изношенного изношенный изношенными изнурения изнуренное изнутри изо изобличена изображавших изображайте изображал изображать изображают изображен изображена изображение изображу изобрази изобразилось изобразительнее изобразить изобразят изобретал изобретатель изобретений изобретенном изобьешь изовраться изодранные изодранными изойти изорвал изорваль изорванное изошла изощрены изощряется израсходовано изредка изрек изрекаете изрекал изрекала изтопталь изувера изуверство изукрашенною изумил изумились изумился изумительно изумительного изумительной изумительною изумительные изумительный изумится изумить изумлен изумление изумлением изумлении изумлению изумления изумленный изумленными изумленных изуродованный изустный изучавший изучал изучать изучая изучение изученно изучил изучила 
изучить изъездить изъявил изъявили изъявления изъявлял изъяснил изъяснить изъясняю изъятья изысканная изысканно изыскать изюминки изюму изяществом изящна изящная изящно изящного изящной изящную изящны изящные изящный изящным изящных иисус иисусу икнув иконе иконостас икону иконы ил или илиаду илистом иль илье ильей ильи ильин ильина ильине ильинична ильинская ильинские ильинским ильинскими ильинских ильинской ильинскую ильич ильича ильиче ильичом ильичу илью илья илюша илюше илюши илюшу им иматру имевшая имевшего имевшей имевшему имевшею имевшиеся имеем имеет имеете имеется имеешь имей имейте имел имела имели имелись имело имелось имен имена именами именем имени имение имением имении именин именинам именинах именинники именинница именинницей именины имению имения именно именьишко иметь имею имеют имеющего имеющему имеющий имеющих имея ими импровизирует имущества имущественные имущество имя иначе иная инда индеек индейка индейки индейку индейский индивидуализма индивидууму индийского индифферентизм индию инеем иней инициалы инкрустацией ино иногда иного иное иной инока иному иностранец иностранка иностранного иностранной иностранные иностранцам иностранцами иностранцем иностранцы иною инспектором инстанции инстанцию инстинкт инстинкта инстинктами инстинктивно инстинктивное инстинктом инстинкту институт института институте инструкцию инструкция инструмент инструмента инструментами инструменте инструментом интегральное интеллигента интеллигентных интеллигенцией интеллигенции интендантов интерес интереса интересах интересе интересен интересная интереснейшем интереснейшую интересно интересного интересное интересной интересном интересны интересные интересный интересным интересных интересов интересовал интересовала интересовало интересовался интересовать интересоваться интересом интересует интересуете интересуетесь интересуется интересуюсь интересуются интересуясь интересы интимном интимностей интонацией интонации интриги интриговать интригует ину иную инфлюэнца 
информацию иные иным иными иных иоаннова иоган ипохондрией ипохондрик ипохондрику ипохондрическое ипохондрию ирина ирине ириной ирину ирины ирис ирисы иронией иронически иронический иртыша исай исая искажал искажать искажая искажение искажением искажению искаженным искажены исказила исказили исказилось исказить искал искала искалеченной искалеченным искали искам исканий искания искательниц искать исключая исключением исключений исключения исключениями исключительно исключительное исключительном исключительный исковеркана исковерканную исковеркано искони искоренено искоренить искореняет искоса искр искра искрами искренен искренна искренне искреннего искреннее искренней искреннему искренние искренний искренним искренно искренности искренность искренностью искренняя искривившеюся искривившимися искривившимся искривила искривилось искривленном искривленною искривленный искривленным искривлено искрились искрой искрометное искры искупался искупит искупительный искупителя искупить искупление искусно искусным искусными искусств искусства искусствах искусстве искусственная искусственно искусственной искусство искусству искушенье испакостили испанец испанию испанке испанскую испарения испарениями испачкав испачкался испейте испекли испекут испепелить испепеляешь испепеляющие испечет исписанная исписанной исписанную исписаны испитое испитым испить исповеди исповедовал исповедовала исповедовать исповедывал исповедь исповедью исподлобья исподтишка исполинский исполнен исполнена исполнение исполнением исполнении исполнения исполненного исполненное исполненной исполненный исполненных исполнено исполнены исполнив исполнил исполнила исполнились исполнилось исполнимость исполнит исполните исполнитель исполнителя исполнится исполнить исполниться исполнишь исполню исполняет исполнял исполняла исполнять исполняю исполняют исполняя использован использование испортил испортилось испортите испортить испортишь испорчен испорченные испорченный испорчу испошлился исправился исправимая 
исправить исправишь исправлением исправления исправляет исправлять исправника исправнику исправно исправным исправных испрашивать испросив испуг испуга испугавшись испугается испугал испугалась испугали испугались испугался испугаль испуган испуганная испуганно испуганного испуганное испуганной испуганном испуганные испуганный испуганным испуганными испуганы испугать испугаться испугаюсь испуге испугом испугу испужаешь испужались испускала испуская испустил испытав испытавшей испытавший испытал испытала испытание испытаний испытания испытанные испытанный испытать испытаю испытующии испытующим испытывает испытывал испытывала испытывали испытывать испытываю исследования исследованы исследую иссосали иссохшее иссохшей иссохшие иссохшими иссохших исстрадаешься исстрадались исстрадался исступление исступлении исступлений исступления исступленно исступленном исступленному истаскался истасканное истасканные истасканных истаял истерзал истерзала истерзали истерзан истерзана истерзанной истерзанный истерика истерике истерически истерическим истерических истерического истеричны истечении истин истина истинам истине истинно истинное истинной истинном истинные истинный истинным истинных истиной истину истины истолкование истолкования истолкователя истома истоме истомилась исторг историей истории историк историческим исторического историю история источник источника источнике источники источников истощался истощена истощенный истощив истощилась истощился истощится истратила истратили истраченный истраченных истребил истребить истребленных истребленья истребляли истреблять истрепанная истрепанное истрепанном истрепанных истязал истязаний истязания исход исхода исходе исходить исходя исхудавшем исхудалом исцеленья исцеляющую исчахнет исчез исчезает исчезал исчезала исчезали исчезало исчезать исчезают исчезла исчезли исчезло исчезнет исчезнете исчезни исчезнут исчерпаешь исчерпаны исчерченных исчисление исчислил исчислить исшаркавшегося ит итак итал италии италию итальянски 
итальянские итальянский итог итога итоги итого иуда иудеев иудеи их ихнего ихнее ихней ихние ихнюю ихняя ишь ища ищем ищет ищете ищешь ищи ищите ищу ищут ищущего ищущие июль июля июне июнь июня й йодоформ к ка кабак кабака кабаке кабаки кабалу кабацкой кабачного кабачным кабинет кабинета кабинете кабинетов кабинетом кабинету каблук каблуками каблуки каблучки кабули кабы кавалер кавалера кавалерии кавалеров кавалеру кавалеры кавалькада каватину каватины кавказе кавказское кавычках кавычки кадил кадили кадило кадилом кадильный кадки кадочка кадочку каждая каждого каждое каждой каждом каждому каждою кажду каждую каждый каждым каждыми кажет кажетесь кажется кажешься кажись кажусь кажутся кажущуюся казавшеюся казавшийся казак казал казалась казались казалось казался казанскою казарм казармах казармой казармы казать казаться казачка казачку казачок казенная казенного казенное казенной казенную казенные казенщину казимир казне казней казненных казнены казни казнил казнила казнит казните казнить казну казны казнь казнью казнят казуистика казуистики казуистику казус кайма как какая какие каким какими каких како каков какова каково каковы каковые какого какое какой каком какому какою кактусами кактусов каку какую кал каламбуром каланча каланчой калачей калачом калашный калеб калебов калек калека календарем календарь календаря калеными калигула калинникова калитка калитке калитки калитку калиту калош калошах калоши камелии камелий камелиями каменистом каменистый каменная каменного каменное каменной каменным каменных каменщик камень каменья камер камердинер камердинером камешек камешкам камешки камином камне камней камнем камни камню камня камням камнях каморка каморке каморки каморку кампанил камушками камфарой камыш камыша камыши канав канавами канаве канавкой канаву канавы канал канала каналы каналью каналья канареек канарейка канарейками канарейках канарейки канат канве канву канвы кандалов канделябры канет канешь каникулы канкан канканную канул канула канцелярии 
канцелярист канцеляристов канцеляристы канцелярию канцелярские канцелярскими канцоной капали капель капельки капельку капернаумов капернаумова капернаумову капернаумовых капитал капитала капиталисты капиталов капиталом капиталу капиталы капитальная капитального капитальное капитальном капитальному капитальный капитан капитолии капитэн капкан капле каплей каплет капли каплю каплют капля каплям каплями капоте каприз каприза капризам капризах капризен капризная капризница капризничай капризничал капризничать капризно капризный капризным капризов капризу капризы капсюль капуста капустой капусту капусты кара карабкался карабкаясь караван карамзин карандаш карандашей карандаши карандашиком карандашом карать караул караулила караулить карашо карающего кардамонову карет карета каретах карете каретного каретное каретой карету кареты карий карих каркают карлик карловны карлосу карль карман кармана карманам карманах кармане карманном карманные карманный карману карманы кармен карменситы карнизов карп карпатах карпович карт карта картавя картам картами картах карте картежник картин картина картинами картине картинка картинки картинно картиной картинок картину картины картишки картограмме картограммой картограмму картограммы картой картонки картонку картофелем картофель картофелю картофеля карточек карточка карточку карту картуз картуза карты кары карьера карьере карьерой карьеру карьеры касавшемуся касавшийся касается касалась касалось касался касаются касающейся касающемся касающихся касаясь каске каскетку каски кастаньет кастелянши кастрюль кастрюлю кастрюлями катавшийся катается катай катастроф катастрофа катастрофе катастрофой катастрофы кататься кате катей катерина катерине катериной катериною катерину катерины катехизис кати катила катился катит каторгами каторге каторги каторгу каторжник каторжного каторжной каторжные каторжный каторжных катышек катю катя катят катятся каучука кафедру кафедры кафешантана кафтан кафтана кафтане кафтанов кафэ кацавеек кацавейка качавший 
качает качается качал качала качали качалка качался качанье качаться качающихся качая качели качелях качеств качества качествами качествах качестве качнув качнулся качнусь каша кашей кашель каши кашин кашлем кашля кашляет кашлял кашляла кашлянув кашлянул кашлять кашляю кашляя каштановые каюсь каюта каюте каюту каяться квадратный квадраты квартал квартала квартале квартального квартальной квартальному квартальные квартальный квартальными квартальных кварташками квартир квартира квартирам квартирами квартирах квартире квартиркам квартирку квартирная квартирных квартировать квартирой квартиру квартиры квас квасом квасу квасцов квасы квентин кверху келье келья кем кеплеровы керченские кивает кивал кивать кивая кивера кивнув кивнул кивнула кидает кидается кидал кидала кидать кидаются кидаясь киев киева киеве киевский кий кильватерной кинематографе кинжал кинжалы кину кинув кинул кинулась кинулись кинулся кинуто кинутого кинутые кинуть кинуться киот киотом киоты кипарисный кипарисы кипел кипела кипело кипение кипеть кипит кипсэком кипучая кипучими кипят кипятком кипятят кирбитьевна кирбитьевне кире кирка киркой киркою кирпич кирпича кирпичей кирпичик кирпичиков кирпичный кирпичных кирсановский кисеей кисейная кисейными киселе кисель киселя кислая кисло кислое киснет киссинген кистей кисти кисть кистью китайских китайского китайцами китах кихотов кичке кичливости кишат кишела кишели кишки клавикорды клавиш клавиши клад кладби кладбища кладбище кладбищенской кладбищу кладет кладете кладите кладку кладовой кладовую кладом кладу кладут кладя клал клала кланявшиеся кланяется кланялась кланялись кланялся кланяться кланяются кланяясь кларнет класса классах классе класть клеверными клеверных клевета клевете клеветник клеветой клевету клеветы клевещете клевещут клеенкой клеенчатый клейменые клеймила клеймили клеймы клеймят клекот клен кленовые клены клетка клетке клетки клетку клетушка клетушке клетчатых клешни клещах кликал кликами кликать клике клики кликни кликну кликнул 
кликнуть климат климата климате климатические клинком клинок клич кличет кличешь кличь клоак клоакам клоаки клобука клок клокотало клокочут клонил клонилась клонились клонило клонится клоня клонясь клонятся клопа клопов клопом клопшток клопы клочка клочками клочке клочки клочком клочок клуб клубами клубе клубный клубов клубок клубы клумбами клюках клюквы клюкою ключ ключа ключами ключей ключи ключик ключниц ключница ключом клюют кля кляла клялись клялся клянет клянется клянешь кляни клянусь кляня клясть клясться клятвам клятвы кляча клячи клячонка клячу кн книг книга книгам книгами книгах книге книги книгой книгопродавец книгопродавцы книгу книжек книжечку книжка книжками книжках книжке книжки книжкой книжку книжной книжные книжный книжонки кнопа кнопками кнопку кнут кнута кнутами кнутов кнутом кнуту кнуты княгини княгиня княжеская княжеский княжеском княжна княжны княжьего князей князь князька князья князю князя ко кобелева кобыле кобыленка кобыленке кобыленку кобылиц кобылица кобылятниковым ков коварнее коварно коварную коварные коварства коварство коварству кове ковер коверкала коврам коврами коврах ковре коврик ковров ковром ковру ковры ковчег ковчега ковчеге ковша ковы ковылем ковыли ковыль ковырять когда кого когса когти когтит когтями когтях кодексе кое коего коему кожаное кожаном кожаным коже кожи коз коза козе козел козеля козла козловых козлу козни козоедова козы козырев козыри козырьком козьего коим коих кой кокардой кокетку кокетливо кокетничает кокетничаете кокетничала кокетничать кокетничаю кокетства кокетстве кокетство кокетству кол кола колбасница колбасной колбасы колдовское колдовства колдун колдуна колдунья колдуют коле колебалась колебании колебания колеблется колеблющейся колеблющемся колебля колеблясь колее колеи колен колена коленами коленах колене коленей колени коленках коленке коленки коленкоровые коленку колено коленопреклоненным коленочках коленяк коленям коленями коленях колес колеса колесами колесах колеснице колесо колец колечище 
колечках колечко колечком колею колеями коли количестве количество количеством колко коллежский коллежским коллежского коллежской коллежскую коллекцию коллекция колода колодезь колодец колодник колодой колоду колок колокол колокола колоколов колокольни колокольный колокольным колокольня колокольцы колокольчик колокольчика колокольчики колокольчиков колокольчику колола кололись коломенских колонии колоннами колонной колонны колорит колорита колоритом колос колосья колотилины колотит колотится колотить колотиться колотушку колоть колоться колотятся колпак колпаке колпаки колпаком колпине колумба колхидой колчан колыбель колыбелью колымаге колымягин колыхалась колыхаться колыхаясь коль колька кольца кольце кольцо кольцом кольцы кольчугу колю колючие колючий колючих коля коляска коляске коляски коляской коляску колясочке колясочкой колясочку ком команда команде командир командиров командировку командиром командовал командор командора комары комбинаций комедии комедию комедия комет комета кометы комизм комик комиссии комиссию комиссия комическая комические комической комическую комкая комком комку коммерческий коммиссаржевской коммун коммуна коммуне коммуну коммуны комнат комната комнатам комнатах комнате комнатка комнатке комнатки комнатку комнатном комнату комнаты комод комода комоде комоду комок компанейские компанией компании компанию компания компаньоном компаньоны комплекции комплимент комплимента комплиментов комплименты компромиссов кому комфорт комфорта комфорте комфортно конверт конвоем конвойного конвойный конвульсии конго конгрессов кондитера кондитерской кондрашки кондуктор коне коней конец конечно конечные конник конница консервативное консервативные консерватории консилиум конская конский конских конспиративным констан константин константина константином константинополь константину конституцией конституций консультации контор контора конторах конторе конторка конторскую контору конторщик конторщика конторы контрабас контракт контракта контракте контрактом 
контракту контракты контральтом контрдансов контролем контру конур конуре конуру конуры конус конфекты конфетами конфузился конфузится конфузишься конфузливо конфузливы конфузьтесь конфузятся конца концам концах конце концентрировалась концентрических концепция концерт концертах концерте концов концом концу концы кончается кончалась кончались кончалось кончался кончатся кончать кончаю кончаются кончен кончена кончено кончив кончивших кончик кончике кончики кончиком кончил кончила кончилась кончили кончились кончилось кончился кончим кончит кончится кончить кончиться кончишь кончу конь коньке коньяк коньяком коньяку коньячком коню конюшен конюшенную конюшне конюшни конюшню конюшня коня копаю копают копеек копеечки копейка копейками копейке копейки копейкой копейку копен коперник коперника копи копии копит копить копиться копию копна копоть копошились копыт копыта копытами копытом корабли корабль кораблю корабля кораллов кордегардии корделией корделии корделия коренной коренные коренным коренными коренных кореньями корешки корзине корзинку корзиной корзиночкой корзину корзины коридор коридора коридоре коридоров коридором коридору корит корица корицей корицу корицы коричневого коричневое коричневой коричневыми корка корки кормами корми кормил кормилец кормили кормилица кормит кормить кормишь кормление кормят корней корнелия корнем корнета корнету корни корню коробит коробить коробиться коробка коробке коробки коробку коробочками коробочке коробочки коробясь корова коровам корове коровник корову коровы коровье коровьими корок королева королевой король короля короне корону короны коротаю коротенькие коротенький коротеньким коротенькой коротеньком коротки короткий короткими коротких коротко короткого короткое коротком короче корпию корпус корпуса корпусом корреджиевой корректна корректуру корреспонденции корреспонденция корточках корчаги корчит коршун коршуну коры корыстолюбивых корысть корыта корытами корыте корыто корь кос коса косами косвенно косенький косится косичку 
космос коснел косности косноязычен косноязычная косноязычное косноязычные коснувшись коснулись коснулось коснулся коснуться косо косоворотке косоворотку косой косоплеткой косою косте костей костер кости костлявой косточкам косточкой косточку костра кострами костров кость костюм костюма костюмах костюме костюмов костюмом костюму костюшку костя костяной костяшек косы косым косынка косынкой косынку косынок косыночка косыночки косыночку косяком косясь косят косятся кот кота котенка котенком котеночка котлет котлетам котлетки котлеты котомку которая которого которое которой котором которому которою которую которые который которым которыми которых коттедж коттень коты кофе кофеем кофеи кофей кофейник кофейника кофейником кофейнику кофейной кофею кофеями кофий кофточка кофточки кофточку кох коха кохане кохом кочевые коченеют кочергой кочкам кочки кочующих кошачьи кошек кошелек кошелька кошельке кошемар кошка кошки кошкой кошмар кощунственных кра краб краденое крадеными крадешь краев краем краешком кража краи край крайне крайнее крайней крайний крайних крайностей крайности крайность крайностях крайнюю крако крал кран крапиве крапивой крапиву крапинками краса красавец красавиц красавица красавицам красавице красавицей красавицы красавцем красе красен красив красива красивая красивее красивенькая красиво красивое красивой красивом красивую красивы красивые красивый красивыми красивых красил красили красильщик красильщика красильщике красильщики красильщиков красит красить краска красками красках краске краски краской краскою краску красная краснеет краснеешь краснел краснела красненькие красненький красненькой красненькую краснеть краснею краснеющим краснея красно красноватое красноваты красноватые красноватый красноватыми красного красное красной красном красному красноречиво красноречивое красноречивые красноречие краснощекая красною красную красны красные красный красным красными красных красовался красой красок красот красота красоте красотой красотою красоту красоты 
красочны красть красу красы кратер краткие кратко кратковременного краткости краткую кратчайшим крахмалит крашеные крашеный краю края краям краях кредит кредитивные кредитками кредитке кредитки кредитку кредитный кредитными кредитовать кредиток кредиторшей кредиту кремля кремнист кренделей крендельков кренделями крепилась крепили крепился крепитесь крепится крепка крепкие крепкий крепким крепко крепкое крепкой крепкую крепнет крепок крепости крепостник крепостники крепостного крепостном крепостные крепостным крепостных крепость крепостью крепчал крепче крепь крепясь кресел кресла креслами креслах кресле кресло креслом крест креста крестами кресте крестик крестил крестила крестилась крестили крестился крестин крестины крестит крестится крестить крестненький крестную крестным крестов крестовском крестовые крестом кресту кресты крестьян крестьянам крестьянами крестьяне крестьянин крестьянина крестьянка крестьянская крестьянских крестьянской крестясь крещены крещенье крещенью кривая кривизну кривизны кривила кривились криво кривой кривом кривы кривыми кризис крик крика криками криках крике крики крикливо крикнет крикнешь крикнул крикнула крикнуть криков криком крику кринолине кристаль кристально критик критики критиков критиковать крича кричал кричала кричали кричаль кричат кричать кричащем кричи кричит кричите кричишь кричу кров кровавая кровавиться кровавого кровавой кровавый кровавых кровати кроватка кроватке кровать кроватью кроватями кровель крови кровию кровлей кровлю кровля кровлями кровная кровное кровные кровный кровом кровообращение кровопроливцы кровопролитий кровотечение кровь кровью кроется кроила кроить крокета крокодилов кроликов кроме кроне крот кроткая кроткие кроткий кротким кроткими кротких кротко кроткого кроткое кроток кротости кротостию кротость кротостью крошат крошеные крошечная крошечное крошечную крошечные крошить крошка крошки кру круазе круг круга кругам кругами кругах круге круги кругла круглая кругленькие круглого круглое круглой 
круглолицая круглом круглую круглые круглый круглым круглыми круглых кругов круговорот круговорота кругом кругу кружатся кружащаяся кружащийся кружев кружева кружевами кружевах кружевною кружевных кружево кружевцом круженье круженьем круженья кружи кружилась кружились кружит кружится кружить кружиться кружках кружке кружки кружку кружок крупно крупного крупной крупные крупным крупными крупных крупой крупы крутая крутившимся крутизну крутились крутился круто крутого крутой круторогий крутым крутыми крутясь кручами круче кручи кручину круша крушить крыл крыла крылами крылатое крылатый крылах крылий крылись крыло крылова крылом крыльев крыльем крыльца крыльце крыльцо крыльцом крыльцу крылья крыльями крыльях крым крыма крыс крысах крысиную крысой крысы крытая крытой крытые крыш крыше крышей крыши крышку крышу крюк крюков крюком крюку крючка крючках крючке крючком крючкотворец крючок крючочки крякнет крякнул кряхтела кряхтит кряхтите кряхтя кстати ксюша кто кувшинами кувыркнулся кувырком куда кудахтает кудахтал кудахтанье кудахтаньем кудахтающие кудель кудрей кудри кудрино кудрявой кудрями куды кузины кузнец кузнеца кузнецов кузнецу кузнечики кузница кузовковы кузьку кузьминична куклы куколку кукушка кулак кулаками кулаке кулаки кулаком кулачонками кулебяк кулебяка кулебяки куликовом куликовым кулинарной кулиса кулыгин кулыгина кулыгину кулыгиным кулькова кульком культурные кум кума кумачах куме кумир кумирню кумиром кумиры кумой кумом куму кумы купала купалась купались купальне купальню купальня купания купаньем купаться купец купеческие купеческих купи купидонов купил купила купит купить купишь куплен куплена купленное купленную купленные купленными куплено куплю купол куполом купоны купца купцам купцах купцов купцу купцы купчик купчиха кур куражился курами куранты куреньями курил курилась курился куриная куриную курит курите курить курица курице курицей курицу курносое курносые курок куролесил куропатки курс курса курсистка курске куртины куртке куртки куртку 
курточки курточку курчавою курчавый куры курьез курьезу курьер курьера курьерский курьеру курю куря курятникам курятники курящейся курящих кусали кусая куска кускам куски куском кусок кусочек кусочка кусочкам кусочками кусочки куст кустам кустами кустарник кустах кусте кустика кустов кустом кусты кутает кутались кутаясь кутит кутьи кутью кутья кухарка кухарками кухарках кухарке кухарки кухаркой кухарку кухарок кухмистерской кухне кухней кухни кухню кухня кухням кухонной кухонный куцой куча куче кучей кучер кучера кучерам кучерами кучере кучером кучеру кучи кучка кучками кучки кучку кучу кушает кушай кушак кушаком кушал кушала кушало кушанье кушанья кушать кушаю кушают кушетке кушетку куши куще кующей кхе кхи л ла лабаз лабазника лабазнике лабазнику лабиринт лавиза лавизе лавизу лавизы лавину лавкам лавках лавке лавки лавкой лавку лавочек лавочка лавочкам лавочках лавочке лавочки лавочку лавочник лавочники лавочнику лавочные лавр лаврентий лаврентия лавром лавры лагере лагерь лагуне лагуны лад ладан ладах ладил ладить ладно ладони ладонь ладонью ладонями лады ладью лае лаем лает лаетесь лается лазаревич лазарь лазаря лазил лазить лазури лазурной лазурь лай лайдак лайтесь лакедемонов лакее лакеев лакеи лакей лакейским лакейской лакейскому лакейскую лакейщину лакею лакея лакеям лакированные лакомств лампа лампада лампадки лампадой лампе лампой лампу лампы лангвагеном лангвагену ландышах ландышей ландыши ланитам ланиты лапах лаптев лаптей лапу лапши лапшу ларец ларцов лас ласк ласка ласкает ласкал ласкала ласкалась ласками ласкательный ласках ласкаю ласкают ласкаются ласкающая ласкающем ласкающую лаская ласкаясь ласке ласки ласков ласковая ласковее ласково ласкового ласковость ласковы ласковые ласковый ласковым ласковых лаской ласку ласточка ласточкино лат латах латник латыни латынскую латынью латыши лафитом лачуг лая лаяла лаять лба лбов лбом лбу лгал лгала лгали лгать лги лгу лгунишка лгунью лгут лгущие лебедей лебеди лебедь лебезил лебезятников лебезятникова 
лебезятникове лебезятникову лебезятниковым левая левого левой левом левою левую левый левым лег легавая легион легиона легистраторша легка легкая легкие легкий легким легкими легких легко легковерная легковерные легкого легкое легкой легком легкомыслен легкомысленнее легкомысленно легкомысленного легкомысленное легкомысленной легкомысленных легкомыслие легкомыслием легкомыслии легкомыслия легкоперой легкою легкую легла легли легло легок легонько легче лед леденец леденея леденит леденцами леденцах леденящий леди ледком ледоход ледяная лежа лежавшего лежавшее лежавшие лежавший лежавшим лежавшими лежавших лежавшую лежал лежала лежали лежало лежанке лежанки лежанку лежанок лежанье лежаньем лежанья лежат лежать лежащего лежащий лежащими лежащую лежи лежит лежите лежишь лежу лез лезвие лезвием лезвию лезгинку лезет лезешь лезть лезут лейтмотив лейтмотивом лекарем лекарств лекарства лекарствами лекарствах лекарственных лекарство лексиконе лектор лекции лекций лекцию лелеемый лелеет лелей лелеял лена лене лени ленив ленивая ленивее ленивец лениво ленивого ленивое ленивой ленивом ленивою ленивы ленивые ленивый ленивых ленился ленора лености леночка лентами лентах ленточек ленточке лентою ленты лентяй лентяя лень ленью леню леня леонардо леонид леонида лепестки лепет лепетал лепетала лепетом лепечет лепилась лепились лепно лепят лепятся лермонтов лермонтова лес леса лесами лесах лесе лесничестве лесничество лесничий лесной лесные лесных лесов лесок лесом лести лестию лестниц лестница лестницами лестнице лестницей лестницу лестницы лестничестве лестно лестного лестным лестных лесть лестью лесу лет лета летает летаешь летай летал летала летало летам летами летать летах летают летевшую летел летела летели лететь лети летим летит летите летишь летнего летнее летней летнем летнему летние летний летних летнюю летняя лето летом летописей лету летун летучая летучей летучим летучих летя летят лечат лечебницу лечению лечения лечи лечил лечилась лечит лечить лечиться лечишься лечь 
лешие леший лешим лжет лжете лжешь лжи лжива лживой лживою ли либерал либерализма либералы либеральной либеральный либеральным либо ливень ливингстона ливрее ливреи ливрейном ливрея лида лидией лидинька лидию лидия лидочка лидочке лидочкой лидочку лизавета лизавете лизаветин лизаветина лизаветой лизавету лизаветы лизнет лик лике ликов ликованье ликом ликует ликург ликургами лил лилась лилии лилий лились лиллас лилово лиловое лиловые лиловыми лился лимон лимонничаешь лимоном линейке линейки линейку линией линии линий линию линия линор линялая линять лион липкий липким липкой липли липовая липового липовые липовым липовых липпевехзель липпи липу липы лира лирическая лисица лиссабонского лиссабонское лист листа листами листе листиках листиков листка листках листке листки листов листок листом листочки листу листы листьев листья листьях лись литавр литейной литейном литератор литераторов литератором литераторы литература литературе литературно литературном литературную литературный литературой литературу литературы лития лить лифчик лихо лихом лихоманка лихорадка лихорадке лихорадки лихорадку лихорадочная лихорадочно лихорадочного лихорадочное лихорадочной лихорадочность лихорадочную лихорадочный лихорадочным лихорадочных лиц лица лицами лицах лице лицезреть лицемерный лицо лицом лицу личек личика личике личико личиком личиной личная лично личного личной личном личному личности личность личностью личную личные личный личным личных лишает лишали лишений лишениями лишенное лишенной лишенною лишенные лишено лишившийся лишил лишились лишился лишить лишком лишнего лишнее лишней лишние лишний лишним лишних лишняя лишу лишь ло лоб лобзаний лобызает ловелас лови ловил ловит ловить ловкая ловкий ловко ловкость ловкую ловлю ловушка ловушкой ловушку ловя ловят лога логика логики логикой логиныч логическая логически логическим логической логично логичны лодка лодке лодки лодку лодочник лодочника лодочнику ложа ложась ложатся ложе ложек ложем ложементов ложечка ложечки ложечкой 
ложечку ложи ложилась ложились ложилось ложился ложись ложитесь ложится ложиться ложка ложки ложку ложного ложное ложные ложу ложь ложью лоз лозунг лозунгом локон локончиками локончики локоть локтей локтем локти локтя локтями локтях лом ломает ломаешь ломайтесь ломал ломала ломаль ломаной ломать ломаться ломаю ломают ломаются ломая ломбард ломберном ломберный ломберными ломились ломится ломка ломки ломовою ломовых ломом ломоть ломтем ломтю ломят лона лондон лондона лондоне лоне лоно лопастей лопасти лопатками лопатой лопахин лопахина лопахину лопахиным лопнет лопнувшей лопнула лопнуло лорд лордом лорнет лорнеткой лорнетку лорнировала лоси лоск лоскутка лоскутки лоскутков лоскутное лоскуток лоскуты лоскутья лоснилась лоснятся лоснящиеся лососина лососиной лота лотереей лотереи лотерею лотках лото лохматый лохмотник лохмотников лохмотьев лохмотья лохмотьям лохмотьях лошадей лошаденка лошаденке лошаденку лошади лошадиное лошадке лошадки лошадку лошадь лошадьми лошадью лошадям лошадях луг луга лугов лугом лугу лужа луже лужи лужин лужина лужине лужинская лужину лужиным лужица лужице лужу луиза лук лука лукав лукавая лукавил лукавит лукавить лукавлю лукаво лукавого лукавое лукавой лукавою лукавства лукавство лукавством лукавые лукавый лукавым лукавыми лукавых лукавь луковицу луком лукоморья луку луна лунатизм луне лунная лунного лунной лунном лунному лунные лунный лунным луной луну луны лупить луч луча лучам лучами лучах лучезарнее лучей лучи лучинку лучину лучины лучистые лучистыми лучом лучшая лучше лучшего лучшее лучшей лучшему лучшие лучший лучшим лучших лучшую лущить лчать лый лысая лысина лысине лысиной лысину лысый лысым ль львиного львиное львице львицей львов львович львовна льву львы льда льдины льдом льет льется льзя льнет льном льстецам льстиво льстивое льстивый льстил льшая льшим льших льшую льщу льюиса льют лю люба любви любезен любезна любезнее любезнейшая любезнейший любезно любезной любезностей любезность любезностью любезностями любезны любезные 
любезный любезным любезных любека люби любивший любил любила любили любило любим любимая любимец любимице любимо любимого любимое любимой любимую любимца любимцу любимые любимый любимым любимых любит любите любитель любителя любить любишь люблю любо любовалась любовались любовался любоваться любови любовник любовника любовники любовником любовница любовницей любовницы любовно любовные любовных любовь любовью любого любое любом любопытен любопытная любопытнейшими любопытно любопытного любопытной любопытную любопытные любопытный любопытным любопытными любопытных любопытства любопытство любопытством любопытствую любуется любуйся любуясь любя любят любящего любящее любящей любящие любящим любящимися любящихся люд люда людвиговна людвиговной людвиговну людей люди людовике людом людская людски людские людских людской людскому людскую людьми людям людях люли люстра лютни лютой лютые люциферова ля ляг лягаев лягается лягаться лягнул лягу лягут лягушек лядащая ляжет ляжете ляжешь лязбиля лязг лязгают ляп м ма мавзолей мага магазин магазина магазинам магазине магазинов магазины магарыч магдалина магистром магического магнатские магнетизер магомет магометами магометом мадам мадемуазель мадера мадеры мадонн мадонне мадонну мадонны мае маевские мается маешься мажет мажордом мажордомы мажу мазали мази мазилкой мазнул мазурика мазурка мазурке мазурки мазурку мазью май майков майор майора майором майская майские майскими майских мак макароны маклашиных маклера маковой маковый маку макушка макушке мал мала маланья малейшая малейшего малейшей малейшем малейшему малейший малейших малейшую маленькая маленькие маленький маленьким маленькими маленьких маленько маленького маленькое маленькой маленьком маленькому маленькою маленькую малина малинки малиновая малинового малиной малину малины малицкое мало маловажное маловажной малого малограмотный малодейственных малодушие малодушии малодушия малодушного малодушной малое малой малолетним малолетними малолетными малом малому малоподвижная 
малостоящее малость малую малые малый малым малыми малых мальски мальчик мальчика мальчике мальчики мальчиком мальчику мальчишек мальчишески мальчишескою мальчишечка мальчишка мальчишкам мальчишками мальчишке мальчишки мальчишков мальчишкой мальчишку мальчонком мальчугана малютки малюток малявочка малявочкой маляре маляры мама мамай мамасю мамася мамаша мамаше мамашей мамаши мамашину мамашу маме маменька маменьки маменькой маменьку мамками мамочка маму мамы ман манго мандолина мандолине манер манера манерами манерах манерой манером манеру манеры манжет манжетах мани манил манила манило манир манит манифест манишка манишки манишку манкировал манкировали манкировать манкируете мансардах мансарде мантилий мантильи мантильке мантильку мантилью манто манфредами манящие маргарита марево марии марина марине мариновали марину марины марионетками марию мария марк маркизы мармелад мармеладов мармеладова мармеладову мармеладовым мармеладовых марсово март марта марте марфа марфе марфой марфу марфы марш марширующих марье марьей марьи марью марья марьяж маска маскарад маскарада маски маской маску масла масленица масленице масленицы маслеными масло маслом маслу масляною масок масса массивная массивный массивных массимо массис массою массу массы мастей мастер мастерица мастеровому мастеровым мастеровых мастером мастерская мастерски мастерских мастерское мастерской мастеру масштабе матвевна матвеевич матвеевича матвеевичем матвеевичу матвеевна матвеевне матвеевной матвеевну матвеевны матвеем матвеич матвеича матвеичем матвей математики математику математически математический математических математической математическую матерей матери материал материала материалов материалом материалу материалы материальной материальную материальные материальный материальных материею материи материй материнским материнское материю материя матерчатое матерь матерью матерьялистских матовому матовый матовых матом матрена матрос матросы матушка матушки матушку матчиш мать махал махать махая махнешь 
махнув махнул махнула махов махочкие махочкую маху мачеха мачехе мачехи мачехой мачтам мачтах маша маше машей машенька машеньки машенькой машет маши машин машина машинально машине машинистом машинку машину машины машка машу мая маяк маяки маятник маятника маятником маяться маячишь мгла мгле мглист мглистой мглистом мглистый мглой мглою мглу мглы мгновение мгновением мгновений мгновения мгновениями мгновенно мгновенного мгновенное мгновенной мгновенный мгновенным мгновенье ме мебели мебель мебельном мебельщику меблированная меблированные меда медали медаль медальон медальоне медалях медведе медведей медведенка медведенко медведенком медведенку медведь медведя медвежью меди медикаменты медики медиком медицине медицинский медицинское медицину медицины медленная медленнее медленно медленного медленное медленною медленный медленным медленных медли медлил медлила медлит медлительно медлительностью медлю медля медно медного медной медном медную медные медный медными медных медовые медовых меду меды медь медью меж между межу мездровых мейербер мел меланхолик меланхолики меланхолически меланхолический меланхолической меланхоличный меланхолия меленьким мелет мелете мелеют мели мелка мелкая мелкие мелкий мелким мелкими мелких мелко мелкого мелкое мелкой мелкоозабоченные мелкопоместный мелкою мелодией мелодраме мелом мелочам мелочах мелочей мелочи мелочной мелочною мелочную мелочных мелочь мелочью мелькавшие мелькавшим мелькавших мелькает мелькал мелькала мелькалась мелькали мелькало мелькают мелькающем мелькающие мелькнет мелькнувшей мелькнувшем мелькнувший мелькнул мелькнула мелькнуло мелькнуть мельком мельник мельниц мельнице мельницей мельницы мельничного мельчайшей мельчайшие мельчайших мельчал мелю мена мене менее ментоны меньшая меньше меньшее меньшей меньшие меньшинство меньшого меньшой меньшую меня меняет меняетесь меняется меняешь менял менялась менялись менялы менять меняясь мер мера мерам мере мерещилась мерещились мерещилось мерещился мерещится мерещиться 
мерещутся мерзавец мерзавцы мерзкий мерзко мерзкой мерзкую мерзлого мерзнуть мерзок мерзости мерзость мерке мерки меркнет мерку мерлехлюндии мерно мерное мерные мерный мерным мерой мертв мертва мертвая мертвее мертвец мертвеца мертвецам мертвецами мертвецах мертвецу мертвецы мертвечинкой мертво мертвого мертвое мертвой мертвому мертвую мертвые мертвый мертвым мертвых мертвящей меру мерцает мерцание мерцанье мерцаньем мерцающий мерцающими меры меря меряясь мессианическую мессины мессия мессы мест места местам местами местах месте местечка местечках мести местности местность место местов местом местоположение месту месть местью месяц месяца месяце месяцев месяцем месяцу месяцы метал металась металл металлическая металлической метался метаний метать метаться метафизике метели метелицей метель метельную метелью метеоры метешь мети метит меткое метлинскому метлой метнула метнулись метнулся метод метода методически метр мету мех меха механизм механики механически мехмет мехов меховая меховою меховую мехом меч мечами мече мечет мечетесь мечи мечом мечта мечтавшую мечтает мечтается мечтаешь мечтал мечтала мечтали мечталось мечтам мечтами мечтаний мечтания мечтаниями мечтаниях мечтанье мечтанья мечтатель мечтательница мечтательно мечтательностию мечтательность мечтательных мечтателям мечтать мечтах мечтаю мечте мечтой мечту мечты мечусь мечут мечутся мешавшее мешавшийся мешавших мешаем мешает мешаете мешается мешай мешайте мешал мешала мешалась мешали мешались мешало мешать мешаться мешаю мешают мешаются мешающего мешающую мешая мешаясь мешкать мешкая мешке мешки мешок мещан мещане мещанин мещанина мещанинишка мещанинишкой мещанинишку мещанином мещанка мещанок мещаночка мещански мещанской ми миг мига мигавшие мигает мигал мигали мигать мигают мигающего мигая миги мигнет мигни мигнувший мигнул мигнуть мигом мигрень мизерном мизерные мизерных мизинца микельанджело миколай миколаю миколая миколка миколке миколки миколкину миколкой миколку микробами микроскопические 
микроскопических микстуру мил мила милашка милая милее милейшая милейший миленькая миленькие миленький милитриса милитрисе миллиарды миллион миллиона миллионами миллионер миллионером миллионная миллионов миллиону миллионы мило миловать милого милое милой милосерд милосердии милосердию милосердия милосердному милосердные милосердный милосердным милосердые милосердый милости милостив милостивая милостивее милостивое милостивый милостисдарь милостию милостыни милостыню милость милочка милую милы милые милый милым мильон милюков милютины миме мимики мимо мимолетное мимолетной мимолетных мимоходом миндаль миндалю миндаля минералами минет министерства министерстве министр министра министрам министром министру миновав миновал миновала миновалась миновались миновало миновалось миновать миной мину минувшего минувшее минувшем минувших минует минул минусом минут минута минутами минутах минуте минутки минутку минутно минутного минутное минутны минутный минутой минуток минуточку минутою минуту минуты минуют минуя мины миньятюрных мир мира мираж миражами миражи мире мирен мирилась миримся мирит мирится мирить мириться мирно мирного мирное мирной мирный мирным мирных миров мировая мирового мировое мировой мировою мировую мировых миролюбивого миром миртов мирты миру миры мирянам мирятся мисс мистик мистифицировать мистически мистическое мистическом митреем митрей митрея митрофаниевском митька митьки митькой митьку михаил михаила михайлов михайлович михайловна михайловну михайловны михайловским михайлыч михайлычем михеем михей михею михея мицкевича мичману миша мише мишель мишу мишуры младая младенец младенца младенцев младенцем младенцу младенческий младенческой младость младшая младшей младшему младший младшую младые млет мне мнение мнением мнении мнений мнению мнения мнет мнителен мнительно мнительность мнительный многие многим многими многих много многого многодумный многое многознаменателен многозначительны многоколонный многократно многократное многолюдный многолюдству многом 
многому многообещавшее многообещающих многообразным многопенный многоразличны многоразличных многоречиво многословные многостороннее многотрудной многотрудный многоуважаемая многоуважаемой многоуважаемою многоуважаемый многоцветными многочисленнее многочисленнейшее многочисленной многочисленные многочисленными многочисленных многоэтажный многоярусный множественном множество множеством множит мнози мной мною мня мо мов мовщина мог могил могила могиле могилка могилке могилки могилкой могилой могилою могилу могилы могильная могильной могильный могильными могильщик могла могли могло могу могут могуче могучей могучие могучий могучих могущее могущество могущие мода моде модели моделью модистки модная модничаньем модно модное модной модный модным мое моего моей моем моему моет моется моею може можедом можем может можете можешь можно мозаи мозг мозга мозги мозгу мозоль мои моим моими моисеич моих мой мокка мокрая мокро мокрое мокрой мокрота мокроту мокрую мокры мокрые мокрый мокрым мокрыми мокрых мол молва молвив молвил молвит молвой молву молвы молебен моли молил молила молилась молились молилось молился молимся молись молитв молитва молитвах молитвенное молитвенный молитвой молитву молитвы молитесь молить молиться молишься молнией молниеносного молниеносных молнии молний молния молньей молньи молод молода молодая молодеет молодежи молодежь молоденькая молоденькое молоденькой молодец молодо молодого молодое молодой молодом молодому молодости молодость молодою молодую молодца молодцевато молодцы молоды молодые молодым молодыми молодых моложавее моложавом моложе молока молоко молоком молокосос молокососа молокососы молола молот молотком молоть молочнице молочный молочным молочных молча молчавшего молчал молчала молчали молчалива молчаливая молчаливее молчаливо молчаливость молчаливую молчаливые молчаливый молчаливым молчание молчанием молчании молчанию молчания молчановка молчат молчать молчи молчим молчит молчите молчишь молчу моль мольба мольбами мольбе мольбой молюсь 
моля молясь молящий момент момента моменты монастыре монастырем монастырский монастырь монах монахам монахи монахов монаху монашку монблан монета монетку монетой монетчиков монеты монисто монологам монологах монологи мономана мономании мономанию мономания мономанов мономаны монополию монотонно монотонной монотонные монтировало монумент монументально монументальных мопассан мораль морально моргающими морген морде морду морды море морем моркови морковь морковью моровой мороженое мороженым мороз мороза морозами морозе морозное морозном морозный морозным морозцы морозы морочил морочите морская морские морским морских морского морское морской морском морскую морфием морфий морщась морщатся морщила морщились морщился морщин морщинами морщинах морщинистое морщинки морщиной морщины морщитесь морщится морю моря мосеич москва москве москви москвой москву москвы московские московским московских московского московской московском московскою мост моста мостах мосте мостик мосткам мостки мостов мостовая мостовой мостовую мостовых мостом мосту мосты мосье мотать мотаю мотают мотив мотор мотька мох мохнатое мохнатой мохнатую моцарта моцион моциона мочальное моченье мочи мочили мочками мочь мошенник мошенника мошенники мошенников мошенническая мошенническим мошенничеств мощные мою моя мрак мрака мраке мраков мраком мрамора мраморной мраморном мрачен мрачились мрачить мрачна мрачная мрачнее мрачней мрачно мрачного мрачное мрачной мрачном мрачною мрачную мрачные мрачный мрачным мрачных мстил мстителен мститель мстительной мстить мстят мсье му мудр мудрая мудрено мудреной мудреному мудреные мудреным мудреными мудрецов мудрецы мудрил мудрить мудро мудрое мудрость мудрствуйте мудрые муж мужа муже мужей мужем мужества мужественно мужественного мужественное мужественности мужественность мужественный мужественным мужеством мужик мужика мужикам мужиками мужиках мужики мужиков мужиком мужику мужичка мужичком мужичок мужичья мужнего мужнина мужниного мужниной мужские мужского мужской мужскому 
мужу мужчин мужчина мужчине мужчиной мужчины мужья мужьям мужьями муза музе музее музы музыка музыкальной музыкальном музыкальный музыкант музыкантам музыкантов музыканты музыке музыки музыкой музыку мук мука муками муке муки мукой муку мундир мундире мундиров мундштук муравей муравейником муравьи муравьиною мурашиной мурашки мурлыкающем мурлычу мурлычут муромца мус мускул мускулистых мускулов мускулы мускульная мускульной мускульном муссинским муссинских мусье мутилось мутит мутится мутить мутна мутно мутны мутные мутный мутным муть мутят мух муха мухи мухояров мухоярова муху мучает мучаете мучаетесь мучается мучаешь мучаешься мучай мучайся мучайте мучась мучат мучаю мучаюсь мучают мучая мучаясь мучение мучением мучений мученичество мучения мучениях мученье мученья мученьях мучил мучила мучилась мучили мучились мучило мучился мучимый мучит мучителей мучители мучительная мучительнее мучительнейшее мучительницы мучительно мучительного мучительное мучительной мучительном мучительною мучительную мучительны мучительные мучительный мучительным мучительных мучителя мучитесь мучится мучить мучиться мучишь мучишься мучнистой мучной мучть мучусь мучь мучьте мучьтесь мхов мчалась мчались мчатся мчаться мчит мчится мщение мщения мы мыкается мыкать мыкаться мыкаю мыла мыли мыло мылом мыльный мыслей мысленно мысленный мысли мыслил мыслит мыслителей мыслитель мыслительная мыслительной мыслителя мыслителям мыслить мыслию мысль мыслью мыслю мыслям мыслями мыслят мыслях мыслящие мыслящим мыты мыть мытье мытья мычит мышами мышей мыши мышкой мышку мышления мышонка мышц мышь мышью мэри мюнхенский мягка мягкая мягки мягкие мягкий мягким мягкими мягких мягко мягкого мягкое мягкой мягком мягкости мягкость мягкостью мягок мягче мяса мясистым мясник мясника мясники мяснику мясное мясной мясо мясоед мятеж мятежи мятежно мятежность мятежный мятежным мятежными мятежных мятой мяты мячик н на набавил набалдашником набат набата набегал набегала набегами набегают набегающих набежал набежали 
набекрень набережной набережную набережные набережных наберется наберу наберут набивает набивать набирается набиралось набит набита набитая набито набитую набитый наблюдавший наблюдает наблюдай наблюдайте наблюдал наблюдала наблюдали наблюдатель наблюдательный наблюдателя наблюдать наблюдают наблюдаются наблюдая наблюдение наблюдению наблюдения наблюдениям наблюдениями набожных набок наболевшие наболело наболит наборным набравшуюся набрал набрался набрасывала набрасывалась набрать набраться набрел набросана набросать набросаю набросившись набросил набросилась набросился набросить наброситься наброску наваждения наваленные навалено навалил навалили наварю наведаться наведаюсь наведены наведывайтесь наведывалась наведывался навек навеки навел навела навели навело наверно наверное навернулись навертел наверх наверху наверчено навесами навеселе навесов навесом навести навестил навестить наветам навешаны навещает навеянной навзничь навзрыд навин нависали нависла навлечь наводил наводила наводило наводит наводите наводить наводнение наводнениями наводя навозе наволочками наволочки навострив навострил навострила навострились наврал наврала навсегда навстречу навыворот навык навыкате навязал навязала навязался навязчивая навязывал навязывались навязывать навязываться наг нагибается нагибалась нагибаться нагибаясь нагло наглого наглости наглость наглостью наглухо наглые наглый наглым наглядел нагляделась наглядно нагнал нагнется нагнись нагнув нагнувшись нагнул нагнула нагнулся нагнусь нагнуться наговариваешь наговорил наговорила наговорить наговориться нагой нагонит нагорном наготове наготой наград награда наградишь наградой награду награды награждать награждая награждения награждено нагреется нагроможденной нагрубила нагрубит нагрудник нагружать нагрязнили нагрянет нагрянул над надавал надает надарит надбавляет надвигается надвинулись надвое надворного надворному надворный надворным надвяжем надвяжу надвязать надев надевает надеваешь надевала надевали надевать надевают 
надевая надеемся надеетесь надеется надежд надежда надеждами надеждах надежде надеждой надеждою надежду надежды надежная надежный надейтесь надел надела наделает наделал наделала наделают надели наденет наденете наденешь надену надень наденьте надеру надет надета надето надетое надетой надеты надеть надеюсь надеются надеялась надеялись надеялся надеясь надеяться надзиратель надзирательницей надзирателю надзирателя надзором надивиться надлежащем надлежащими надменен надменная надменно надменной надменном надменность надменный надменным надо надобен надобилось надобно надобностей надобности надобность надобны надоедаем надоедал надоедала надоедаю надоедлив надоедят надоел надоела надоели надоело надоем надоест надоесть надолго надорвала надорвана надорванный надоть надписана надпись надписью надругается надругательство надрывает надрываются надрывающимся надрываясь надсаждаясь надседаются надсона надстройками надтреснутый надув надувает надувание надувать надует надуется надуешь надула надуло надулся надумавшись надумаем надумаете надумается надумал надумался надуто надутого надуть надушенный надушил надуют надышаться наедине наездники наездницы наемной наехали наживать наживешь нажил нажимала нажимали нажить нажми назад назади назвал назвала назвали назван название названием названии названий названия названья назвать наземь назидание назидания назидательно назло назначавшуюся назначаемые назначал назначалась назначали назначались назначался назначать назначаю назначение назначением назначении назначения назначенного назначенною назначенную назначенным назначено назначены назначил назначила назначить назначу назовет назовете назовешь назови назовут назойливому назойливость назойливостью называвшихся называем называемая называемое называемой называемую называемые называемый называет называете называется называешь называл называла называли называлось называть называться называю называют называя назюзюкался наибесполезнейшую наибеспрерывнейшие наиблагороднейшего 
наиболее наивен наивная наивно наивности наивною наивные наивный наивным наивных наигрывает наигрывая наизнанку наизусть наименьшим наиполезнейший наиприличнейшая наискось наитий наихитрейшим найдем найден найдена найдено найдены найдет найдете найдется найдешь найди найдите найду найдут найдутся найдя найма наймет наймешь найми наймите найму найти накажет наказал наказали наказана наказание наказания наказанье наказать наказов наказывает наказывай наказывал наказывали накануне накапало накаркают накидывает накинет накинете накинув накинул накинула накинулась накинулся накинута накинутом накинутся накипала накипевшую накипят накладет накладывает накладывала накладывать наклевывалась наклеенное наклеены наклепал наклонен наклоненная наклоненной наклонив наклонившегося наклонившись наклонил наклонила наклонилась наклонился наклонится наклониться наклонностей наклонности наклонность наклонностях наклонною наклонны наклоном наклонялся наклонясь наклоняя наклоняясь наколем наколет наконец накопившиеся накопилось накоплялась накорми накормила накормить накормлю накормят накось накошенною накрахмаленные накрахмалены накрепко накрест накричит накроют накрывавшегося накрывает накрываете накрывай накрывался накрывают накрыл накрыла накрыли накрылся накрыт накрытый накрыть накуплю накурено накурил накуролесили налагает налагал налагают наладили наладит налгал налево налег налегая налегке належал налезло налепит налета налетевшим налетел налетели налетело налетит наливает наливается наливай наливал наливала наливая наливка наливной нализаться налил налиновал налитыми налить налицо наличности наличных наложенное наложил наложила наложницей налью налюбуется налюбуешься налюбуются нам намазанных намахивается намахнулся намачивая намашет намедни намеднишнему намек намека намекавший намекает намекаете намекал намекала намекам намеками намекать намекают намеке намеки намекнул намеков намеком намелет намелко наменял намеревался намереваясь намерен намерена намерение намерением 
намерении намерений намерению намерения намерениям намерениями намерениях намеренно намерены намерила наметался наметил намечтать нами намок наморщивши намотал намочил нанес нанесенная нанесенной нанесет нанести нанимаете нанимал нанималась нанимать нанимают нанкового наносил наносит наносный наносным нанял наняла наняли нанялся нанята нанятой нанять наобещал наоборот наобум наотмашь наотрез нападает нападал нападая нападение напал напали напасешься напасть напев напева напевает напевах напевают напевая напевы напекли напекут напел напер наперво наперебив наперед наперекор наперерыв напечатали напечатана напечатанным напечатано напечатать напечет напивались напивался напиваться напиваюсь напившись напилась напился напирали напирать напирают напирая написав написал написала написали написанная написанного написанное написанной написанную написанный написано написаны написать напитанная напитку напиток напитывается напиться напиханном напишет напишешь напиши напишите напишу напишут наплакались наплевал наплевать наплывают наплясался наподличал напоенного напоил напоила напоили напой напоказ наполеон наполеона наполеонами наполеоновской наполеоном наползают наползет наполнена наполненная наполненной наполненные наполненный наполненным наполненных наполнено наполнены наполнил наполнилась наполнился наполнит наполнится наполнить наполню наполняемы наполняется наполнял наполнялись наполняло наполнялось наполнялся наполнять наполняют наполняя наполовину напомадил напомадился напомаженных напоминавших напоминает напоминай напоминал напоминало напоминания напоминать напоминаю напоминают напоминающий напоминая напомни напомнив напомнил напомнила напомнилась напомнило напомнит напомните напомнить напор напора напорется напою напоят направил направились направился направить направиться направлен направление направлений направлению направления направленных направлено направлю направляется направляют направляясь направо направятся напрасна напрасная напрасно напрасной напрасные 
напрасный напрасных напрашиваетесь напрашивается напрашиваются например напролет напросто напротив напрягает напрягал напрягались напрягаю напрягая напрягаясь напрягутся напрягши напряжение напряжением напряжения напряженная напряженно напряженное напряженной напряженном напряженный напряженным напряжены напугал напугали напугались напугана напуганные напугано напугать напудренные напужались напускное напускные напускным напустило напустился напутает напутаешь напутствие напутствии напущенные напьется напьюсь наравне нараспашку нарастала нарастали нарастание нарастающие нарах нарву нарезался нарисовался нарисованного нарисованные нарисовано нарисовать наркотических народ народа народе народец народницей народной народный народов народовольческого народом народонаселения народу народы народятся нарочно наруже наружная наружно наружного наружное наружной наружности наружность наружностью наружную наружные наружу нарукавнички нарумяненную нарушаемого нарушает нарушал нарушалась нарушали нарушало нарушалось нарушать нарушая нарушена нарушение нарушения нарушено нарушил нарушилось нарушился нарушит нарушителей нарушится нарушить нарушу нарцизов нарыв нарывавший нарывает наряд наряда наряде наряди нарядить нарядная нарядно нарядной нарядом наряжается наряжалась наряжаются нас насадил насадим насвистывает наседка наседке наседки наседку насекомое насел население населения населенная населенные населились населило насердке насидишься насилие насилием насилия насиловать насилу насильно насильственно насильственной насильственною насильственные насильственным насилья насказала насквозь насколько наскоро наскочила наскучивало наскучил наскучу наслаждается наслаждайся наслаждайтесь наслаждалась наслаждались наслаждался наслаждаться наслаждаюсь наслаждаются наслаждаясь наслаждение наслаждением наслаждений наслаждения наследникам наследники наследнику наследовавшие наследовал наследства наследственной наследство наследству наслышан наслышка насмеемся насмерть насмехается 
насмехаюсь насмешек насмешил насмешите насмешка насмешками насмешки насмешкой насмешку насмешлив насмешливая насмешливее насмешливо насмешливого насмешливое насмешливой насмешливости насмешливый насмешливыми насмешливых насмеюсь насмеялся насмотрелся насолить наспал наставал наставало наставил наставительно наставлений наставлениях наставлено наставляй наставница настает настаивавший настаиваем настаивает настаиваете настаивал настаивала настаивать настаиваю настал настала настали настало настанет настанут настасье настасьей настасьи настасью настасьюшка настасья настежь настенька настигающую настлали настоенной настоит настоится настойчива настойчиво настойчивого настойчивое настойчивости настойчивость настойчивостью настойчивые настойчивый настойчивым настолько настоял настояла настоянию настояно настоятельно настоятельную настоящая настоящего настоящее настоящей настоящем настоящему настоящею настоящие настоящий настоящим настоящих настоящую настраивает настраивается настрого настроение настроением настроении настроению настроения настроено настроены настроим настроит настроить настройщика настрочить настудишь настудят наступает наступала наступают наступивший наступил наступила наступили наступило наступит наступлением насущного насущной насущные насущным насущными насчет насылает насылая насыпь насыпью насядет наталия натальи наталья наташа наташе наташей наташи наташу натащил натащить нате натекла натерпелась натерпелся натертому наткнулся натолкнуть натолочь натопленных натоптали наточи натруженный натугой натура натурализме натуральнее натурально натуральности натуральную натуральны натуре натурой натуру натуры натыкается натыкался натягивает натягивать натяжек натянул натянулись натянутая натянуто натянутый наугад наук наука науках науке науки наукой науку наумыч наутро научи научил научила научило научился научимся научит научите научить научные научных научусь наущения нафталина нафталином нахал нахально нахальное нахальный нахальным нахальства 
нахальство нахальством нахватает нахватают нахлебник нахлестался нахлобучив нахмуренно нахмуренной нахмуренные нахмуренный нахмуренным нахмуренными нахмурив нахмурившись нахмурил нахмурила нахмурился нахмурясь находившейся находил находила находилась находили находились находило находилось находился находит находите находится находить находиться находишь находку находчивости находя находясь находят находятся находящий нахожу нацарапает нации наций национальности национальные нацию нациями начавшегося начавшей начавши начавший начал начала началами началась начале начали начались начало началом началось начался начальник начальника начальнике начальники начальником начальнику начальница начальницей начальницу начальства начальство начальством начальству начатки начатое начатые начать начерно начертал начертана начертанной начертанный начертанных начертать начертил начесть начинавшаяся начинавшее начинавшейся начинавший начинаем начинает начинаете начинается начинаешь начинай начинайте начинал начинала начиналась начинали начинались начинало начиналось начинался начинаний начинать начинаю начинают начинающая начинающего начинающий начинающимся начиная начиненные начинки начинкой начисто начистоту начнем начнет начнете начнется начнешь начни начну начнут начнутся начудила наш наша наше нашего нашей нашел нашелся нашем нашему нашептывает нашептывал нашествие нашествий наши нашим нашими наших нашла нашлась нашли нашлись нашло нашлось нашу нащипанными нащупав наяву наяды не неаккуратна неаккуратно неаполитанский неба небе небеленая небес небеса небесам небесами небесах небесная небесно небесное небесной небесном небесные небесных небесполезно неблаговоления неблагодарен неблагодарно неблагодарности неблагодарность неблагодарностью неблагодарные неблагодарный неблагонадежность неблагонамеренных неблагополучно неблагоприятному неблагородный неблагородными неблагородство неблагосклонно небледнеющих небо небогат небогатый небожитель небойкие небойсь небольшая небольшие 
небольшим небольшими небольшого небольшое небольшой небольшому небольшую небом небосклон небосклону небось небрежен небрежнее небрежно небрежное небрежной небрежности небрежностию небрежностью небрежною небрежные небрежный небрежным небритая небритую небритый небу небывалая небывалого небывалое небывалой небывалые небывалый небывалым небывалыми небывалых небывальщину небылицу небытия нева неважно неважное неважной неважны неважным невдомек неве неведении неведения неведом неведомая неведомо неведомой неведомою неведомый неведомым неведомыми неведомых невежд невежды невежества невежество невежи невежливая невежливо невежливою невелик невелика невелико невеликодушно неверие неверна неверно неверного неверною неверный неверных невероятно невероятности невероятные невероятным неверующий неверующих неверьем невеселая невесело невеселые невеста невесте невестка невестки невестой невесту невесты невесть невзгод невзначай невзрачный невиданно невиданного невиданной невиданные невидимая невидимо невидимой невидимую невидимые невидимый невинен невинна невинная невинно невинного невинном невинному невинности невинность невинностью невинные невинный невинных невиновных невменяем невнимания невнимательным невнятны неводу невозвратимо невозвратимый невозвратно невозвратного невозвратном невозвратные невозвратным невоздержания невозможен невозможно невозможное невозможному невозможности невозможную невозможный невозможным невозможных невозмутимая невозмутимее невозмутимо невозмутимого невозмутимое невозмутимый невозмутимым невой неволе неволей неволит невольно невольного невольное невольной невольном невольною невольный невольных невообразимо невоскресшему невоспитанный невою невпопад невредим невредимо невредимого невредимый невский невскому неву невы невыгодно невыкупленные невыносим невыносима невыносимая невыносимо невыносимое невыносимы невыносимый невыносимыми невыразимая невыразимо невыразимого невыразимое невыразимом невыразимым невысказанном невысоко невысокому 
невыстоявшеюся невысыхаемым невыторгованный нег негаданно негаданной негде неге неги негибнущая неглубокой неглуп неглупая неглупый него неговорлив негодная негодном негодный негодным негодование негодованием негодовании негодования негодуя негодяем негодяй негодяя негой негоцианта неграциозными негреющий негреющим негромкие негромко негры негустые недавнего недавнее недавней недавнем недавнему недавний недавних недавно недавнюю недавняя недалеко недалечко недальней недальный недаром недвижимо недворянина неделе недели неделикатно неделикатным неделовитостью неделовых недель недельку неделю неделя неделям недетски недетской недлинный недобор недобровольно недоброе недоброжелателен недоброжелательством недоваренную недоварены недоверием недоверия недоверчив недоверчива недоверчиво недоверчивости недоверчивость недоверчивостью недоверчивые недоверчивый недоверчивым недоволен недовольна недовольная недовольны недовольный недовольным недовольства недовольство недогадливый недоглядел недогляди недоглядишь недозрелости недоимках недоимки недоконченное недолго недоносков недоразумения недорого недосказанного недосказанное недоставало недостает недостало недостатка недостатками недостатков недостатком недостаток недостаточно недостижимой недостоин недостойна недостойному недостойною недостойны недостойный недостойных недоступна недоступно недоступного недоступность недоступную недоступный недосягаем недосягаемо недосягаемым недосягаемых недотепа недотрогой недоумевал недоумевали недоумевая недоумение недоумением недоумении недоумений недоумения недоумениям недоуменьем недоучившихся недочитанная недр недрах недремлющая недрогнувший недруги недружелюбным недуг недугами недугом недурна недурно нее неестественно неестественною неестественных нежа нежат нежданно нежданной нежданные нежданный нежели нежен неженского неживой неживою нежинский нежит нежить нежливенькая нежна нежная нежнее нежней нежнейшего нежнейшей нежнейшими нежно нежного нежное нежной нежном нежности нежность 
нежностью нежною нежную нежны нежные нежный нежным нежных незабвенная незабвенной незабываемую незабываемые незабываемых независимо независимое независимости независимость независимую независимым независящим незадолго незаживающие незаконная незаконною незаметнее незаметно незаметное незаметной незаметную незапамятные незапамятных незапертую незапятнанном незаработанном незаработанный незаслуженного незаслуженным незастенчивые незатейливой незачем незваному незваный незвучно нездешней нездешнею нездешний нездешними нездоров нездорова нездоровая нездоровилось нездоровится нездорово нездоровы нездоровье нездоровьем неземная неземного неземной неземные незлобный незнаемою незнаком незнакомец незнакомого незнакомое незнакомой незнакомом незнакомому незнакомству незнакомую незнакомцами незнакомые незнакомый незнакомым незнакомыми незнание незнанию незначащие незначительна незначительное незначительные незрим незримо незримые незримый незримым незыблемое неизбежная неизбежно неизбежного неизбежное неизбежность неизбежною неизбежные неизбежный неизбежным неизбежными неизбежных неизведанных неизвестен неизвестная неизвестно неизвестное неизвестном неизвестности неизвестность неизвестные неизвестный неизвестным неизвестными неизвестных неизгладимо неизгладимым неизлечима неизлечимую неизлечимые неизменной неизменную неизменный неизменным неизменяющейся неизмеримою неизреченной неизъяснимо неизъяснимое неизъяснимых неимением неимоверно неимоверные неимоверным неинтересно неинтересны неискусно неисполненные неисправим неисправимый неисправимым неисправной неистинном неистовая неистово неистовое неистовой неистовые неистовым неистощимой неистощимых неистребима неистребимого неисходно неисходный ней нейдем нейдет нейдут нейрастения нейти некий неких некогда некого некоего некоммерческих некому неконченные неконченный некормленая некоторая некоторого некоторое некоторой некотором некоторому некоторою некоторую некоторые некоторый некоторым некоторыми некоторых некошенном 
некрасива некрасиво некрасивой некрасивый некрасивым некраснеющих некрасова некрашеный некрепкий некрепок некстати некто некуда некупленных некую неладно неласково неласковый нелегкая нелегко нелегкую нелепа нелепее нелепей нелепейшая нелепо нелепого нелепое нелепости нелепость нелепый нелепым нелживо нелицемерной неловкая неловкий неловким неловко неловкого неловкое неловкости неловкость неловкостью неловок неложно нельзя нельки нелюбимый нелюбовь нем немалая немало немедленно немедленного немедленной немедленном немедленным немеет немезида немели немеркнущей немеркнущий немец немецкая немецки немецкие немецкий немецким немецких немецкого немецкой немецком немецкую немилосердно неминуемая неминуемо неминуемое неминуемою неминучий немка немке немки немку немногие немногими немногих немного немножечко немножко немо немое немой немок немолодой немолодому немолоды немолодых немолчный немом немота немотствует немочь немощами немощеного немощеными нему немудреная немудрено немудреный немудрый немую немца немцами немцев немцем немцу немцы немые немым немыслимо немыслимы немытого немытую немытый немых ненавидел ненавидела ненавидели ненавидеть ненавидит ненавидите ненавидишь ненавидя ненавидят ненавижу ненавистен ненависти ненавистию ненавистна ненавистная ненавистничают ненавистно ненавистного ненавистное ненавистною ненавистным ненавистными ненависть ненавистью ненаглядная ненаглядную ненаглядный ненаглядным ненадежно ненадежных ненадолго ненароком ненарушимая ненарушимо ненарушимого ненастной ненастный ненастья ненасытимое ненасытимый ненасытность ненасытную ненатурально ненормальности ненормальным ненужная ненужно ненужного ненужной ненужность ненужную ненужны ненужные ненужный ненужным необдуманно необдуманное необитаемою необитаемыми необозримая необозримо необозримого необозримой необразованная необразованный необузданной необходим необходима необходимейшие необходимо необходимого необходимое необходимости необходимостию необходимость необходимостью необходимою 
необходимы необходимые необходимыми необъяснимо необъяснимое необъяснимые необъяснимым необъятная необъятно необъятного необъятное необъятном необъятною необъятную необъятные необыденное необыкновенная необыкновенно необыкновенного необыкновенное необыкновенном необыкновенному необыкновенною необыкновенную необыкновенные необыкновенный необыкновенным необыкновенных необычайнейшее необычайное необычайной необычайности необычайный необычайным необычный неоднократно неоднократный неодобрение неодолимая неодушевленный неожиданная неожиданнейшим неожиданно неожиданного неожиданное неожиданному неожиданность неожиданною неожиданную неожиданны неожиданные неожиданный неожиданным неожиданными неоконченная неокрепших неопасен неопасно неописанна неописанная неописанном неописанный неопределенно неопределенное неопределенной неопределенною неопределенные неопределенным неопределенными неопределенных неопровержимой неопрятен неопрятнее неопрятном неопрятный неопытного неопытному неопытности неопытные неопытный неосновательны неоспоримый неосторожнее неосторожно неосторожного неосторожное неосторожности неосторожность неосторожны неосторожным неостроумен неосуществима неосуществимым неотвратимо неотвратимый неотвязная неотвязный неотвязчивую неотвязчивый неотделанную неотесанный неоткуда неотлагательное неотлагательные неотлагательным неотлучно неотразим неотразимо неотразимого неотразимое неотразимою неотразимые неотразимый неотразимых неотсрочимый неотступная неотступнее неотступно неотступный неотъемлемо неохота неохотно неохотой неохотою неоцененных непарадных неплотно непобедимой непобедимый непобедимым неповинен неповоротливой неповоротливом неповторимая непогибшим непогоды неподалеку неподвижен неподвижна неподвижная неподвижнее неподвижно неподвижного неподвижное неподвижной неподвижности неподвижность неподвижностью неподвижною неподвижные неподвижный неподвижным неподвижными неподдельного неподдельное неподкупная неподкупном неподобно неподражаем неподражаемо 
неподходящие непозволительно непоказанной непокладные непокойна непокойно непоколебимее непоколебимой непоколебимыми непокорных непокрытой неполнота неполный неположенные непомерной непомерный непомерным непонимании непонятая непонятливости непонятливый непонятна непонятная непонятно непонятного непонятное непонятному непонятный непонятным непорядком непорядочно непосвященные непосильной непослушны непосредственного непосредственное непосредственной непостижима непостижимо непостижимый непота непотребного непохожее непохожей непочтительна непочтительностию неправда неправду неправедные неправильного неправильное неправильной неправильности неправильность неправильный неправленный неправые непредвиденных непреклонна непреклонная непрекращающегося непреложным непремен непременно непременною непреодолимое непрерывно непрерывное непрерывной непрерывный непрерывным непрестанной непривилегированные непривлекательной непривычки непривычного непривычное непривычным непривычных неприготовленную непризнанный непризнанным неприкосновенное неприкосновенность неприличия неприлично неприличное неприличном неприличности неприличным неприметнее неприметно непримиримых непринужденности непринужденный неприступная неприступны неприступные непритворенную непритворно непритворной непритворным непричастен непричесанной неприязненно неприязнь неприятель неприятельский неприятельским неприятен неприятнейшее неприятно неприятного неприятное неприятной неприятном неприятному неприятностей неприятности неприятностию неприятность неприятностях неприятные неприятный неприятным неприятными непробудней непробудно непробудной непробудную непробудные непробудный непроглядном непроглядный непродажных непродолжительный непроезжий непролазная непроницаема непроницаемости непроницаемы непроницаемым непроницаемых непростительно непростительной непростительную непростительные непроходимая непрочен непрошеные непрошеных непрядва непрядвой непутное неравенством неравна неравнодушен нерадению нерадостно 
неразборчиво неразвернувшийся неразвитость неразвиты неразгаданного неразговорчив неразговорчивы неразделенную нераздельности неразлучна неразлучны неразрезанною неразрешаемые неразрешенных неразрешим неразрешимому неразрешимы неразрешимый неразрываемую неразрывно неразрывного неразрывную нерасположение нерасчетливо нерв нервами нервическая нервический нервическим нервическое нервная нервно нервное нервной нервною нервны нервный нервным нервных нервов нервы нередко нерешенное нерешимости нерешительно нерешительном нерешительности нерешительность нерешительностью нерешительны неровно неровности нерукотворный неряхой нес несбывшееся несбыточной несбыточною несбыточную несбыточные несбыточный несбыточных несвежем несвежи несвежую несветский несвойственным несвязанный несвязно несвязный несгораемые несдобровать несерьезным несессер несессеры несет несете несется несешь неси несказа несказанно нескладная нескладно нескладном несколькими нескольких несколько нескольку нескончаемой нескончаемому нескончаемый нескончаемым нескромности нескромный нескрываемая нескрываемым нескучно несла неслабый неслась несли неслись неслиянности несло несловоохотлив неслось неслушавшие неслыханно неслыханное неслыханной неслыханном неслыханному неслыханные неслыханным неслыханными неслыханных неслышно неслышным несмелые несметные несметный несмотря несмыкаемости несносна несносно несносное несносную несносный несовершенно несовершеннолетнее несовершеннолетний несовершенств несогласие несогласимых несогласию несомненно несомненного несомненною несомненный несомненным несомненных несообразный несообщителен несоразмерно несостоятельны неспешно неспешные неспешный неспешным неспокойна неспокойно неспособность неспособным несправедлива несправедливая несправедливо несправедливого несправедливое несправедливость несправедливостью несправедливую несправедливы несправедливые несравненном несравненный нессиметрично нестерпим нестерпима нестерпимая нестерпимо нестерпимого нестерпимой нестерпимом 
нестерпимый нести нестиранного нестройном несу несут несущейся несуществующий несущие несущиеся несущий несущихся несч несчастен несчастие несчастии несчастий несчастию несчастия несчастиях несчастлив несчастлива несчастливейшим несчастливый несчастна несчастная несчастнее несчастненькой несчастного несчастное несчастной несчастном несчастному несчастною несчастную несчастны несчастные несчастный несчастным несчастными несчастных несчастье несчастьем несчастью несчастья несчастьями несчетные несытым неся нет нетактично нетвердо нетвердой нетвердым нетвердыми нетверез нетерепении нетерпелив нетерпеливо нетерпеливого нетерпеливое нетерпеливой нетерпеливому нетерпеливы нетерпеливый нетерпеливым нетерпение нетерпением нетерпении нетерпения нетопленную неторопливо нетрезвом нетронутую нетронутый нету нетяжко неуважения неуважительный неуверенном неувядаемая неувядающая неувядающим неувязанное неугодлив неугодливая неугодно неугомонный неудавшейся неудавшиеся неудавшуюся неудач неудача неудачами неудаче неудачи неудачная неудачно неудержимее неудержимо неудержимого неудержимую неудержимым неудобно неудобной неудобным неудобств неудобства неудобствами неудобстве неудобству неудовлетворенность неудовлетворительными неудовольствие неудовольствием неудовольствия неужели неужель неузнаваемого неузнаваемой неузнанной неуклонно неуклюж неуклюжая неуклюже неуклюжее неуклюжий неуклюжим неукоснительно неукрашенное неукротимей неуловимо неуловимое неуловимый неуложенное неумело неумелые неумен неумением неуменьем неуменья неумеренная неумеренно неумеренное неуместною неумирающего неумной неумолим неумолима неумолимо неумолимый неумолкаемый неумолкающей неумолчной неумолчные неумытого неумытый неумышленно неумышленное неуныло неурожае неурожай неурядиц неуследимый неуспеха неустанный неустающие неустойки неустранимых неустрашимо неустройств неусыпно неутешными неутолимую неутомимая неутомимо неутомимым неученых неучтиво неучтивости неуютно нехитрого нехождению нехороши нехорошие 
нехороших нехорошо нехоть нехотя нехристь нецеремонные нечальны нечаянно нечаянного нечаянностей нечаянный нечаянным нечего нечеловеческих нечем нечему нечесаную нечесаный нечет нечиновных нечисто нечистота нечистоту нечистоты нечистым нечищенным нечищеные нечопорно нечто нечувствительно нечуток нешто нештопанного нештопаных нешуточному нею неявившихся неярки неяркий неясна неясная неясно неясное неясном неясному неясные неясный неясным неясных ни нибудь ниве нивы нигде нигилизм нигилист нигилистов нигилисты ниже нижеподписавшийся нижнего нижнее нижней нижнем нижнему нижнею нижний нижних нижнюю нижняя низать низведении низвела низводила низенькие низенький низка низкая низки низкие низкий низким низких низко низкого низкое низкой низком низкую низменный низок низости низость низшего низшее низшем низший никак никакая никакие никаким никакими никаких никакого никакое никакой никанора никем никита никите никитича никитой никиту никиты никифоровна никли никнет никогда никого никодим никодима никодиме никодимом никодиму никоим николавна николаевич николаевича николаевичу николаевна николаевне николаевной николаевну николаевских николаевском николаем николаичем николай николашка николашке николаю николая николы никому никто никуда нил ним нимало нимбы ними нимфа нина нине ниной нину нины ниоткуда нипочем нисколько ниспосланное нисходила нисходит нисходят нити нитка нитками нитки ниткой нитку ниток ниточками ниточке нить нитью нитяной нитяный них ниц ницце ницше ничего ничегошечко ничей ничем ничему ничком ничто ничтожен ничтожества ничтожество ничтожная ничтожнейшее ничтожнейшем ничтожнейший ничтожного ничтожной ничтожностию ничтожною ничтожны ничтожные ничтожный ничтожным ничтожных ничуть ничьего ничьей ничьи ничьим ничьих ничья нише нищ нищая нищего нищей нищему нищенские нищенской нищета нищете нищетой нищету нищеты нищие нищий нищим нищими нищих нмением нная нной ннх нных но новая новейшая новейшее новейший новейшими новейших новенькие новеньким новенького новенькое 
новехонькая новизне новичка новичкам новички новичок ново нового новодевичий новое новой новом новому новорожденный новоселье новостей новости новость новостью новостями новостях новою новоявленный новую новые новый новым новыми новых новь новью ног нога ногам ногами ногах ноге ноги ного ногой ноготками ногою ногтем ногти ногтями ногу ноев ноет нож ножа ножами ноже ножей ножи ножик ножичек ножищами ножка ножками ножки ножницами ножнички ножом ноль номер номере номинально ноне норе норке норки норма нормален нормальная нормально нормальное нормальной нормальному нормальную нормальный нормальным норму нормы норов норовил норовит норы нос носа носами носе носики носил носила носилась носили носились носило носился носиль носильщики носит носитесь носится носить носиться носишь носка носками носке носки носков носком носовой носок носом носу носы носясь носят носятся носящих нот нота нотами ноте нотка нотке нотки нотой ноту нотунг ноты ночам ночами ночах ночевавший ночевал ночевать ночевывал ночей ночи ночлег ночная ночно ночного ночное ночной ночном ночною ночную ночные ночным ночными ночных ночуем ночует ночуете ночуешь ночую ночь ночью ноша ношей ношу ношусь нощи нощно ноют ноябре ноябрь ноября нрав нрава нравах нраве нравилась нравились нравилось нравился нравитесь нравится нравиться нравов нравом нравоучения нравственная нравственно нравственного нравственное нравственной нравственном нравственности нравственность нравственною нравственную нравственные нравственным нравственными нравственных нраву нравы нравятся ну нудная нудные нудный нужда нуждается нуждался нуждах нуждаюсь нуждаются нуждаясь нужде нуждой нужду нужды нужен нужна нужная нужнее нужно нужное нужной нужную нужны нужные нужным нужных нулю нумер нумера нумерам нумерами нумерах нумере нумеров нумером нутру ны ные ныне нынешнего нынешнее нынешней нынешнем нынешнему нынешние нынешний нынешнюю нынче ньютон ньютоновы нюхает нюхала нюханье нюхая няне няней нянек нянечка няни нянчась нянчил нянчились нянчусь 
нянька няньками няньке няньки няньку няня нях о оазисе об оба обагри обагрившая обагрит обанкрутится обаяние обаянием обаянию обаяния обаяньем обаятельная обаятельного обаятельное обаятельной обаятельный обвалиться обварил обвел обвела обветшал обвешивают обвив обвивал обвивали обвиваясь обвила обвились обвинение обвинении обвинений обвинению обвинения обвиненного обвинив обвинил обвинила обвинительные обвинить обвинишь обвиняете обвиняешь обвинял обвиняли обвинять обвиняю обвиняющим обвитый обводила обводили обводить обводят обворожительно обворожительными обвязал обвязано обвязаны обглоданной обдавало обдадут обдает обдаешь обдал обдать обделаешь обделать обдергивались обдерет обдирать обдует обдуло обдумав обдумаем обдумайте обдумал обдумала обдуман обдуманно обдумать обдумывает обдумывал обдумывая обе обегал обегая обед обеда обедаем обедает обедаешь обедай обедал обедала обедали обедать обедах обедаю обедают обедая обеде обеденной обедне обедневший обедней обедни обедню обедня обедов обедом обеду обеды обежал обежит обездоленный обезличивают обезображеннее обезображенной обезображено обезобразили обезоруженный обезуме обезумел обеим обеими обеих оберегаемая оберегали оберегать оберегаю оберечь оберешься обернется обернув обернувшись обернул обернулась обернулись обернулось обернулся обернуты обернуться обертках обескуражены обеспечена обеспечение обеспеченный обеспечено обеспечить обеспокоена обеспокоенная обеспокоил обеспокоила обеспокоили обеспокоить обеспокою обеспокоятся обессиление обессиленный обессилил обессилили обетова обетование обетованной обеты обещает обещаете обещаетесь обещай обещал обещала обещали обещались обещался обещание обещанием обещаний обещанию обещания обещаниях обещанья обещать обещаюсь обещая обещаясь обжег обжегся обжечь обживаясь обжигаем обжигаешь обжигательная обжог обжора обивал обивать обид обида обидевшись обидел обидела обиделась обиделись обиделся обидеть обидеться обиднее обидно обидное обидной обидном обидному обидны 
обидой обиду обидчивость обидчиком обидчику обиды обидятся обижает обижаете обижаетесь обижается обижаешься обижайтесь обижалась обижался обижать обижаться обижаю обижают обижаются обижена обиженная обиженно обиженный обиженным обилие обилием обилии обилия обильно обильное обильный обильным обиняков обираешь обирать обираю обит обитавшие обитатели обитатель обитая обитые обитый обитым обиходе обкрадывал облагодетельствовал облагодетельствована облагодетельствованных облагороженному облагороженных обладателем облак облака облаками облаках облаке облако облаков облаком обласкала обласкали обласкают областей области область облачка облачко облегчаемы облегчает облегчаете облегчается облегчал облегчалась облегчающих облегчить облезлый облекать облекла обленилась обленился облепил облепленный облетели облечена обливает обливала обливалось обливаюсь обливая обливаясь облигаций облизываясь облик облике облилось облита облитой облитый облить облиться обличайте обличало обличат обличать обличают обличающее обличающие обличению обличения обличенного обличенных обличий обличили обличителем обличители обличить обличье облобызал обложив обложил обложила обложит облокотившись облокотилась облокотился облокотиться облокотясь обломав обломаешь обломками обломов обломова обломове обломовка обломовках обломовке обломовки обломовкой обломовку обломовская обломовский обломовских обломовского обломовской обломовском обломовскому обломовскую обломову обломовцам обломовцев обломовцы обломовщина обломовы обломовым обломовых обломок облупит обмакнешь обмакнул обмакнуть обман обмана обманет обманете обманом обманувшие обманул обманула обманули обманут обманутое обманутые обмануть обмануться обманчив обманчивый обманщика обманщицы обманывает обманываете обманываешь обманывал обманывали обманывать обманываю обманывают обманывающих обмахивая обмен обмена обмененном обменялись обменялся обмер обмеривали обмеривают обмеривая обмерить обмести обметает обмоет обморок обморока обмороками обмороке 
обмороки обмороком обмороку обмундировку обмывает обмывать обмызганный обнажает обнажаться обнаженная обнаженную обнаженные обнаженных обнажил обнаружен обнаружена обнаружением обнаруживал обнаруживать обнаруживая обнаружил обнаружили обнаружило обнаружилось обнаружится обнаружить обнаружиться обнеряшиться обнес обнесенный обнимает обнимаешь обнимал обнимала обнимались обнимать обнимаю обнимаются обнимая обнимемся обнимет обними обновившимся обновился обновить обновки обновления обновленного обновлять обноски обнюхает обняв обнявшись обнял обняла обнялись обнять обо обобрал обовьет обовьют ободком ободранный ободрано ободренная ободрившийся ободрившись ободрил ободрилась ободрило ободрился ободрись ободрите ободритесь ободрить ободриться ободряла ободряться обоего обожаемого обожает обожал обожанием обожания обожающие обожая обожги обожгли обожгло обожгу обожжен обожженный обоз обозвав обозвал обознать обозначавшие обозначается обозначалась обозначаются обозначена обозначено обозначился обозначить обозники обозреть обозу обои обоим обоими обоих обой обойдет обойдется обойдешь обойди обойдусь обойдутся обойдя обойной обойти обойтись обокрали обокрасть оболочке оболочку обольет обольется обольстит обольстительнее обольстительной обольстительный обольют обомлел обомлели обопрется оборачивается оборачивайте оборачивались оборачивался оборачивая оборачиваясь оборвавший оборвал оборвала оборвалась оборвалось оборванец оборванной оборванный оборванца оборванцем оборвать оборвет оборвется оборвешься оборви оборвыш обороняется оборот оборота оборотившийся оборотившись оборотил оборотилась оборотился оборотится оборотиться оборотне оборотнем оборотной оборотням оборотом оборотясь оботрет обошел обошла обошлась обошлись обошлось обошьют обоюдное обоюдных обоями обоях обрадовавшись обрадовал обрадовалась обрадовались обрадовался обрадованная обрадованного обрадованный обрадуемся обрадуется обрадуются образ образа образами образе образина образов образовал образовались 
образовалось образован образование образованием образовании образованию образования образованная образованного образованной образованном образованность образованны образованные образованный образованных образок образом образу образует образумить образумиться образцами образцовая образцовое образцовый образцу обрамлен обрамлена обрамленное обратив обратившись обратил обратила обратилась обратили обратились обратило обратилось обратился обратим обратит обратится обратить обратиться обратно обратясь обратят обратятся обращает обращается обращайте обращайтесь обращал обращалась обращали обращались обращался обращать обращаться обращаюсь обращаются обращающийся обращая обращаясь обращен обращена обращение обращением обращении обращений обращения обращенная обращенном обращенному обращенных обращены обращу обращусь обреешься обрезал обрезки обрекает обрел обречен обречена обреченного обреченные обреченными обречь обрисовывается обритый обритым обробел обробела оброк оброка оброке оброку обронил обросший оброчных обрубила обрубленный обрубок обругает обругал обругала обругали обругать обругаться обругаю обругают обруч обручен обручился обручится обручке обрушившиеся обрушила обрушилась обрушились обрушится обрыв обрыва обрывается обрывалась обрываю обрывая обрываясь обрывках обрывке обрывки обрывок обрывы обрюзг обряд обряда обрядах обрядов обряды обрящете обсаженный обсасывая обслуживания обставленная обставленный обстала обстановив обстановка обстановке обстановки обстановкой обстановку обстоятельно обстоятельное обстоятельную обстоятельным обстоятельств обстоятельства обстоятельствам обстоятельствами обстоятельствах обстоятельстве обстоятельство обстоятельством обстоятельству обстреливать обстриженными обстроилась обструганная обступили обступило обступят обсудил обсудим обсудите обсудить обсудят обсуждение обсуживался обсыплет обтерев обтереть обтерпелась обтертую обтесанный обтирал обтирала обтирая обтирку обтяжку обуви обугленный обуется обузу обусловлена обут 
обутым обухом обучался обучение обуют обуянный обхватив обхватил обхватила обхватили обхватывает обход обходившую обходил обходили обходилось обходился обходит обходится обходиться обходом обходя обходятся обхождении обшаривал обшарканные обшарканными обшивала обширен обширного обширную обширные обширный обшит обшлагом обшмыганные общая общего общее общей общем общему общества обществе общественного общественное общественной общественному общественный общественным общественных общество обществом обществу общечеловеческая общие общий общим общих общую объедаете объездили объезжать объем объемистый объемлет объемля объемом объехать объявив объявил объявила объявили объявились объявился объявим объявит объявите объявится объявить объявлен объявление объявлены объявляет объявляется объявляйте объявлял объявляю объявляют объявляя объял объяснение объяснением объяснений объяснению объяснения объяснениями объясненья объяснил объяснила объяснили объяснились объяснилось объяснимся объяснимым объяснись объяснит объясните объяснитесь объяснить объясниться объяснишься объясню объяснюсь объясняемую объясняет объясняется объясняешь объяснял объяснять объясняться объясняю объясняя объясняясь объятие объятии объятий объятия объятиях объятый объятья объятьях обывательская обывательской обывательскую обыденной обыденною обыденную обыденных обыкновение обыкновению обыкновения обыкновенная обыкновеннейших обыкновенно обыкновенного обыкновенное обыкновенной обыкновенном обыкновенною обыкновенную обыкновенны обыкновенные обыкновенный обыкновенным обыкновенных обыск обыски обыскивает обыскивайте обыскивать обыском обычаев обычаи обычай обычаю обычное обычной обычную обычные обычным обычными обычных обыщем обыщите обьятиях обяжете обязан обязана обязанной обязанностей обязанности обязанность обязанностью обязанностях обязанною обязанным обязаны обязательная обязательной обязательны обязательный обязательных обязательств обязательства обязательствах обязательство овал овальной овальными 
овацию овес овечку овеют овеяла овин овином овладевает овладевала овладевали овладевало овладевать овладевшей овладел овладела овладели овладело овощами овощи овощные овраг оврага оврагам овраге оврагом овса овсе овца овцой овцы овчинин овчинину огарка огарками огарки огарок огибавшую огибает огибали огибая огласку оглоблю оглобля оглохла оглушение оглушенный оглушила оглушили оглядев оглядевшись оглядел оглядела огляделся оглядеть оглядимся оглядки оглядкой оглядывает оглядывается оглядывайся оглядывал оглядывала оглядывался оглядывание оглядывать оглядывают оглядываются оглядывая оглядываяась оглядываясь оглянемся оглянется оглянись оглянитесь оглянувшись оглянула оглянулась оглянулись оглянулся оглянуться огне огневая огневицей огневой огневые огневым огней огнекрасная огнекрасные огнем огненного огненной огненные огненным огненных огни огнь огню огня огнями огнях ого оговор огонек огонь огонька огоньки огоньком огород огорода огородам огородами огороде огородить огородишком огородницы огородом огороду огороды огорошат огорошивает огорошить огорчать огорчен огорчением огорчения огорчениями огорченный огорчены огорчил огорчился огорчить ограбил ограбили ограбить ограблен ограбленной ограбленным ограда оградах ограде оградить оградой ограду ограды оградясь огражу ограниченные ограничивался ограничилось ограничился ограничится ограничить огрел огромная огромней огромнейшие огромнейшими огромного огромное огромной огромном огромному огромною огромные огромный огромным огромных огрызался огрызаться огурец огурцами огурцов огурцы огурчик одарен одаренные одев одевается одеваешься одевайся одевала одевалась одевали одевался одевать одеваться одевают одевающаяся одеваясь одевшись одежа одежда одежде одеждой одежду одежды одежке одежонку одеколону одел оделась оделся оденется оденешь одену оденусь оденут одеревенелыми одержим одессе одессы одет одета одетая одетого одетой одетому одеты одетые одетый одетым одеть одеться одеяла одеяло одеялом одеяние один одинаким 
одинакова одинаково одинаковое одинаковы одинаковым одинехонька одинешенька одиннадцати одиннадцатой одиннадцатом одиннадцатый одиннадцать одинок одинока одинокий одиноким одиноко одинокой одиноком одинокому одиночества одиночестве одиночество одиночку одиночное одичал одичалая одна однажды однако однакож однех одни одним одними одних одно одноважды одновременно одного однозвучно однозвучны однозвучным одноименной одной однократное одном одному однообразие однообразием однообразия однообразно однообразное однообразною однообразную однообразны однообразный однообразьи одностороннего односторонний односторонним одноцвет одною одну одобрение одобрением одобрительно одобрить одобряла одолеваемый одолевает одолевала одолевают одолел одолела одолели одолело одолеть одолжайтесь одолжение одолжением одолжите одонцовым одр одре одумается одумался одурачить одуревшими одуреешь одурения одурманения одурманивающий одутловатым одушевлена одушевление одушевлением одушевленные оды одышка одышки ожерельем ожесточением ожесточенная ожесточенное оживали оживет оживилась оживили оживить оживление оживлением оживленнее оживленно оживленном оживленностью оживленный оживлю оживляет оживлялась оживлялись оживлялся оживляющих оживляясь оживут ожидавшему ожидавший ожидает ожидаете ожидается ожидал ожидала ожидали ожидание ожиданием ожидании ожиданий ожидания ожидать ожидающий ожидающую ожидая ожидовел ожил ожиревший оз озаботится озабочен озабоченнее озабоченно озабоченную озабоченный озабоченным озадачен озадаченный озадачила озаренная озарены озарил озарила озарили озарилось озарился озарим озарит озарялись озаряющего оземь озер озера озерах озере озерна озеро озером озеру озими озимь озирается озирал озирала озиралась озирался озираньем озираясь озлились озлился озлобившись озлобленные ознакомиться ознаменовало ознаменовалось означавшие означает означается означенного озноб озноба ознобил озябла озябшие озябший озябшими ой ока окажется оказалась оказалось оказался оказанных оказано 
оказать оказии оказываемым оказывает оказывается оказывала оказывалась оказывали оказывалось оказывают окаймляющими окаменеет окаменел окаменелая окаменелость окаменелых окаменяющий оканчивалось окарикатурить окатил окаянная окаянного окаянные окаянный океан океане океаны окидывает окидывая окинет окинешь окинув окинул оклад окладов оклеветал оклеветали оклеивали окликает окликать окликнул окликнули окликнуть окна окнам окнами окнах окне окно окном окну око оков окованная окованный оковы оковывает оковывать околдован околдованная околицей около околотке околотку околоток оком окон оконной оконные окончание окончании окончания окончательнее окончательно окончательного окончательное окончательной окончательному окончательные окончательный окончательным окончательных окончен окончив окончил окончила окончит окончится окончить окопать окорока окороков окостенел окостенела окостенелый окоченели окоченил окошечко окошка окошке окошки окошко окошку окраинах окраску окрашена окрашены окрест окрестив окрестил окрестности окрестность окрестный окрестных окрик окрика окровавленного окровавленную окроме окромя окрошки округлилась округлости округляли округлять окружавшее окружавшей окружали окружающего окружающее окружающей окружающему окружающие окружающий окружающим окружающих окружающую окружены окружила окружили окружит окружить окружном окрыляли октавой октаву октябрь октябрьском октябрьскую октября окует окунулась окуня окупаются окутали окуталось оле оледенела оледенивший оледенил оледенило олей олени оленька олечка олешкин оли олимпийские олифе олицетворенный олово олух олухи олухов ольга ольге ольги ольгиной ольгой ольгу ольхи ольхой олюшка олюшкой оля омаров омбрельке омерзение омерзением омерзения омерзительно омрачалось омрачался омрачение омрачением омрачения омрачилось ому омут омута омуте омываемых омыт омытый он она онемев онемела онемели онемелое онемение онеметь онерами они онисим онисимовна онисимовны онисиму оно оного оную оны опадающий опал опаленная 
опаленные опаленный опалит опамятовавшись опары опасался опасаясь опасен опасение опасением опасений опасения опасениях опасливо опасна опасная опаснее опасно опасного опасное опасностей опасности опасность опасностями опасные опасный опасным опасных опеки опекой опеку опекун опекуна опекуном опера операции операцию опере опередил опередить оперлась оперном оперся оперу опершись оперы опечаленный опечалилась опечалится опечаток опиваете опирается опираясь описав описал описание описанием описании описаний описанная описанное описано описать описи описывал описывать опишу оплакала оплакиваю оплевать оплетая оплетет оплетешь оплеуха оплеуху оплодотворяется оподлит опоздавший опоздает опоздал опоздала опоздали опоздать опозоренная опозорить опозориться опомнившись опомнилась опомнился опомнись опомнитесь опомниться опомнясь опора опору опоры опосля опочиет опошлить оппозицию оппонировать оправдает оправдан оправдана оправдание оправданию оправдания оправдательные оправдать оправдаю оправдывавшее оправдывавшим оправдывается оправдываешь оправдывалась оправдывали оправдывались оправдывался оправдывать оправдываться оправдываюсь оправдывают оправдывая оправе оправив оправившись оправила оправилась оправился оправиться оправлюсь оправляла оправлялась оправляться оправляя опрашивает опрашивали определен определения определенная определеннее определенно определенного определенное определенной определенности определенную определенные определенный определенных определив определившеюся определил определился определительно определить определю определял определяя опредленным опричь опробую опровергает опровергал опровергнутых опровергнуть опрокидыванье опрокидываться опрокидываясь опрокинул опрокинутого опрокинутое опрокинутою опрокинутую опрометчиво опрометью опросила опротивели опрыскивает опрятно опрятное опрятность оптический оптом опубликован опускаем опускает опускаете опускается опускаешься опускал опускала опускалась опускали опускалось опускаться опускают опуская 
опускаясь опустевший опустевшим опустеет опустел опустели опустело опустелой опусти опустив опустил опустила опустилась опустили опустились опустился опустись опустите опустится опуститься опустошает опустошаются опустошение опустошенной опустошенный опустя опутавшие опухоли опухоль опушены опущена опущенная опущенные опущенный опущенным опущенными опущены опыт опыта опытах опыте опытная опытнее опытной опытности опытность опытный опытным опыту опыты опьянев опьяневший опьянел опьяненные опьяненный опьянят опять орава оракула орали оранжа оранжерею оранжереям оранжереях орарем оратор оратора ораторской ораторствовал ораторствуешь орбит органа организацию организм организмами организме органом органчиком органщика орда орден ордена орденом ордою ореол орет орех орехами орехи орешки оригиналы оригинальная оригинально оригинальности оригинальность оригинальный оригинальным оригинальных оркестр оркестре орла орлий орлиный орлиным орлица орлы оробев оробевшая оробеет оробела орудий орудия оружие оружием орфографии оса осадила осадить осадка осадой осадок осаду осаждает осанисто осанистый осанистым осанка осанкой осведомилась осведомился осведомиться освежает освежалась освежали освежать освежают освежила освежило освежит освежится освежить освежиться осветил осветилось осветит осветленный осветят освещает освещал освещало освещая освещена освещение освещении освещенная освободи освободившись освободилось освободить освободиться освободу освободясь освободят освобождался освобождаются освобождения освятил оседает оседлать осеклась осеклись осекся осел осела оселся осени осенила осенило осенить осенней осенние осенний осенних осень осенью осеняет осеняли осенялись осеняя осердитесь осерчает осетр осетра осетрина осетрины осечка осиливал осилил осилила осиновый осину осипшим осквернил осклабясь осколки осколок оскопления оскорбил оскорбились оскорбило оскорбительнее оскорбительно оскорбительное оскорбить оскорблен оскорблена оскорбление оскорблении оскорблений оскорбления 
оскорбленная оскорбленного оскорбленное оскорбленною оскорбленный оскорбленным оскорблены оскорблю оскорбляет оскорбляется оскорблял оскорблялась оскорблять оскорбляют оскорбляясь осла ослаб ослабевал ослабевали ослабевало ослабевший ослабевшим ослабел ослабели ослабело ослеп ослепил ослепила ослепительно ослепительной ослепительные ослепительным ослепительных ослепла ослепление ослепления ослепленный ослеплял ослеплялся ослепнув ослепнуть ослепшего ослепшие ослепший ослободился ослы ослышался осматривавшего осматривает осматривается осматривал осматривалась осматривался осматривать осматриваться осматриваю осматривая осматриваясь осмеливаюсь осмелилась осмелились осмелился осмелитесь осмелится осмелиться осмелишься осмелюсь осмеяно осмеять осмотр осмотрев осмотрел осмотрела осмотрелась осмотрелся осмотрено осмотреть осмотрит осмотрительнее осмотрится осмыслив осмыслилась осмыслилось осмыслить оснеженном оснеженные оснежит основа основался основание основанием основании оснований основания основаниях основанная основанное основанный основано основатель основательно основательные основать основная основным основных основу основы основываете основываясь особа особая особенная особенно особенного особенное особенной особенном особенностей особенности особенность особенною особенную особенные особенный особенным особенных особо особого особое особой особом особому особу особую особы особый особых осой оспа оспаривает оспаривали оспаривать оспу осрамили осрамлю оставай оставайся оставайтесь оставалась оставались оставалось оставался оставаться оставаясь оставив оставил оставила оставили оставим оставит оставите оставить оставишь оставлена оставленную оставленные оставленный оставленных оставлю оставляет оставляй оставляйте оставлял оставляли оставлять оставляю оставляют оставляя оставшийся оставшимся оставшись оставшихся оставь оставьте оставят остаемся остается осталась остались осталось остался остальная остального остальное остальной остальном остальную остальные 
остальным остальных останавливаемся останавливаетесь останавливается останавливайся останавливал останавливалась останавливались останавливался останавливать останавливаться останавливаются останавливающихся останавливая останавливаясь останемся останетесь останется останешься останови остановив остановившегося остановившись остановил остановила остановилась остановили остановились остановилось остановился остановись остановит остановите остановитесь остановится остановить остановиться остановишь остановка остановку остановлюсь остановясь остановят остановятся останусь останутся останься останьтесь остатке остатки остаток остаться остаюсь остаются остающимися остервенением остервенении остервенения остервенясь остереглись остеречь остзейскому остолбенел остолбенелый осторожен осторожна осторожнее осторожно осторожного осторожности осторожностию осторожность осторожностью осторожною осторожны осторожный осторожным осторожными острастка остри острием острия остров острова островах острове островов островок островского острову острог острога остроге острого острое острожной острожных острота остротами остротою остроты остроумие остроумием остроумии остроумия остроумная остроумнее остроумнейшим остроумно остроумного остроумны острые острый острым остудился оступался оступаясь остыл остынет осудили осудит осудить осудят осуждает осуждайте осуждала осуждаю осужден осуждена осужденные осужденных осунулось осунулся осушая осуществилось осуществился осуществится осуществить осуществлялась осчастливил осчастливили осчастливленный осыпавшимися осыпает осыпается осыпала осыпалась осыпали осыпанье осыпать осыпка осыплет осьмерка от отбивался отбивные отбился отбирал отбить отблагодарила отблеск отблеском отборно отборный отбрасывать отбросив отбросил отбросила отброшенный отваги отвагой отвагу отваживает отважно отважные отважный отвален отваливать отвалил отведает отведать отведет отведу отвезите отвезли отвезти отвека отвел отвела отвергался отверделость отверженец 
отверженности отверзалась отверзалось отверзлась отверзший отвернется отвернувшееся отвернувшись отвернулась отвернулись отвернулся отвернуться отвертывалась отвертываться отвесной отвесов отвести ответ ответа ответе ответив ответил ответила ответили ответит ответите ответить ответишь ответный ответов ответом ответственности ответственность ответу ответы ответь ответьте отвечает отвечаешь отвечай отвечайте отвечал отвечала отвечали отвечать отвечаю отвечая отвечу отвешивает отвинчу отвлекайся отвлекала отвлекали отвлеченен отвлеченные отвлеченных отвлечь отво отвода отводил отводили отводило отводит отводить отводу отводя отводят отвожу отворачивалась отворачиваться отворачивая отворачиваясь отворен отворена отворенной отворенною отворенную отворенные отворенных отворены отвори отворив отворившего отворил отворила отворилась отворили отворились отворит отворите отворить отворотами отворотил отворотился отворотит отворотясь отворю отворявшийся отворяет отворяется отворяешь отворяй отворяйте отворял отворялась отворялись отворяль отворять отворяться отворяют отворяются отворяя отвратил отвратительна отвратительная отвратительнее отвратительно отвратительной отвратительные отвратительный отвратительным отвратить отвращение отвращением отвращении отвращения отвращенье отвык отвыкла отвыкни отвычки отвяжешься отвяжутся отвязал отвязался отвязать отвязаться отгадать отгадываний отговаривался отговариваться отговорил отговорился отговорка отговорки отголосок отгони отгонит отгонишь отгонял отгоняющее отгороженное отгуляется отдававшаяся отдавал отдавала отдавали отдавался отдавать отдаваться отдавят отдадите отдает отдаете отдается отдай отдайте отдайтесь отдал отдала отдалась отдалении отдаленно отдаленного отдаленное отдаленной отдаленную отдаленные отдаленный отдаленным отдаленных отдали отдалить отдалял отдам отдан отданию отдано отдаст отдать отдаться отдашь отдаю отдают отдаются отдающих отдающуюся отделан отделанную отделаться отделаюсь отделение отделении 
отделению отделения отделенная отделено отделенье отделкой отделку отделом отделываемой отделывается отделывали отделывать отделываться отделывают отдельная отдельно отдельное отдельной отдельные отдельных отделявший отделяла отделялась отделялось отделять отделяющей отдернул отдернула отдохнем отдохнет отдохнешь отдохни отдохните отдохновенья отдохну отдохнув отдохнул отдохнула отдохнуть отдых отдыха отдыхает отдыхаете отдыхал отдыхала отдыхать отдыхаю отдыхая отдыхиваясь отдыхнувшись отдыхнулся отдыхом отдышалась отдышаться отекать отекшим отель отеля отер отерла отец отеческое отеческой отечества отечественные отечеству отжившее отжила отжило отзвук отзвучие отзовется отзыв отзыва отзывается отзываешься отзываться отзывов отзывом отзыву отзывы отиравшую отирать отирая откажемся откажет откажетесь откажется откажите откажитесь откажу откажусь отказа отказавшись отказал отказала отказалась отказали отказались отказался отказано отказать отказаться отказом отказывает отказываете отказываетесь отказываешься отказывайтесь отказывал отказывался отказываться отказываюсь откапывать откармливались откашливаться откашлянулся откидные откидываясь откинув откинула откинутом откладывал откладывать откладывая откланивалась откланиваться откланиваясь откланялась откланялся откланяться откликалось откликаясь отклики откликнулась откликнулся отклонив отклонить отколотил отколотили откомандировала откопал откопать откровенен откровенна откровеннее откровеннейшим откровенничает откровенничал откровенно откровенное откровенности откровенностию откровенность откровенностью откровенною откровенны откровенный откровенных откроет откроется открой откройся открою откроют откроются открыв открывает открывается открывай открывал открывалась открывались открывалось открывался открывать открываться открываю открываются открывающимся открывая открыл открыла открылась открыли открылись открылось открыт открыта открытая открытие открытием открытий открытию открытия открыто открытое открытой 
открытом открытою открытую открыты открытый открытым открытыми открытых открыть открыться откуда откудова откупа откупам откупиться откупорил откусила откусить откушать откушенной отлагается отлагал отлегло отлетевший отлетел отлетела отлетит отлива отливались отливало отливать отличаемся отличает отличались отличалось отличат отличать отличаются отличие отличительные отличить отличная отличнейший отлично отличное отличной отличном отличную отличные отличный отлогие отлогих отложат отложенные отложив отложил отложили отложим отложите отложить отложу отломанными отломок отлучался отлучился отлучиться отлучке отлучусь отмалчивается отмахивается отмахивался отмахиваются отмахиваясь отмахнувшись отмахнулась отменили отменить отменном отменные отменный отменят отменяю отметил отметили отметки отметкою отмеченный отмеченных отмечены отмоешь отмыв отмывает отмывал отмывать отмыли отнекивалась отнекивался отнес отнесет отнесла отнеслись отнесло отнесся отнести отнесу отнимает отнимается отнимал отнимала отнимало отнимаю отнимая отнимет отнимите отниму отнимут относил относилась относились относительно относительны относитесь относится относиться относятся относящимся отношение отношении отношений отношения отношениях отношусь отныне отнюдь отняв отнял отняли отнялись отнять ото отобедаем отобедайте отобедал отобедать отобедаю отобрал отобрать отовсюду отогнал отогнать отогрел отогрелось отодвигалась отодвигалось отодвигаться отодвигая отодвигаясь отодвинув отодвинул отодвинула отодвинулось отодвинулся отодвинуть отозвал отозвалась отозвалось отозвался отозваться отойдет отойди отойду отойдя отойти отолстел отолщением отомкнув отомкнула отомстить отопление отопрет отопрется оторвавшись оторвала оторвало оторвать оторваться оторвет оторопев оторопевший оторопел оторопью отослал отослана отошел отошла отошли отошло отощал отпаривал отпевании отпейте отпер отперла отперли отперта отпертая отперто отпертой отпертую отпечаталась отпечатают отпив отпивая отпил отпирает отпирала 
отпирать отпирают отписал отпихнул отпихнула отпихнуть отплясывал отполированного отпор отправившимся отправил отправила отправилась отправили отправились отправилось отправился отправимся отправитесь отправится отправить отправиться отправишь отправишься отправлен отправление отправлении отправлений отправления отправлениях отправленью отправлю отправлюсь отправляетесь отправляется отправляйся отправляйтесь отправляла отправляли отправлялись отправлялось отправлялся отправляю отправляюсь отправляются отправляющей отправляясь отправятся отпраздновать отпрыск отпрыске отпрыски отпрыску отпуск отпускает отпускай отпускала отпускалась отпускали отпускалось отпускать отпуская отпусти отпустил отпустит отпустить отпустят отпущен отпущено отрав отрава отрави отравил отравилась отравился отравитесь отравится отравить отравиться отравлений отравленной отравленный отравленным отравлены отравляешь отравляла отравлять отрада отрадно отрадного отражалось отражать отражаться отражаясь отражение отражением отражены отразились отразилось отразился отразится отрапортует отрастил отрезал отрезала отрезали отрезанные отрезанный отрезать отрезвел отрезвился отрезвись отрезвится отрезвляется отрезвлялась отрезвляло отрезвляться отрезвляющее отрезвляясь отрезков отрекаетесь отрекался отрекаюсь отрекомендовался отрекомендовать отрекомендоваться отрекся отретировался отречение отречения отречься отрешиться отринь отрицаем отрицаете отрицал отрицания отрицатели отрицательная отрицательно отрицательный отрицаю отрицают отрицая отроду отродясь отрочества отрочестве отрубленную отрывает отрывается отрывал отрывала отрывать отрывая отрываясь отрывисто отрывистые отрывки отрывок отрывочно отряд отряс отсвет отсветы отсидела отскакивавший отскочи отскочив отскочил отскочила отскочило отслужив отслужить отслужу отсохли отсрочивать отсрочишь отставал отставила отставке отставку отставного отставной отставные отставшие отставший отставшими отстает отстал отстала отстали отстало отсталого 
отсталостью отсталый отсталых отстанет отстану отстанут отстань отстаньте отстать отстаю отстегнул отстоял отстоялся отстранились отстраняются отстраняя отстраняясь отстряпает отступает отступало отступающих отступая отступив отступил отступила отступили отступило отступилось отступишься отступлю отступлюсь отступятся отсутствие отсутствием отсутствия отсутствующему отсутствующих отсчитал отсчитывать отсылал отсылать отсыпной отсюда оттает отталкиваешь отталкивал отталкивала отталкивали отталкивало отталкивать отталкивающее отталкивающий отталкивающим отталкивая оттащить оттенка оттенком оттенок оттеняет оттеняется оттепель оттепелями оттер оттерты оттого оттолкнет оттолкнешь оттолкнув оттолкнул оттолкнула оттолкнутый оттопырившуюся оттрепал оттреплет оттуда оттудова оттянулось отуманенный отуманивать отупения отупеть отупления отучены отучит отхаркивая отхаркнулась отхватит отхлебнул отхлебывая отхлеставшему отхлестал отхлынет отхлынули отходи отходил отходила отходит отходной отходят отца отцам отцами отцах отцвели отцветшего отцветшими отце отцов отцовские отцовский отцовских отцовскую отцом отцу отцы отчаивайся отчаивался отчаиваться отчасти отчаявшийся отчаялась отчаялись отчаяние отчаянием отчаянии отчаянию отчаяния отчаянная отчаяннее отчаянно отчаянного отчаянное отчаянной отчаянном отчаянному отчаянною отчаянные отчаянный отчаянным отчаянных отчаянье отчаянью отчаянья отчаяться отче отчего отчеканивая отчеканила отчеканилась отчеканился отчество отчет отчета отчетливо отчетливым отчетов отчетом отчеты отчизне отчизну отчий отчиной отчистил отчищенных отчуждения отшатнулась отшатнулся отшельник отшибло отшумела отщелкивая отъезд отъезда отъезде отъездом отъезду отъезжают отъезжающими отъехав отъявленного отъявленной отъявленным отыскав отыскал отыскала отыскалась отыскали отыскались отыскалось отыскался отыскание отыскать отыскивает отыскиваете отыскивал отыскивали отыскивать отыскиваются отыскивая отыщется отыщешь отыщи отягчены отяжелевшую отяжелели 
отяжелелой офелию офицер офицера офицерами офицеров офицером офицерская офицерских офицеру офицеры официально официальное официальном официальный официальным официальных офранцузят ох охает охаете охала оханье оханья охапку охая охватил охватила охватили охватило охватить охватывавшим охватывал охватывала охватывают охвачен охваченный охи охладевших охлаждается охлаждался охлаждение охлаждений охмелел охмелия охмеляющего охнет охнуть охо охота охоте охотку охотливо охотнее охотник охотника охотничья охотно охотой охоту охоты охранению охраню охраняет охраняя охрипший охта оцененного оцени оценила оценили оценить оценку оцепенеет оцепенел оцепенело оцепенелой оцепенении оцепенения оцепили оч очаг очами очаровал очарован очарована очарование очарования очарованной очарованный очарованье очарованьи очарованьям очарователен очаровательна очаровательная очаровательнее очаровательнейшая очаровательное очах очевидно очевидное очевидности очевидность очевидную очевидные очевидный очей очень очереди очередь очерк очертаний очертания очертанье очертил очертя очи очинил очините очинна очистительных очистить очистки очищает очищать очищений очию очками очках очки очнетесь очнется очнешься очнитесь очнувшийся очнувшись очнулась очнулись очнулся очнусь очнуться очумел очутившиеся очутившийся очутилась очутились очутилось очутился очутиться очутясь ошеломлен ошеломленная ошень ошибаетесь ошибается ошибаешься ошибалась ошибались ошибался ошибаться ошибаюсь ошибаются ошибетесь ошибиться ошибка ошибке ошибки ошибкой ошибку ошиблась ошиблись ошибок ошибочка ошибочно ошибочное ошибочных ошибся ошибусь ощеришься ощетинившейся ощетинился ощупав ощупал ощупом ощупывает ощупывая ощупью ощутил ощутилась ощутим ощутительно ощутить ощущаете ощущается ощущал ощущала ощущать ощущаю ощущая ощущение ощущением ощущении ощущений ощущения п па пава павел павильон павильонам павла павлиний павлиньи павлиньим павлович павловна павловне павловной павловну павловны павловцы павлыч павлыча пагубная 
падает падай падал падала падаль падать падаю падают падающего падающие падеж падение падением падении падений падения паденье паденья падет падут падучая падчерицей падчерицы падшая падшей падшему падший падшим падших падшую паек пазухи пазуху пай пакет пакетом пакеты пакостей пакостно пал пала паладином палата палате палату палаты пале палевые палевых палец пали палил палим палима палимый палит палка палки палкой палку палладиумом палочкой палочку пальбу пальмерстон пальмерстоном пальмой пальмы пальто пальца пальцам пальцами пальцах пальце пальцев пальцем пальцы пальчик пальчика пальчики палящая палящих памятен памяти памятлив памятниками памятно памятной память памятью пан панаму пане панегирика панели пани панихид панихиду панихиды панический панического панн панна панорама панораму панорамы пансион пансиона пансионе пансионерки пансионной панталон панталонах панталоны пантелеевич пантелеевна пантелеич пантера панцырь папа папаша папашей папаши папашу папенька папеньке папеньки папенькой паперти папильотках папирос папиросами папиросками папироски папироской папироску папиросочка папиросочку папиросочница папиросочницу папиросу папиросы папка папку папочка папочку папы папье пар пара параболу парада парадная парадное парадному парадных параду парализованы паралич паралича параллель параллельно параша парашу пардон паре парень пари париж парижа париже парижем парижских парижскому парикмахера париком парили парит парк парка парке парку пармезаном парней парнем парнике парнишка парнишкой парню парня парой пароксизме пароксизмы паром пароход парохода пароходах пароходе пароходы партере партией партии партию партия пару парус парусах парчами парче пары пас паску пасли пасмурна пасмурно пасмурный паспорт паспорта паспорте паспорту паспорты пассивно пастил пастух пастухи пастухом пастуший пастушков пастырь пасть пастья пасхальной пасхальный пасхе пасхи пасху пасьянс патентованных патер патетическая патетически патетических патетической патетическому патетическую 
патоки патриархальной пауза паузы паук пауке пауки пауком паутина паутиной паутину паутины паф пафос пахарь пахать пахло пахнет пахнул пахнуло пахнут пациент пациента пациентами пациентов паче пачками пачки пачку пачтенье пачулями пашен пашенька пашеньке пашеньки пашенькой пашка пашой пашут паях паяц паяца пе певал певали певец певица певицы певуче певучее певучей певучий певучим певучих певца певцы певчий певчих певший пегашке пегашку пегую педагог педагогическом педагогов педант педантизма педантизмом педантически педантических педантическое педантическую педанты пей пейзаж пейзажа пейзажах пейзажей пейзажи пейзажист пейсы пейте пекли пеклись пекут пекущуюся пел пела пелагеи пелагея пеленами пеленах пеленой пелериной пели пело пена пене пени пение пением пении пенится пения пенки пенкин пенкиным пеной пеною пенсии пенсион пенсиона пенсионе пенсионишко пенсионом пенсию пень пенье пеньем пеньи пеняй пеняйте пенять пепел пепельницу пепла пеплом пера первая первейшая первейшими первенец первенство первенствующей первенствующую первенца первенце перво первобытною первобытный первого первое первозданный первой первом первому первоначальная первоначально первоначальное первоначальную первоначальные первоначальный первоначальных первою первую первые первый первым первых пергаментным пергаментом перебегал перебегала перебегающими перебежала переберем переберет переберешься переберись переберусь переберут перебиваете перебивай перебивал перебивалась перебиванья перебиваюсь перебивая перебившего перебил перебила перебили перебирает перебирается перебирай перебирала перебирать перебирая перебиты перебить переболело перебороло перебором перебрал перебрала перебранился перебраны перебрасывается перебудить перебывает перебывали перебывало перебьют перевале переваливаться переваливаясь перевалился переварилась переварится переварить переведет перевез перевезенную перевезли перевезут перевел перевелась перевели перевелись перевернется перевернул перевернула перевернули 
перевернуло перевернулось перевернулся перевернуть перевертел перевертывается перевертывать перевертываясь перевес перевести перевесть перевиваясь перевод переводе переводили переводить переводной переводов переводом переводы переводя переводят перевожу перевозиться перевозка перевозки перевозят переворачивается переворачивался переворачивая переворот переворотил переворотов переворочаешь переврала перевязан перевязана перевязочным перевязь переглотают переглянулась переглянулся перегнувшись переговаривал переговорено переговорив переговорил переговорили переговорить переговоры переговорю перегорает перегородке перегородки перегородкой перегородку перед передав передаваемая передаваемой передавал передавала передавали передавать передавая передадите передает передайте передал передала передали передам переданной передаст передать передаю передвигая переделает переделаешь переделала переделано переделать переделки переделку переделывать передергивая передернуло передернут передке передней переднюю передо передовые передовыми передовых передом передразниванье передразнил передумав передумает передумал передумала передумано передумать передумывать передушил переедем переедет переедете переедешь перееду переежаем переезд переезда переезде переездкой переездом переезжаем переезжает переезжай переезжайте переезжал переезжали переезжать переезжаю переезжают переезжая переехал переехала переехали переехать переешь переждав переждал переждать пережевывает переживает переживала переживаю переживем переживешь переживу пережил пережит пережитых пережить перезрелою переименованы переимчивым переиначил переиначить перейдет перейдете перейдут перейдя перейти перекидывались перекинет перекинул перекинуть перекладных перекладывал перекладывала перекликаются перековать перекосились перекосилось перекрестив перекрестившись перекрестила перекрестилась перекрестился перекрестись перекрестит перекрестка перекрестках перекрестке перекрестку перекрестный перекресток перекрестясь 
перекрещивались перекрещивания перекрещиваясь перекрещу перекупленном перекусил перелетная перелетные переливает переливается переливы перелилась перелистывает перелистывай перелистывал перелистывать перелистывая переложил переложили переложит перелом перелома переломали переломаны переломится перельет перемелется перемен перемена перемене перемени переменившемся переменившимся переменил переменила переменилась переменили переменились переменилось переменился переменит перемените переменить перемениться переменного перемены переменяете переменят переменятся переменять переменяя перемешал перемешались перемешаны переминаясь перемолвить перемолола перемыть перенес перенесемся перенесении перенесено перенесет перенесешь перенесла перенесли перенесся перенести перенестись перенесть перенесу перенесут переносил переносила переносили переносит переносится переносить переносье переночуют переношу перенял перенятым переодеться перепало перепахали перепел перепела перепелки перепелов переписал переписана переписать переписка переписывали переписыванье переписывать перепишет перепишете переплет переплете переплетом переплыть переползанье переползанья переполненном переполненный переполнено переполнять переправил переправился переправить переправиться перепродает перепродай перепугается перепугаешься перепугалась перепугались перепугался перепуганная перепуганные перепуганными перепутает перепутаешь перепуталось перепутанные перепьются переработалась перерастает перервал перерву перерезывала переродило перерождавшаяся перерождения перерыв перерывая перерыву перерывы переряженный пересветов пересекло пересел переселился переселимся переселить переселиться переселяемся пересиливает пересиливал пересиливая пересилил пересилила перескажу пересказал пересказать пересказывал пересказывала пересказывать перескакивали перескакивать перескочить переславший переслал переслать переслюнить пересмешку пересмотрю пересохли пересохло переспорил переспрашиваемый переспросил переспросила 
перессорились перестав переставал переставала переставали переставало переставая переставил переставлял перестает перестал перестала перестали перестало перестанем перестанемте перестанет перестанете перестанешь перестану перестанут перестань перестаньте перестать перестаю перестают перестрадать перестройки перестройку переступал переступать переступают переступая переступив переступил переступила переступить пересуды пересчитаешь пересчитал пересчитают пересчитывал пересылала пересылались пересыпать перетаскано перетаскивать перетасовка перетащил перетащить переулка переулкам переулках переулке переулки переулком переулку переулок перехватила перехватить перехитренному переход перехода переходил переходила переходили переходим переходит переходить переходя переходят переходящее перехожу перечислил перечитав перечитал перечница перешагнешь перешагнул перешагнуть перешедшее перешедший перешел перешептал перешептываться перешивал перешить перешла перешли перешло перещеголял перил перила перилам перин перине перинное перины период периодически периодического периодической периодическою периодическую периферию перл перламутра перлы перо пером персидским персидской персидском персики персон перспектива перспективе перспективой перспективу перспективы перст перстень перстней перстнем перстнями перстнях перстом персты перу перуджийский перуджино перуджия перунами перцу перчатками перчатках перчатке перчатки перчатку перчаток першить перышко перьев перья перьями пес песен песенка песенки песенку песенники песенников песка песках песке пескину песком песку песне песней песни песнию песнь песнью песню песня песнями песнях песок пестиком пестрая пестреют пестрили пестрит пестрой пестротой пестрою пеструшка пестрые пестрый пестряков пестрякова пестряковым песчаной песчаную песчаные песчинка петей петель петербург петербурга петербурге петербургом петербургская петербургские петербургский петербургским петербургских петербургское петербургской петербургском петербургскую 
петербуржцу петле петлей петли петлицах петлице петлицу петлю петля петр петра петре петров петрович петровича петровиче петровичем петровичу петровки петровна петровне петровной петровну петровны петровский петровского петроградское петроградском петром петру петруша петрушка петрушке петрушку петруща петух петуха петухи петухом петушка петь петя пехота пехоте пехотного печалей печален печали печалились печалить печалиться печаль печальна печальная печально печального печальное печальной печальном печальною печальный печальным печальных печалью печалями печами печатаете печатаетесь печатал печататься печати печатными печатных печать печатью печени печений печенка печенками печеную печенья печет печи печке печки печкой печку печь пешкой пешком пешни пещера пещеры пианино пива пивал пивали пиво пивом пивца пивцо пигмалионом пидерита пиджак пиджаке пиджаки пик пикирована пикированная пикированным пикнуть пил пила пили пилит пилот пилюли пилюлю пион пир пира пирамидами пирамиду пирамиды пире пиров пирог пирога пироги пирогов пирогом пирожка пирожки пирожное пирожных пиру пирушках пиры писал писала писали писало писалось писан писаний писанное писанный писаных писаньем писанья писаньям писарем писаришек писаришками писаришки писарь писаря писателей писателем писатели писатель писательницей писательство писателю писателя писателям писателями писать писем писец писк пискариной пистолет писца писцов писцом писцы писывал письма письмах письме письменно письменном письменному письменный письменным письмецо письмецом письмо письмоводителем письмоводитель письмоводителю письмоводителя письмом письму питает питал питала питалась питали питание питать питаться питаю питая питаясь питейное питейной питер питере питером питии питомник питомце питомцы пить питье питья пиф пишем пишет пишете пишется пишешь пиши пишите пишу пишут пишутся пишущим пища пищали пищат пище пищи пищик пищика пищиков пищиком пищику пищу пиявки пла плавает плаванье плавать плавающих плавит плавно плавное 
плавный плакавшие плакавших плакавшую плакал плакала плакали плакались плакало плакаль плакать плакиды плаксиво плакун пламенем пламени пламенная пламенной пламенном пламенные пламень пламя план плана планами планах плане планет планета планетах планете планету планеты планом планомерно плантатору плану планы пластинка пластырями плат платеже плати платил платили платиль платим платит платите платить платишь платка платками платке платки платком платку платок платона платочек платочком платою плату платформу платье платьев платьем платьица платьице платьишке платьишки платью платья платьями платьях платя платят плач плача плаче плачевный плачевным плачем плачет плачете плачешь плачу плачут плачущее плачущим плачущих плачущую плачь плачьте плащ плаща плащах плаще плащом пле плевал плеванье плевны плевок плед пледом плелся племени племянника племянниками племянница плен пленительна пленительные пленная пленник пленной плену пленя пленял пленялись пленяло пленять плеск плесканье плесну плеснул плеснула плеснули плести плетеных плетень плетет плети плетнем плетни плетня плетут плеч плеча плечам плечами плечах плече плечи плечики плечо плечом плечу плешивым плещет плибьет плисовых плита плитам плитами плитах плите плиты плод плодами плодить плодов плодотворная плодотворной плоды плоские плоско плоти плотин плотине плотнее плотник плотника плотно плотного плотное плотной плотный плотных плоты плоть плох плоха плохая плохие плохих плохо плохонький плохую плоше площади площадка площадке площадки площадная площадной площадь площадям площадях плугом плут плутоват плутоватою плутовка плутовская плутовски плутовскою плутовства плуты плывет плыви плыл плыла плыло плыть плэд плюгавенький плюй плюнет плюну плюнул плюнуть плюнь плюс плюх плющами плющей плющом плюю плюют пляс плясала плясать пляске пляски плясуна плясунья пляшет пляши пляшут пни по побагровел побаивается побаивались побег побегом побегу побегут побегушках победа победам победами победе победил победила победителем 
победителя победить победно победный победой победоносцев победоносцевым победу победы побежал побежала побежали побежало побеждает побеждал побеждать побеждающею побеждая побежден побежденный побеждены побежит побелевшею побелевшие побелел побереги поберегите поберегся поберечь поберечься побеспокоили побеспокоить побивали побили побираться поблагодари поблагодарил поблагодарила поблагодарить поблагодарю побледнев побледневшая побледневшими побледнеет побледнел побледнела побледнели побледнело побледнеют поблек поблекла поблекли поблекло поблеклые поблекнет поблекшей поблескивает поближе поблизости пободрее побоев побожился побожиться побои побоится побой побоку поболее поболтать побольше поборемся поборол поборюсь побрал побранят побрезгуй побрился побродив побросать побудившие побудила побудили побудило побуду побудь побудьте побуждали побуждение побывали побывать побываю побыл побыстрей побыть побьет повадился повадки повадятся поважнее повалил повалилась повалило повалилось повалился повалится повалиться повальной поваляться повар повара поварам поваров поваром повару поведать поведение поведении поведению поведения поведет поведешь поведу поведут повез повезет повезли повезут повел повела повелевала повелел повеление повелению повелено повели повелитель повелительно повелительным повело повелось повергает повергался поверенная поверенного поверенному поверенный поверенным поверженный поверженным поверив поверил поверила поверили поверим поверит поверите поверить поверишь поверка поверкою повернись поверну повернувшийся повернувшись повернул повернула повернулась повернули повернулся повернуть повернуться поверстный повертел повертеться повертит повертывая поверх поверхностно поверхностным поверхность поверь поверьте поверю поверяет поверял поверяла поверят поверять повеселевшим повеселее повеселеет повеселел повеселела повеселели повеселело повеселись повеселиться повеселясь повесив повесившиеся повесил повесить повеситься повествовала повествований 
повествовать повествует повести повестка повестке повестки повестку повесть повестям повестях повесьте повеся повечеру повешенные повеял повеяло повивальными повивальных повидавшись повидать повидаться повилика повинился повинною повинных повиновались повиновался повинуйся повинуются повинуясь повинюсь повис повисали повисла повисли повиснет повисшей повисшими повита повито повихнулся повлекли повлекло повлияло повнимательнее повод повода поводил поводом поводу поводя поворачивает поворачивается поворачивайся поворачивался поворачиваю поворачивая поворачиваясь поворот поворота повороте поворотил поворотили поворотился поворотить поворотом поворчав повострее повредил повредит повредить повредиться повсеместно повсеместных повстречался повстречаться повсюду повторение повторения повторенья повтори повторившееся повторил повторила повторили повторило повторился повторим повторит повторится повторить повторю повторяет повторяйте повторял повторяла повторялись повторялось повторялся повторять повторяться повторяю повторяют повторяются повторяя повыскочит повысосет повыше повышение повяжется повязан повязаны повязки повязкой повязку поганая поганую погас погасал погасали погасания погаси погасил погасит погасла погасли погасло погасшие погиб погибает погибайте погибал погибало погибают погибающий погибели погибельные погибелью погибла погибли погибло погибнет погибну погибнуть погибший погибшим погибшими погладил поглотила поглотит поглощаются поглощен поглощенные поглубже поглумился поглупее поглупел поглупеть поглядев поглядел поглядела поглядели поглядело поглядеть погляди поглядим поглядит поглядите поглядишь поглядывает поглядывал поглядывала поглядывали поглядывая погляжу погнал погнался погнушались поговаривали поговаривать поговаривают поговори поговорила поговорили поговорим поговорит поговорить поговоришь поговорю погогочут погода погоде погоди погодили погодите погодой погоду погоды погодя погожу погони погонит погоняя погордиться погорел погорельцам 
погорельцев погорячатся погорячился погорячится погостить погреб погреба погребаете погребался погребальный погребальными погребе погребена погребла погребло погрозив погрозил погрозила погрознее погромче погружается погружалась погружались погружался погружаюсь погружаясь погружен погружена погруженная погруженный погрузил погрузилась погрузились погрузилось погрузился погрузит погрузится погрузиться погрузишь погрузясь погрустив погрустим погубил погубило погубит погубить погублю погуляет погуляй погуляйте погулял погулять погуще под подавай подавал подавала подавали подавался подавать подавая подавила подавить подавленной подавленный подавленным подавленными подавлю подавлять подавляются подавляя подавно подагра подагрика подагрой подагру подагры подадим подадите подадут подает подаешь подай подайте подал подала подалась подали подались подальше подам подан поданный подано поданы подаренное подаренный подари подарил подарила подарили подарите подарить подарка подарки подарков подарок подарю подаст подати подать подаче подачки подашь подаю подают подающий подающим подаяние подбегая подбегут подбежал подбежала подбежали подберу подбивался подбирает подбирала подбирался подбираю подбирающийся подбирая подбитые подбитыми подбородка подбородком подбородок подбоченясь подбросить подвал подвалы подвальной подвальные подвальный подведет подведешь подвел подвела подвели подвергаемся подвергается подвергалось подвергать подвергаться подвергаются подвергнуться подвернись подвернувшемуся подвернулась подвернулся подвести подвиг подвига подвигается подвигалась подвигалось подвигался подвигах подвигая подвигаясь подвиги подвигла подвигов подвигом подвижника подвижною подвинет подвинув подвинул подвинулась подвинулись подвинулось подвинулся подвинуться подвластной подвода подводах подводил подводит подводить подводя подвожу подворотню подвох подвязанными подгадил подгадить подгибались подгибаться подгибая подглядели подглядывавшая подгоняют подгорелое подгорит подгорюнившись 
подготовки подготовленного подготовлено подготовлюсь подготовляет подготовлялось подгреб подгулял поддавалась поддавался поддадим поддадитесь поддаешься поддакивал поддакивала поддакивали поддакивают поддакивая поддакнул поддалась поддали поддало поддамся поддаст поддаться поддеваете поддевке поддел подделаться подделаю подделывал подделывали подделывателей поддержания поддержать поддержи поддерживаемую поддерживает поддерживал поддерживала поддерживалась поддерживало поддерживать поддерживая поддержите поддержки поддразнивает поддразнивал поддразнивающие подействовал подействовала подействовали подействовало подействовать подействует поделаешь поделится поделиться поделом подержанная подери подернулась подернулось подернут подернутое подернутый подернутым подерутся подешевле поджав поджатым поджег поджигал поджигающее поджидал поджидала поджидали поджидаю поджимают поджимая подзадоривая подзатыльника подземелье подземной подземный подземных подзывал подзывали поди подивился подивитесь подите подкараулил подкараульте подкатил подкинуть подкипятило подкладке подкладкой подкладку подкладывал подкладывала подкладывая подков подколенки подколодная подкосились подкрадется подкрадутся подкрепил подкреплена подкрепляющим подкупишь подлаживаясь подлая подле подлее подлейшего подлейший подлейшими подлейших подленькая подлец подлеца подлецов подлецом подлецу подлецы подливку подлил подлиннее подлинно подличают подло подлого подлое подложил подлой подлости подлость подлые подлый подлым подлых подманить подмахнет подмахни подмахнул подмести подметено подметишь подметки подметок подмигивать подмигивающего подмигивающий подмигивая подмигнув подмигнул подмонтироваться подмывает подмывало подмышки подмышкой поднадул поднадуть поднес поднести поднеся поднимавшийся поднимает поднимается поднимай поднимал поднимала поднималась поднимали поднимались поднималось поднимался поднимать подниматься поднимаю поднимают поднимающихся поднимая поднимаясь поднимем поднимет подними поднимут 
подноготную поднос подноса подносе подносом подносы подносят подняв поднявшаяся поднявши поднявшись поднявшую поднял подняла поднялась подняли поднялись поднялось поднялся поднят поднятым поднять подняться подо подобен подобие подобно подобного подобное подобной подобном подобную подобные подобный подобным подобных подобострастия подобострастничал подобострастно подобрав подобрал подобрала подобрали подобрался подобраны подобру подогнутых подогревалась подогреть пододвинул пододвинула подожгла подождав подождал подождать подождем подождет подождешь подожди подождите подожду подожму подозвала подозревает подозреваете подозревай подозревал подозревала подозревали подозревать подозреваю подозревают подозревающего подозревая подозрение подозрением подозрении подозрений подозрению подозрения подозрениях подозрителен подозрительна подозрительно подозрительного подозрительное подозрительным подозрительными подозрительных подойдет подойдешь подойди подойду подойдя подойти подоконник подоконниках подол подолгу подолом подольстилась подольщаюсь подонки подонкихотствовать подопрет подосадовал подослал подоспеет подоспела подоткнул подох подошвы подошедшего подошедшею подошедший подошел подошла подошли подошло подошлю подпевает подпевал подпевать подпер подпереть подпившая подпирал подпираясь подписал подписала подписан подписана подписано подписать подписи подписка подписки подписку подписывает подписывал подписывалась подписывались подписывать подписывают подписывая подпись подписью подпишет подпишете подпишешь подпишитесь подпишу подползает подползи подползла подполковник подпольи подпоручик подпруга подпрыгивал подпрыгивать подпрыгивая подпрыгнула подпускать подрагивали подражать подразделений подразделения подразнить подразумевать подрал подрались подрался подрастают подрезаны подрезать подробная подробнее подробничать подробно подробностей подробности подробность подробностью подробностями подробностях подробную подробные подробный подрос подросли подруг подруга подругам 
подруге подруги подругою подругу подружились подружится подряд подрядами подряде подрядчик подрядчиками подрядчиков подряжают подсвечник подсвечнике подсвистывает подсекают подсекли подсекло подсел подскажи подсказанное подсказывает подсказывала подсказывать подсказывая подскакивала подскочил подскочит подслеповатыми подслепые подслуживается подслушал подслушала подслушанные подслушивай подслушивайте подслушивал подслушивали подслушивать подсматривать подсмеиваешься подсмотреть подсобить подсочиненные подставил подставит подставить подставляет подставлять подставляя подстерегает подстерегают подстерегающую подстеречь подстилая подстрекательные подстреленный подстрелили подстрелю подстригали подступившего подступило подсудимой подсудимому подсудимый подсудимых подсунул подсунули подталкивал подталкивать подтвердил подтвердила подтвердите подтвердить подтверждает подтверждаешь подтверждал подтверждала подтверждало подтверждалось подтверждать подтверждаю подтверждением подтвержденное подтвержу подтибрил подтыканной подтянуть подув подувания подувая подул подумав подумаем подумает подумаете подумаешь подумай подумайте подумал подумала подумали подумалось подумать подумаю подумают подумывает подумывал подурнела подурнели подустроить подушек подушечку подушка подушке подушки подушкой подушку подхватил подхватила подхватили подхватило подхватим подхватит подхватить подхватывает подхватывают подхватывая подхлопывать подходи подходившие подходил подходили подходило подходим подходит подходите подходить подходной подходя подходят подходящего подходящее подходящей подхожу подчас подчеркнула подчиненного подчиненные подчиненных подчинила подчинились подчинить подчиниться подчинялась подшиванье подшивать подшучивают подъезда подъезжает подъезжают подъем подъеме подъемлют подъемный подъехал подымавшеюся подымавшийся подымает подымайся подымал подымалась подыманием подымать подыматься подымая подымаясь подымусь подыскивает подышать подышит подь подьяческой подьячий подьячим 
подьячих поедем поедемте поедет поедете поедешь поединке поединку поеду поедут поедят поезд поезда поезде поездка поездке поездки поездку поездом поезду поезжай поезжайте поела поем поест поесть поет поете поется поехал поехала поехали поехать поешь пожав пожал пожала пожалев пожалеет пожалеешь пожалей пожалейте пожалел пожалела пожалели пожали пожаловал пожаловали пожаловался пожаловать пожаловаться пожалуй пожалуйста пожалуйства пожалуйте пожар пожара пожарах пожаре пожарищ пожарная пожарные пожаров пожаром пожары пожатие пожатия пожать пожелав пожелавшим пожелаете пожелал пожелали пожеланий пожелать пожелаю пожелают пожелтевшую пожелтелая пожелтели пожертвовал пожертвовала пожертвованием пожертвовать пожертвует пожертвуете пожертвуйте поживаете поживаешь поживее поживем поживешь пожилая пожили пожилой пожилые пожилыми пожилых пожимает пожимал пожимала пожимая пожинает пожирал пожирали пожирающая пожирая пожить пожмем пожмет позабавится позаботится позаботься позабыв позабыл позабытый позавидовали позавидовать позавтракал позавтракали позавтракать позавтракаю позавчера позади позаимствоваться позаняться позах позвал позвала позвали позваль позвано позвать позволением позволения позволено позволил позволила позволили позволит позволите позволительна позволительная позволительно позволить позволь позвольте позволю позволяет позволяешь позволял позволят позволять позволяю позволяют позволяющей позвонил позвонила позвякивают позднее поздней позднейшее позднейшей позднейшие поздний поздно поздняя поздоровавшись поздороваемся поздоровался поздороваться поздоровее поздорову поздравил поздравить поздравлений поздравляем поздравляли поздравлять поздравляю поздравляют поздравь позе позевывая позже позицию позлить познав познавала познавший познаем познай познакомил познакомились познакомился познакомить познакомиться познакомлю познакомясь познал познала познании познания познаниях позначительнее позовет позови позову позовут позолота позолотой позор позора позорище 
позорно позорное позорном позорным позором позу позы позыв позывов позьми пои поиграть поил поила поименно поискав поискал поискать поиски поистине поит поить поищи поищите поищу пой пойдем пойдемте пойдет пойдете пойдешь пойду пойдут поймав поймает поймаешь поймайте поймал поймала поймали поймана пойманной пойманным поймать поймают поймем поймет поймете поймешь пойми поймите пойму поймут пойти пока покажет покажете покажется покажи покажите покажу покажут покажутся показав показал показала показалась показали показались показалось показался показание показанием показании показания показаниями показанная показать показаться показывает показываете показывается показывай показывайся показывайте показывал показывала показывалась показывали показывало показывался показывать показываю показывают показываются показывая покамест покатаемся покатает покататься покатилась покатились покатился покатится покатости покатываться покатых покачав покачает покачал покачала покачивай покачивая покачнулись покачнулся покаюсь покаяния покаяньи покиванием покидает покидаете покидал покидала покидали покидать покидаю покидая покинет покинешь покину покинув покинули покинутой покинуть покладая покладываться поклажа поклажу поклал поклон поклонами поклонение поклонением поклонения поклонившись поклонилась поклонился поклонись поклонится поклониться поклонник поклонника поклонникам поклонники поклонников поклонником поклонов поклоном поклоны поклонюсь поклоняется поклонялась поклонялся поклонясь поклоняются поклоняясь поклявшись поклялась поклялся поклянусь покое покоем покоен покои покоившейся покоилась покоились покоит покоится покоить покоишься покой покойна покойная покойнее покойник покойника покойнике покойников покойником покойнику покойница покойницу покойницы покойно покойного покойное покойной покойному покойною покойную покойны покойные покойный покойным покойных пококетничать поколебавшись поколебалась поколебать поколением поколений поколения поколениях поколотил поколотить 
покомфортнее покончен поконченный покончено покончены покончив покончил покончим покончит покончить покорев покорению покорилась покориться покормит покорна покорнейше покорнейший покорно покорного покорной покорности покорностию покорность покорностью покорную покорны покорный покорным покорных покоробило покоробился покороче покорствуй покоряется покорять покоряюсь покосившись покосился покою покоюсь покоя покоятся покоях покоящимся покрадут покраже покражи покраснев покрасневший покраснеете покраснел покраснела покрашенных покрепче покривившаяся покривившимися покрикивать покричит покровителей покровитель покровительства покровительство покровительствовали покровительством покровительствует покровы покроюсь покрупнее покрутившись покрыв покрывает покрывается покрывал покрывала покрывало покрывалом покрываясь покрывший покрыл покрыла покрылись покрыта покрыто покрытый покрытыми покрытых покрыть покрышку покуда покупает покупается покупал покупалась покупатели покупать покупками покупке покупки покупкой покупкою покупку покупные покуражился покурить покуситься покушает покушается покушался покушение покушений покушения покушениях покушенья пол пола полагает полагаете полагается полагал полагали полагать полагаю полагая полакомиться поласковее полатве полбутылки полведра полгода полгорода полдень полдневный полдня полдороге полдюжины поле полевых полегоньку полегче поледенело полежав полежал полежать полежишь полежу полез полезай полезен полезет полезла полезли полезная полезнее полезнейшая полезно полезное полезной полезною полезную полезны полезные полезный полезным полей полем полен поленами поленился поленом поленька поленьке поленькой полет полете полетел полетела полетели полетело полететь полетит полетишь полетов полетом полечивать полечка полечке полечкой полечку полечу полжизни ползает ползала ползало ползают ползет ползучем ползущий ползущих ползя поливали поливающий полились полина полине полиной полину полинявшая полинявшими полиняли полиняло полинялые 
полинялым полинялых полис политика политике политики политико политику политическая политически политические политический политического политической политическую полицейские полицейский полицейскому полицианом полицией полиции полицию полиция полицу полишинель полк полками полкане полках полке полки полковник полковника полковнику полковничьем полковничья полководцем полковое полковой полкомнаты полку полмиллиона полминуты полна полная полнее полнейшего полнейшем полнеть полно полноводной полного полное полной полнокровный полном полностью полноте полнотой полноты полночи полночный полночным полночными полночь полною полную полны полные полный полным полными полных полов половецкий половик половина половине половинки половинку половинных половиной половину половинчатой половины половой половому половую половчей половым полог пологой положа положат положен положена положение положением положении положений положению положения положениях положенного положенной положенные положенный положенными положено положи положив положил положила положили положим положись положит положительно положительного положительное положительной положительный положительным положительных положится положить положиться положишь положу полой полок полом поломаться поломойная полон полоненная полонский полосами полосатой полоскалась полосками полоски полоской полоскою полоску полоснет полосой полосою полости полость полосы полотенце полотенцем полотна полотнами полотне полотнища полотно полоумная полоумного полоумный полоумным полощет полощутся полпивной полпивную полслова полсотня полстакана полстраницы полсутки полтинник полтинники полтиннику полтиной полтора полторы полу полубезумною полубессмысленная полубог полубреде полувеселый полугода полугоры полудворянских полуденными полудне полудня полудремоту полукругом полумглу полумертвый полумрак полумраке полуно полуночи полуночной полунощный полуотворенной полуотворенную полуотворенные полуотворенный полуоткрыты полупоклоном полупрезрение 
полупрезрением полупросыпаясь полупьяных полуразвалившиеся полуразрушенные полуслове полусне полусознанием полустрадный полусумасшедшая полусумасшедших полутон полуторы полуформенной получаем получаемых получает получаете получается получаешь получал получала получало получалось получасу получать получаю получая получение получении получения полученное полученному полученные полученный полученными полученных получено получены получи получив получил получила получили получило получимши получиновничьих получит получите получить получишь получу получше полушепотом полуштоф полушубком полушубок полчаса полчасика полчища полшага полы полымем полымя полыни полынь полыхнет польза пользовавшееся пользовал пользовалась пользовалось пользоваться пользой пользу пользуетесь пользуешься пользуются пользуясь пользы полька польку польски польские польскими польских польское польстилась польстит польше польши польшу польщено полю полюбил полюбила полюбили полюбим полюбит полюбите полюбить полюбишь полюблю полюбопытствовал полюбуйтесь полюс полюсах поля поляк поляки поляко поляков полям полями поляне поляны полярный полях полячишки полячка полячков полячку полячок помадки помадой помаду помаленьку помалу помахивая помашет помедли помедлив помедлить помедля помелом помене поменее поменьше поменялись поменяться помер померанцевой померанцевую померанцевые померанцы помереть померещилось померещился померзло померк померкал померкла померкло померкший померла помертвев помертвевшая помертвевшей помертвевшие помертвевший помертвевшими помертвел помертвело померяемся поместившись поместил поместились поместился поместим поместительный поместить помесячно помехи помеченную помечтаем помешает помешаете помешал помешала помешалась помешали помешало помешался помешан помешанная помешанного помешанной помешанную помешанные помешанный помешаны помешательства помешательстве помешательству помешать помешаю помешают помещается помещалась помещался помещаются помещение помещений помещению 
помещения помещик помещика помещиков помещиком помещику помещица помещичьи помещу помещьчьих помилует помилуй помилуйте помимо помин поминай поминайте поминал поминала поминали поминать поминкам поминках поминки поминок помину поминутно поминутные помирает помирились помирись помирите помиритесь помириться помни помнил помнила помнили помнилось помним помнит помните помнится помнить помнишь помню помня помнят помнятся помнящего помнящей помнящую помог помогает помогаешь помогал помогала помогать помогаю помогают помогая помоги помогите помогла помогли помогло помогу помогут поможет помойные помойных помолимся помолись помолитесь помолиться помолодела помолодело помоложе помолчав помолчал помолчала помолчали помолчать помолчи помолчим помолюсь поморщились поморщился помочах помочи помочил помочь помощи помощию помощник помощнике помощники помощницей помощницы помощь помощью помоями помоях помрачался помрачен помрачение помрачения помрачилась помрет помудренее помутилось помутневший помучается помучить помыкают помыслить помыслишь помыслов помыслы помышления помышленья помышлял помягче помянет помяни помяните помянул помянут помянуть помятый помять понаведаться понадеялась понадеялся понадеяться понадобились понадобилось понадобился понадобится понадобиться понадоблюсь понадобятся понапрасну понасажать понатужиться поневоле понедельник понедельника понедельники понелепее понемногу понемножку понес понесем понесет понесется понесли понеслись понесло понесут понижая пониже понижения понизив понизилась поник поникнув понимавшая понимавший понимаем понимает понимаете понимаешь понимал понимала понимали понимание понимании пониманию понимания понимать понимаю понимают понимающею понимающие понимающим понимая понмю поновее поносил поношенное поношенном поношенные поношенный поношено понравилась понравились понравилось понравился понравится понравиться понудила понуждай понукал понуканий понукая понурил поныне понюхав понюхает понюхай понюхайте понюхала понюхать поняв 
понявшая понявший понял поняла поняли понят понятен понятие понятием понятии понятий понятия понятиям понятливый понятна понятная понятнее понятней понятно понятное понятны понятным понятое понять пообедав пообедаем пообедаете пообедал пообедала пообедать пообедаю пообтерся поодаль поочередно поощряли поощряю попав попавший попадавшиеся попадает попадаете попадается попадал попадались попадало попадалось попадать попадают попадаются попадая попадет попадется попадешь попадешься попал попала попалась попали попались попало попалось попался попами попасть попахивает поперек попеременно поперечного поперечных попестрей попечение попираешь попираньи пописываете поплавок поплакали поплачет поплачут поплевывающих поплевывая поплелись поплелся поплотнее поплывут попов поповича попозже попойки пополам пополз поползет поползли поползновения пополнела пополнит пополудни попользоваться попомните попона попортились попотчевать поправил поправила поправилась поправилось поправился поправимое поправит поправить поправиться поправка поправке поправки поправлю поправляет поправляется поправлял поправляют поправляя поправочки поправь поправьте поправят попрекаете попрекаешь попрекал попрекала попрекать попрекнула попреков попридержать попридержу поприща поприще поприщина поприщу попробовал попробовала попробовать попробуй попробуйте попробую попрозорливее попроси попросил попросила попросили попросит попросите попросить попроситься попросишь попросту попросят попрошу попугать попудриться попустит попутчиков попытался попытать попытаться попытка попытку попыток попятился пор пора поработает поработать порабощении поравнявшись поравнялись поравнялся порадовалась порадовался порадуется поражает поражал поражало поражен поражена пораженная пораженное пораженному пораженную пораженные пораженный пораженным поражены пораженья поразившая поразил поразила поразили поразило поразительной поразить пораньше пораспотрошили порастет порах порвала порвался порванная порвать порвется поре 
порешивший порешил порешили порешите порешить порицал порицанию порицать поровнявшись поровнялся порог порога порогами пороге пороги пород порода породе породить порождает порождают порожнее порой порок пороках пороков пороку поросенка поросла порослях поросята порох пороха пороховом пороховые порохом пороху порочен порочна порочного порочной порочные порочный порошками порошком порошок порошочек порою портер портером портеру портила портили портит портить портмоне портниха портнихе портнихи портного портной портному портным портными портрет портрета портретик портреты портфеля портьер пору порубки поругались поругание поруганный поруганье поруганьи порумянее поручат поручают поручение поручением поручений поручению поручения поручениями порученному поручено поручившись поручик поручика поручики поручиков поручикова поручиком поручику поручил поручился поручит поручите поручить поручиться поручу порфирием порфирии порфирий порфирию порфирия порхает порхала порхнула порхнули порцию порчи поры порыв порыва порывается порывались порывалось порывался порывами порываний порываниями порыванье порываются порыве порывисто порывистой порывистую порывистых порывов порывом порывы порылась порылся порыскать порядка порядке порядки порядков порядком порядку порядок порядочная порядочно порядочного порядочное порядочную порядочные порядочный порядочным порядочных посади посадив посадил посадила посадили посадит посадите посадить посадят посаженные посаженный посажу посватал посватался посвататься посвежее посвистывая посвятил посвятить посвящается посвящал посвящают посвящена посвящением посев поседевшие посекут поселились поселился поселить поселиться поселюсь поселятся посему посередине посетило посетит посетителей посетители посетитель посетителю посетителя посетителями посетить посетят посечь посещавшие посещавших посещаемых посещайте посещал посещали посещать посещают посещающие посещение посещением посещении посещения посещениями посеял посидев посидел посидеть посиди 
посидим посидит посидите посидишь посиживает посижу посильное посильную посильные посимпатичнее посиневших поскакать поскачет поскольку посконного посконные поскорее поскорей послав послал послала послали послан послания посланник посланника посланную послать после последки последнего последнее последней последнем последнему последнею последние последний последним последними последних последнюю последняя последовавшего последовавшее последовал последовала последовало последовательно последовательности последовать последствием последствий последствия последствиям последствиями последующее последующий последующим послезавтра послеобеденного послеобеденную послеобеденный пословица пословице послужила послужили послужило послужит послужить послушаем послушает послушается послушаешь послушай послушайте послушайтесь послушал послушала послушали послушались послушался послушание послушании послушанию послушать послушаюсь послушна послушно послушною послушный послушным послушными послушных послышавшийся послышалась послышались послышалось послышался послышатся послышится послышишь посматривал посматривая посмеемся посмеет посмеется посмеивается посмеиваться посмел посмели посметь посмешу посмеются посмеялись посмеяться посмирнее посмотрев посмотрел посмотрела посмотрели посмотрелся посмотреть посмотри посмотрим посмотрит посмотрите посмотришь посмотрю посмотрят поснимали пособию пособия посовестился посовестится посоветовал посоветовались посоветовать посоветоваться посоветуется посоветуюсь посолиднее посолить посоловелые посольства посох поспал поспала поспевала поспевать поспевают поспеет поспел поспеть поспешайте поспешающих поспешая поспешив поспешил поспешила поспешили поспешить поспешно поспешное поспешность поспешностью поспешные поспешу поспею поспи посплетничать поспорил посреди посредине посредника посредником посредство посредством поссорились поссорился поссорить поста поставив поставил поставила поставили поставило поставим поставит поставить поставлен 
поставленная поставленное поставленный поставляла поставцами поставь поставьте поставят постараетесь постарается постаралась постарались постарался постараться постараюсь постаревшем постарел постарела постарели постареть постарше постели постель постельке постельку постельное постелью постелям постепенная постепенно постепенного постепенное постепенностью постиг постигать постигнет постигнешь постигнуть постилает постилая постичь постлал постлан постлано постлать постного постный постным постой постойте постольку постом посторонился посторонись постороннего постороннее посторонней постороннем постороннему посторонние посторонний посторонними посторонних посторонннй посторонняя постояв постоял постояла постояли постоялом постояннее постоянно постоянного постоянное постоянной постоянною постоянную постоянные постоянный постоянным постоянством постоять пострадавшую пострадайте пострадать постращать пострел постреленок постричься построек построен построение построения построже построив построили построит построить постройка постройки постройкой постройку посту постукивал постукиванье постукивают постукивающим поступаешь поступал поступали поступать поступаю поступают поступи поступил поступила поступили поступило поступит поступите поступить поступка поступкам поступками поступках поступке поступки поступков поступком поступку поступлю поступок поступь поступью постучав постучал постучались постучаться постучите постучу постыдились постыдился постыдится постыдного постылая постыли постылом постылый посуда посудинкой посудинку посудите посудой посуду посуды посуетиться посущественнее посылаем посылает посылается посылаешь посылал посылала посылали посылались посылалось посылать посылают посылки посыльный посыпались посягал пот пота потаенною потанчиков потапыча потаскали потасканной потаскун потаскуна поташ потащил потащила потащили потащился поте потекла потекло потекут потемневший потемневшую потемнее потемнело потемок потеплее потере потерей потерею потери 
потерпели потерпите потертом потертым потерь потеря потеряв потерявший потерявшийся потерявшись потеряет потеряется потеряешь потерял потеряла потерялась потеряли потерялись потерялся потерян потеряна потерянная потерянно потерянный потеряно потеряны потерять потеснее потехи потешаясь потешится потешить потешником потешно потирает потирал потирания потирая потихоньку потише поток потока потоками потоком потолка потолки потолковее потолку потолкуем потолок потолочь потолстеет потом потомили потомки потомков потомок потомству потому потонет потонуло потонут потонуть поторапливайтесь поторговалась поторопился потосковала потратить потрафил потрачено потребления потребности потребность потребностью потребностями потребностях потребовал потребовала потребовали потребовалось потребовать потребу потребует потребуется потребуешь потребую потревожить потрепал потрепала потрепанным потрепать потрепещут потрет потрогивая потроха потрохами потрудился потрудитесь потрут потряс потрясаемый потрясает потрясающие потрясающий потрясая потрясен потрясений потрясенный потрясла потрясло поту потуг потупив потупившись потупил потупила потупилась потупленные потупляет потупляясь потупя потупясь потускли потускнели потух потухал потухший потухшим потухших потушая потушены потуши потчеванье потчеванья потчевать потчую потягивается потягиваться потягиваясь потянет потяни потянул потянулась потянулись потянуло поубавил поубрать поудалее поужинал поужинать поужинаю поумнее поумничала поутру поучался поучения поучительно поучить пофанфаронить пофилософствовал пофилософствовать пофилософствуем пофилософствуешь пофорсить похаживая похвал похвала похвалами похвалил похвалила похвалить похвалиться похвалой похвалы похвальбы похвально похвальном похвальный похвастается похвастать похвать похитил похитителя похищаю похищенных похлопотать похлопочу похлопывая похмелье похмельем похмелья поход похода походами походе походившему походил походила походили походило походимте походит походите походить 
походка походкой походкою походку походной походы походя похож похожа похожая похождениях похоже похожего похожее похожей похожем похожему похожи похожие похожий похожими похозяйничать похолодев похолодел похолодела похолодели похолодело похорон похоронами похоронах похоронен похоронены похоронив похоронил похоронила похоронили похоронить похоронная похоронных похороны похороню похоронят похорошела похоти похотливой похудевшая похудевшее похудеешь похудел похудела похудели похудело похуже поцеловав поцеловал поцеловала поцеловать поцелуе поцелуев поцелуем поцелуемся поцелует поцелуи поцелуй поцелуйте поцелую поцелуя поцелуями почавкав початая почаще почва почве почву почвы почел почем почему почерк почерневшее почернели почернелых почерпать почерпнешь почерпнутые почерпнуть почесал почесть почестью почесывает почесывая почетно почетном почетным почетных почетом почивает почивал почивали почивать почивают почиет почил починена починивала починили починить починки починкова починку починю почистить почитает почитаешь почитай почитал почитали почитатели почитаю почитают почище почищу почиющий почли почмокавшись почт почта почтальона почтальону почтамте почте почтение почтении почтенная почтеннейшая почтеннейшие почтеннейший почтеннейшим почтенного почтенное почтенном почтенному почтенные почтенный почтенным почти почтит почтительнейшие почтительно почтительной почтительном почтительность почтительною почтительные почтительным почтить почтмейстером почто почтовой почтовом почтовые почтовый почтовым почтовых почтой почту почты почувствовав почувствовал почувствовала почувствовать почувствует почудилась почудились почудилось почудится почуя пошаливают пошарив пошарил пошатнется пошатнулось пошатывается пошатываясь пошевелив пошевеливаться пошевелил пошевелила пошевелилась пошевелились пошевелилось пошевелился пошевелится пошевелить пошевелиться пошел пошептал пошепчут пошла пошлейшая пошлейшую пошлем пошленький пошленького пошленькой пошлет пошлешь пошли пошлите пошло 
пошлое пошлой пошлом пошлости пошлость пошлостью пошлые пошлый пошлых пошлю пошляками пошляков пошляком пошляческая пошто пошты пошутил пошутила пошутили пощаде пощадите пощадить пощады пощекотали пощекотать пощелкав пощелкивал пощупать поэзии поэзию поэзия поэма поэму поэмы поэт поэта поэтах поэтическая поэтически поэтические поэтический поэтическим поэтических поэтическое поэтической поэтична поэтично поэтов поэтому поэты пою поют поющую появилась появились появился появится появиться появление появлением появлении появления появляется появлялась появлялось появлялся появляться поярче пояс пояса поясе пояснее пояснений пояснить поясом поят прав права правам правами правах правая правда правде правдиво правдой правдоподобия правдоподобнее правдоподобным правдою правду правды праве праведная праведным праведными правее правел правил правила правилам правилах правило правильнее правильно правильного правильное правильной правильность правильною правильную правильным правит правительствующему правительствующий правителя правление правнуки право правовед правого правой правом правому православной православному православную православный правосудие правою праву правую правы правые правый правым прадед прадедов прадедушки празден праздная праздник праздника праздникам праздниками праздниках празднике праздники праздников празднику празднично праздничные праздничный праздничных праздно праздного праздное праздной празднолюбцами праздном праздному праздности праздность праздностью праздные праздный праздных празелень практика практике практики практиковать практику практические практический практических практического практическое практической практическую праотцам праотцев прасковьи прасковью прасковья прах праха прахе прахом прачки прачку преавенантненькая преблагополучно пребудет пребуду пребывали пребывания пребывая превесело превозмогало превозмочь превозносили превозносят превосходительство превосходительством превосходительству превосходная превосходнейшем 
превосходно превосходного превосходный превосходными превосходных превосходства превратил превратила превратилось превратился превратится превратно превращается превращалась превращался превращать превращаться превращения превышает превышать преглупейшею преград преграда преградах преградой преграды пред предавала предавать предаваться предал предала предам предамся предан предана предание преданий предания преданиями преданнейшая преданной преданности преданность преданностью преданную преданные преданный преданьям преданьях предаст предатели предательство предать предаться предварительно предварительные предварительных предвестником предвестье предвечном предвещало предвзятому предвзятых предвидел предвидела предвидели предвиделось предвидений предвидения предвидеть предвидите предвидится предвидя предвижу предводителя предел предела пределов предзнаменование предисловие предки предлагавший предлагаем предлагаемого предлагает предлагается предлагаешь предлагал предлагала предлагали предлагать предлагаю предлагают предлагая предлог предлога предлогам предлогами предлоги предлогом предложат предложен предложение предложением предложении предложений предложению предложения предложениями предложил предложила предложит предложите предложить предложу предместье предмет предмета предметам предметах предмете предметов предметом предмету предметы предназначена предназначенное предназначенные преднамеренно преднамеренном предо предопределение предопределением предорогие предоставив предоставить предоставляла предоставляю предостерегал предостерегаю предостережение предостережений предостережения предостеречь предосторожности предотвращение предохранял предписал предписания предписанную предписанным предписано предписываю предполагавшийся предполагавшихся предполагает предполагаете предполагается предполагаешь предполагал предполагали предполагались предполагалось предполагать предполагая предположение предположением предположении предположения предположено предположи 
предположив предположили предположите предположительно предположить предпослать предпочел предпочитают предпочитая предпочла предпочтение предпочтет предпримет предпринимайте предпринималась предпринимаю предпринять предприятие предприятием предприятии предприятию предприятия предприятьях предрассудкам предрассудками предрассудках предрассудке предрассудки предрассудков предрассудок предрассудочные предрассудочных предрек председателем председатель председательствует председателя предсказал предсказания предсказывая предсмертного предсмертный представали представив представившейся представил представила представилась представили представилось представительницею представится представить представиться представление представления представленный представлю представляем представляет представляетесь представляется представлял представляла представлялась представляли представлялись представлялось представлялся представлять представляться представляюсь представляя представь представьте представят предстала предстанет предстать предстоит предстоял предстояла предстояли предстояло предстоящего предстоящее предстоящей предстоящим предстоящих предубеждение предубеждений предубеждения предуведомить предуведомлены предугадает предугадала предугадывает предуготовительном предузнало предупредив предупредил предупредила предупредительна предупредить предупреждал предупреждаю предупреждена предусмотрела предусмотрительностью предчувствие предчувствием предчувствий предчувствию предчувствия предчувствиями предчувствовавшая предчувствовал предчувствовала предчувствовалась предчувствовались предчувствовалось предчувствовать предчувствует предчувствую предчувствуют предчувствуя предшественником предшествовавшие предшествовавших предъявляет предыдущая предыдущего предыдущий предыдущим предыдущую прежде преждевременно преждевременными прежнего прежнее прежней прежнему прежнею прежние прежний прежним прежними прежних прежнюю прежняя президентом презиравшие презиравших презирает презираете 
презирал презирала презирали презирать презираю презирают презирающие презирая презревши презрение презрением презрения презренная презренно презренного презренье презренья презрительнее презрительно презрительной презрительный презрительным презрительных преимуществ преимущества преимуществе преимущественно преимущество преимуществом преимуществу преисподнего преисполнена преисполнила преклонив преклонила преклонился преклониться преклонный преклоняться прекомические прекословь прекрасен прекрасна прекрасная прекраснее прекрасней прекраснейшая прекрасно прекрасного прекрасное прекрасной прекрасном прекрасною прекрасную прекрасны прекрасные прекрасный прекрасным прекрасными прекрасных прекратив прекратились прекратило прекратить прекращалось прекращаю прела прелести прелестная прелестней прелестной прелестные прелестный прелесть прелестями преломились преломлялись прельститься прельщает прелюбопытный премило премудрости премудрствующие премудрые премудрых пренаивно пренебрег пренебрегал пренебрегать пренебрегая пренебрежение пренебрежением пренебрежения преобладали преобладало преображаются преобразилась преобразилось преобразований преогромнейшему преодолела преодолеть преподаванию преподал преподана преподаю препочтенный препровождал препровожу препьяные препятствие препятствий препятствию препятствия препятствиями препятствовала препятствуют прервав прервал прервала прервали прервался прерванных прервано прерий прерываемый прерывает прерывай прерывал прерывался прерывать прерываю прерывающее прерывающуюся прерывая прерывисто прерывистым пресвятая пресекла пресеклось пресекся пресекшую пресинее прескладной преследовал преследовала преследования преследовать преследует преследуйте преследуют пресмыкаюсь преспокойно пресс преставлю престарелая престарелому престарелую престоле престранная преступать преступают преступление преступлением преступлении преступлений преступлению преступления преступлениях преступленьице преступленья преступна преступнее преступник 
преступника преступникам преступниками преступнике преступники преступников преступнику преступница преступного преступной преступны преступный претендуете претендующих претендуя претензии претензий претензию претензия претензиями претерпели претит преткновения преткнулся претрудная претят преувеличенно преувеличенного преувеличенною преувеличенный преувеличено преувеличены преувеличиваете преувеличивал преувеличивающий преувеличил преувеличили преувеличить преумножать преуспеяние преуспеяния прехитрейшее прехорошенькие пречистенки прещено при прибавив прибавил прибавила прибавили прибавило прибавилось прибавим прибавит прибавить прибавишь прибавке прибавкой прибавлена прибавление прибавлением прибавления прибавлено прибавлю прибавляет прибавляешь прибавлял прибавляла прибавлять прибавляю прибавляют прибавляются прибавочные прибавь прибавьте прибегал прибегала прибегают прибегая прибегнул прибежавшей прибежал прибежала прибежали прибежит приберегла приберет приберешь прибившему прибил прибили прибиль прибирает прибирали прибирать прибираться прибитый приближавшейся приближается приближалась приближался приближающееся приближая приближения приблизив приблизилась приблизился приблизительно прибой прибор прибоя прибрал прибрано прибраны прибрать прибрежный прибрежья прибудет прибудут прибыв прибывает прибывшего прибыл прибыли прибытия прибыть прибьет прибьют привалило приватном приватным приведенное приведены приведет приведи приведу приведут привез привезет привезите привезла привезли привезти привезть привезу привезут привел привела привели привело привелось привесить привести привет приветливо приветливым приветные приветствие приветствий приветствует приветствую привечали привешены привидение привидений привидения привил привилегию привилегия привить привлек привлекает привлекало привлекательном привлекать привлекают привлекло привлечь приводил приводила приводили приводит приводить приводят привозили привозом приволье приволью приволья привскочив привскочила 
привстав привставший привстал привстала привстанет привык привыкает привыкали привыкла привыкли привыкнет привыкнете привыкнешь привыкну привыкнуть привыкши привыкшим привычек привычка привычками привычках привычке привычки привычкой привычку привычно привычной привычную привычны привычный привычными привяжусь привяжутся привязал привязалась привязался привязан привязана привязанности привязанность привязанностью привязанностям привязаны привязаться привязки привязчивее привязываетесь пригвожден приглаживает пригласил пригласила пригласили пригласит пригласите пригласить приглашаем приглашает приглашайте приглашал приглашала приглашалась приглашали приглашаль приглашать приглашаю приглашают приглашен приглашена приглашение приглашением приглашения приглашенных приглашены приглашенье приглядеть приглядится приглядывалась приглядывался приглядываться приглядывая приглядываясь приглянуть пригнул пригнула пригнулись пригнулся приговаривал приговаривала приговаривая приговарнвал приговор приговора приговоренного приговоренный приговорили приговоркой приговоров приговору пригодилась пригодитесь пригодится пригодное пригодятся пригорка пригорки пригорку пригорок пригорюнившись пригорюнился приготовившись приготовил приготовилась приготовился приготовит приготовить приготовиться приготовлен приготовление приготовлений приготовления приготовлениях приготовленный приготовленным приготовлено приготовлены приготовляй приготовлялось приготовлять приготовляться приготовляюсь приготовляясь приготовят пригрезился пригрозили придававшая придавал придавала придавали придавало придавил придавили придавило придавим придавить придавлено придавливая придает придаете придам приданный приданого приданое придаст придаток придать придачу придвигая придвинул придвинулась придворной придем придержал придержать придержаться придерживает придерживал придерживались придерживать придерживаться придерживаюсь придерживая придерживаясь придет придете придется придешь приди придираетесь придираться 
придирки придирчив придите придорожной приду придумаем придумает придумай придумал придумала придуманное придуманных придумать придумывает придумывай придумывала придумывая придут придутся придя приедем приедет приедете приедешь приеду приедут приезд приезда приезде приездом приезду приезжавшие приезжаем приезжает приезжай приезжайте приезжал приезжала приезжали приезжать приезжают приезжая приезжее приезжие приезжий приезжих прием приема приемам приемами приеме приемлеши приемлю приемная приемной приемные приемов приему приемы приехав приехавшем приехал приехала приехали приехать прижав прижавшись прижал прижала прижалась прижали прижата прижатыми приживал приживалы прижила прижимается прижимала прижималась прижимался прижимаю прижимая прижимаясь прижмешь прижмись прижму призадумался призван призвание призванием призвании призванию призванную призвать признав признавай признавайся признавал признавала признавалась признавалось признавался признаваться признавая признаваясь признает признаете признаетесь признайся признайтесь признак признака признакам признаками признаках признаки признаков признаком признал призналась признали признался признание признании признания признаниям признанную признанных признанья признательности признательный признать признаться признаю признаюсь призовет призовешь призрак призрака призраками призраках призраки призраков призраку призрачное призрение призрения призыв призывал призывать призывающе призывающий призывая призыве призывном призывных приидет прииди приидите приискал приискания приискать приискивать приищете прийти прикажет прикажете прикажешь прикажите приказ приказал приказала приказали приказан приказание приказанием приказании приказанию приказания приказаниями приказано приказать приказного приказчик приказчика приказчики приказчиков приказывает приказываете приказывайте приказывал приказывала приказывали приказывать приказывающего прикапливал прикасалось прикасаясь прикидываются прикинешься прикинулся прикинуться 
прикладывал прикладывали прикладывая приклеившись приклонить приключалась приключение приключений приковалась приковываться приколотить прикомандировался прикосновением прикосновения прикоснулись прикоснулось прикоснулся прикоснуться прикраситься прикрасной прикрепил прикрутит прикрыв прикрывается прикрывала прикрывалось прикрываясь прикрыл прикрыла прикрыли прикрытая прикрытого прикрытой прикрытую прикрытыми прикрытых прикрыть прикупить прикусив прилагает приладил приладиться прилаженный прилаживалась прилаживать приласкав приласкаешь приласкать прилег прилегавший прилежанием прилежно прилежным прилепившись прилепилась прилепить прилепливаются прилеплялся прилетит прилетят прилечь прилив прилива приливало приливам приливе приливом приливы прилип прилипчивая прилипшие приличие приличием приличий приличия приличнее приличненьким прилично приличного приличной приличном приличною приличную приличные приличный приличным приличных приложа приложат приложена приложением приложи приложив приложил приложила приложить прилуковой прильнет прильнув прильнул прильнула прильнуть прильпне прилягу приляжет приманивала примачивайте примелькавшиеся примем применены применилась применить применять пример примерами примерив примеривал примеривая примерит примерить примерно примеров примером примеру примерчик примерчики примеры примеряет примерять примесью примет примета приметам примете приметесь приметил приметила приметить приметишь приметная приметнее приметно приметною приметную приметным приметными примется примету приметы примечал примечательную примечать примешивается примешивал примешивалась прими примирении примирения примирительный примирительным примирительных примирится примириться примите примолвив примолвить примолк примолкла примолкли примочками примочки приму примут примутся примыкавшей примятой принагнувшись принадлежавшая принадлежавшего принадлежавшее принадлежавший принадлежал принадлежала принадлежали принадлежало принадлежат принадлежащие принадлежащий 
принадлежащих принадлежит принадлежите принадлежностей принадлежности принадлежностью принадлежу принарядиться принес принесенного принесенное принесенные принесенный принесенных принесено принесены принесет принесете принесешь принеси принесите принесла принесли принесло принести принесу принесут принеся принижающее принижение приниженная приниженное приниженную принижено приник приникла приникшей принимавшие принимает принимаете принимается принимаешь принимай принимайте принимал принимала принималась принимали принимались принимало принимался принимать приниматься принимаю принимаюсь принимают принимающим принимая принимаясь приносил приносили приносились приносилось приносимые приносит приносить приносят приношением приношу принудив принудила принудили принудить принуждал принужден принуждена принуждение принуждений принужденным принц принцип принципах принципиально принципу принципы приняв принявшие принял приняла принялась приняли принялись приняло принялся принят принятая принятию принято приняты принятый принятых принять приняться приобрел приобрела приобрести приобресть приобретается приобретал приобретала приобреталось приобретался приобретать приобретаю приобретают приобретая приобретение приобретению приобретенного приобретенной приобщались приоделась приостановились приостановился приотворена приотворенная приотворил приотворилась приоткрытой припав припадает припадая припадем припадка припадках припадке припадки припадков припадком припадок припадочек припадочного припадочный припал припала припас припасами припасен припасов припасы припахивает припевах припеваючи припер приписанное приписать приписываемые приписывает приписывал приписывают приплюснут приподнимайтесь приподниматься приподнимаются приподнимая приподнимаясь приподняв приподнявшись приподнял приподняла приподнялась приподняли приподнялись приподнялся приподнятые приподнять приподняться припомажен припоминаемое припоминал припоминала припоминался припоминать припоминаться припоминая 
припомни припомнив припомнил припомнила припомнилась припомнилось припомнился припомните припомнить припомнишь припомню припрыгнул припрятал припрятано припрятать припускает припутали припухлые припухшее припухшем припухшею припухшие припухшими прирастала приращение приревнует природа природе природная природное природной природным природой природу природы прирожденный прирос приросли приросло приручил присвататься присватывался присвистнул присвоить присев приседает приседала приседать приседая присел присела присесть прискорбием прискорбию прислал прислала прислали прислан присланные присланных прислать прислоненная прислонил прислонилась прислонился прислонясь прислуга прислуге прислуги прислугой прислуживала прислуживать прислужница прислушавшись прислушался прислушать прислушивавшийся прислушивается прислушивалась прислушивались прислушивался прислушиваться прислушиваются прислушиваясь присматривать присмиреет присмирел присмирела присмирели присмотрит присмотрите приснившийся приснилась приснились приснилось приснился приснится присовокупить присочиним приспела приспособились приспособился приспособить приспособлено пристав пристава приставай приставал приставала приставания приставать приставил приставить приставлен приставленными приставу пристает пристаешь пристал пристала пристало пристальнее пристальней пристально пристальном пристальность пристальный пристальным пристальными пристанет пристани пристанище пристань пристают пристрастно пристроены пристроил пристроили пристроились пристройками пристукнуло приступил приступила приступим приступить пристяжная присужден присуждено присутственного присутственное присутственном присутственные присутственным присутственных присутствие присутствием присутствии присутствия присутствовавших присутствовал присутствовала присутствовало присутствовать присутствует присутствующим присутствующих присылаемые присылает присылал присылали присылать присылки присягнул присягу присядет присяду присядь присядьте присяжного 
притаившись притаились притаился притаиться притащи притащит притащить притащу притаясь притворе притворена притворил притворила притворилась притворили притворился притворить притвориться притворной притворным притворства притворство притворщик притворяется притворяла притворялись притворяться притворяясь притих притихла притихли притихнет притом притон приторности приторный притрогивалась притупились притупленными притуши притягивал притягивая притязания притянуть приударил приумножают приуныл приучи приучил приучили прихлопнуть прихлынувшей прихлынуло приход прихода приходи приходившая приходивший приходивших приходил приходила приходилась приходили приходило приходилось приходит приходите приходится приходить приходишь приходо приходом приходя приходят приходящих прихожая прихожей прихожу прихожую прихотей прихоти прихотливо прихотливой прихотливый прихрамывал прицеливается прицепилось прицепился прицепиться причалены причастись причастных причащение причем причесал причесалась причесан причесана причесанные причесанный причесанным причесать прическа прическе прически прической причесывает причесывал причешутся причин причина причинам причине причиной причиною причину причины причисленных причислила причисляешь причитается причитал причитала причитаньях причитающиеся причтется причуд причуды пришедшего пришедши пришедшим пришедших пришел пришелся пришивать пришивая пришили пришить пришла пришлем пришлет пришли пришлись пришлите пришло пришлось пришлю пришлют пришоль пришпорить прищелкивать прищурившись прищурил прищурилась прищурился прищуря приют приюта приютиться приютов приятелей приятелем приятели приятель приятельница приятельницею приятельницы приятельские приятелю приятеля приятелям приятелями приятелях приятен приятна приятнее приятнейшего приятнейшие приятно приятного приятное приятной приятном приятности приятною приятную приятные приятный приятным приятными приятных про пробавляться пробавляются пробалтывался пробегает пробегал пробегала пробегающий 
пробегая пробежал пробежала пробежали пробежало пробежит пробиваем пробивается пробивалась пробивался пробивать пробившись пробил пробили пробило пробирается пробирались пробирался пробираться пробираясь пробить пробиться пробка проблеск проблеска пробовал пробовала пробовали пробои пробок проболтался проболтаться пробор пробормотал пробормотала пробормотать пробором пробочка пробравшись пробралась пробрался пробу пробудет пробудившегося пробудился пробудись пробуду пробудь пробудясь пробуждавшимися пробуждались пробуждаться пробуждающейся пробуждения пробужденного пробужденные пробуя пробывшее пробыл пробыть провале проваливается провалившимся провалилась провалился провались провалиться провалишься провалом провалялся провалятся проведать проведем проведена проведение проведенного проведенный проведет проведу проведя провезут провел провела проверил проверить провесная провести проветрись провиантский провиантского провиантскому провидел провидение провидением провидения провидит провижу провизии провизию провинциалка провинциальной провинциальный провинции провинцию провод провода проводи проводив проводил проводила проводили проводим проводимых проводит проводите проводится проводить провожавшего провожает провожай провожайте провожал провожала провожали провожать провожаю провожают провожая провождение провожу провозгласил провозили провозит провозишься проволок проворен проворковать проворная проворно проворное проворным проворонил проворством проворчав проворчал проворчат проврался провспоминаешь проглотил проглотить проглочу проглядели проглядывала проглядывало проглянула прогнал прогнала прогнали прогналь прогнать прогневали проговаривается проговариваться проговаривая проговорено проговорив проговорившись проговорил проговорила проговорилось проговорился проговорится проговориться проговорюсь проголодалась прогонами прогоните прогоны прогоню прогоняли прогонят прогонять программ программе программу прогресс прогресса прогрессивно прогрессивного 
прогрессивные прогрессиста прогрессистов прогрессисты прогрессом прогрессу прогуливается прогуливаясь прогулка прогулках прогулке прогулки прогулок прогуляемся прогуляла прогуляться продав продавайте продавал продавало продавать продаваться продавец продавить продавленном продавца продадут продаем продает продается продаешь продажей продажи продажной продажу продай продал продала продали продан проданный продано продаст продать продашь продают продаются продеваемой продеваемые продевала продевая проделка проделки продержали продиктует продиктую продиралась продлил продлить продлятся продовольствие продовольствии продолговатым продолжавшему продолжавшийся продолжает продолжаете продолжается продолжай продолжайте продолжал продолжала продолжалась продолжали продолжалось продолжался продолжать продолжаться продолжаю продолжают продолжаются продолжая продолжена продолжение продолжением продолжения продолженье продолжил продолжим продолжительно продолжительного продолжительное продолжительный продолжительным продолжится продранные продранным продувная продувной продувных продукт продуктов продукты продумал продумала проедет проедусь проезда проезду проезжавшей проезжавший проезжавшую проезжает проезжающих проезжая проезжий проект проекты проел проехал проехала проехали проехать прождала прожег прожектор прожжено прожжет проживавшего проживаем проживает проживаете проживал проживали проживать проживают проживем проживет проживете проживешь проживу проживут прожил прожила прожили прожита прожитой прожиток прожитую прожить прожитье прожорливым прожужжит прозвал прозвали прозвание прозвенели прозвучала прозвучали прозвучит прозе прозирать прозорлив прозорливейший прозоров прозорова прозоровых прозрачная прозрачное прозрачной прозрачный прозрачным прозревал прозрения прозреть прозябает прозябал проиграл проиграла проигрались проигрался проиграно проигрыше произведен произведена произведение произведением произведении произведений произведения произведенное произведенный 
произведено произведет произвел произвела произвели произвело произвести производивший производил производило производилось производился производит производительности производить производишь производят произвол произволу произвольно произнес произнесенные произнесено произнесены произнесла произнести произносили произносимые произносит произносить произнося произносят произойдет произойти произошел произошла произошли произошло происходившую происходил происходила происходили происходило происходит происходить происходят происходящее происхождение происхождения происшедшим происшедшую происшествие происшествием происшествии происшествий происшествия пройдем пройдемте пройденное пройдет пройдете пройдется пройдешь пройди пройдите пройдохе пройдохой пройду пройдусь пройдут пройдя пройдясь пройти проказ проказа проказил проказить проказнику прокаркал прокатимся прокатимтесь прокатитесь прокатиться проклинает проклинал проклинала проклинали проклинать проклял проклянешь проклясть проклят проклятая проклятие проклятием проклятий проклятию проклятия проклятиями проклято проклятого проклятое проклятой проклятом проклятую прокляты проклятые проклятый проклятым проклятых проклятье проклятья проконсула прокормить прокормиться прокормлю прокофий прокофьич прокофьича прокофьичем прокричал прокричала проку прокурора пролегли пролежал пролежала пролежали пролежало пролежит пролез пролезайте пролезают пролезая пролезть пролей пролетает пролетала пролетарий пролетария пролетевший пролетевших пролетел пролетела пролетит пролетки пролив проливать проливают проливая проливной пролил пролилась пролита пролитая пролитую пролог пролога проложат проложил проложить пролюбить промалчивал промах промаха промахнулся промаху промедленья промедлить промеж промежутками промежутках промежуток промежуточная промелькнет промелькнувшею промелькнула промелькнуло променял променяла променять променяю прометеев промозглым промозолил промокший промолвил промолвила промолвили промолвить промолчал 
промолчала промочит промучился промысл промысла промычал промычит промышленник промышленниками промышленников промышляет промышляют промышляющее промямлил пронес пронесет пронесется пронесла пронеслась пронесли пронеслось пронесся пронесу пронзал пронзали пронзающий пронзающим пронзенная пронзенный пронзенными пронзи пронзил пронзили пронзило пронзиль пронзительные пронзительным пронзительных пронизан пронизывает проник проникал проникать проникающим проникло проникнет проникнув проникнут проникнута проникнуты проникнуть проникся проникшись проницал проницательнее проницательного проницательности проницательным проницающим проносили проносилось проносит проносить проносящейся пронюхал пронюхают пропаганд пропаганде пропагандировать пропагандного пропадает пропадай пропадал пропадала пропадали пропадало пропадать пропадают пропадающие пропадающих пропадет пропадете пропадешь пропаду пропадут пропал пропала пропали пропало пропастей пропасти пропасть пропастью пропастями пропастях пропащего пропащий пропев пропекать пропел пропела пропели пропеллер пропетая пропивал пропил прописала прописанной прописываешь пропитан пропитанном пропитано пропитаны проплакала проплывет проплывший проплыли проповедовал проповедовали проповедует проповедуется пропоет проползет пропускал пропускающем пропускающим пропуская пропусти пропустив пропустил пропустили пропустит пропустить пропустят пропущенная пропыленные пропьет пропятие прорва прорвалось прорвался проревел прорезывались прорепетировать прореха прорехою прореху пророк пророка пророком проронил проронила проронит проронить пророчат пророческая пророчески пророческий пророчества пророчество пророчит прорубил прорывает прорывами прорывать прорыве просватали просветился просветите просветлевшее просветлеет просветлели просветлении просвечивалось просвещение просвещению просвещения просвещенной просвещенный просвещенья просвирне просеванье проседью просеет проселки проселок просидев просидел просидела просидели просидеть просиди 
просиживала просиживаю просил просила просилась просили просился просиль просим просимой просипел просит просите просителей просители просительных проситесь просится просить просишь просияла проскакать проскакивать просквозит проскользнет проскользнул проскользнула проскользнуть проскрежетал проскучает прославить прославленный проследил проследовали прослезились прослезился прослезится прослужил прослужили прослужит прослушал прослушали прослушать прослышал прослышали прослышать просмотрел просмотреть просмотришь проснетесь проснется проснешься проснись проснитесь проснувшаяся проснувшегося проснувшиеся проснувшись проснулась проснулись проснулся проснусь проснуться просо проспал проспали проспать проспект проспекта проспекте проспекту просроченное просроченные просроченный просрочить прост проста простаковых простая простегивала простейшей простейшем простейшим простенке простенькое простер простерла простерт простертой простерши прости простившись простил простила простили простились простился простим простирает простиралось простирать простирая простит простите простительно проститутка простить проститься простишь просто простоват простоватого простоватость простоватый простоволосая простоволосые простого простодушие простодушием простодушия простодушно простодушное простодушные простодушными простое простой простокваши простоквашу простолюдин простом простонал простонало простонародный простонародье простонет простор простора просторе просторно просторное просторную просторные просторный простору простота простоте простотой простотою простоту простоты простою простоял простояла простоять пространным пространств пространства пространство пространствовал прострелено прострет простуда простудилась простудился простудитесь простудится простудишь простудишься простуды простуживайся простужусь проступали проступок простучат просты простые простыл простыла простым простыми простыне простыней простыни простыню простыня простынях простых простят просунется просунул 
просунулась просунуть просушить просыпается просыпаешься просыпалась просыпали просыпались просыпалось просыпался просыпаться просыпаюсь просыпаются просыпающегося просыплю просьб просьба просьбами просьбе просьбица просьбой просьбу просьбы прося просясь просят просятся протанцевал протанцевала протанцевать протащила протекал протекли протекция протекших протер протереть протерпела протертые протеснилась протеснились протеснился протеснявшихся протеснятся протесняясь протест протестовал протестованное протестовать протестом протестуй протестую протесты против противен противиться противная противнее противник противника противником противно противное противном противны противный противным противных противоестественном противоположная противоположного противоположной противоположном противоположность противоположную противоположные противоположный противоположными противопоставить противоречие противоречии противоречий противоречило противоречит противоречить противоречия противоречу противоречь противуположную противься протискиваясь протиснулся проткнет протокол протолкнул протолкуешь протопопов протопопова протопоповым протоптанной проторила протягивает протягивай протягивайте протягивал протягивала протягивались протягивая протяжен протяжно протяжный протянет протянешь протяните протяну протянув протянувшеюся протянувшиеся протянул протянула протянулись протянулся протянут протянута протянутая протянутой протянутою протянутую протянутый протянутыми протянуть протянуться проулок профессор профессора профессоров профессором профессорскую профессору профессоршу профилем профиль профиля прохаживается прохаживался прохаживаться прохватила прохватывали прохлада прохладная прохладно прохладной прохладные прохладный прохладных прохладой прохлады проход прохода проходи проходившего проходившей проходившему проходил проходила проходили проходило проходимцев проходит проходите проходить проходишь проходная проходной проходу проходя проходят проходящих прохожего прохожей 
прохожем прохожему прохожие прохожий прохожим прохожих прохожу прохора прохрипел процветал процветала процветают процедил процедур процеживая процент процентами процентные процентов процентщица процентщицу процентщицы проценты процесс процесса процессе процессии процессия процессом процессу проч проча прочего прочее прочел прочесть прочие прочий прочим прочими прочитав прочитайте прочитал прочитанного прочитанное прочитано прочитать прочитают прочитывал прочих прочла прочно прочного прочной прочность прочтем прочтении прочтения прочтет прочтете прочтешь прочти прочтите прочту прочтя прочь прошагнул прошедшая прошедшего прошедшее прошедшем прошедшие прошедших прошедшую прошел прошелся прошептал прошептала прошептались прошибет прошипел прошла прошли прошло прошлого прошлогоднего прошлогодний прошлогодним прошлогодних прошлое прошлой прошлом прошлому прошлую прошлый прошлым прошлых прошлялся прошмыгнул прошмыгнуть прошу прошумевшего прошумела прощаетесь прощается прощай прощайте прощал прощали прощались прощальный прощальным прощание прощании прощанию прощанье прощаться прощаю прощаюсь прощают прощаются прощая прощаясь проще прощение прощении прощения прощено прощенья прощипаны прощу проявилась проявилось проявить проявление проявлению проявления проявлениями проявляется проявляла проявлялась проявлялись проявлялось прояснел прояснели прояснившийся прояснилось прояснить проясняет прояснялось прояснялся пруд пруда прудами пруде прудов прудовой пруды пружин пружинами пружинах пружиной пружины прусская прусскую прыг прыгавший прыгает прыгай прыгал прыгала прыгали прыгало прыганье прыгать прыгают прыгая прыгнул прыгнуть прыжка прыжком прыжок прынцессы прыскается прыснет прыснула прыснуть прыть прытью прядет прядь пряжку прям прямехонько прямились прямо прямого прямодушии прямодушия прямодушный прямое прямой прямому прямою прямую прямые прямым прямыми прямых пряничного пряно пряное пряности прянул пряный прятал прятала пряталась прятались прятался прятать прятаться пряча 
прячась прячет прячется прячешь прячут прячутся прячущегося пса псами психически психоз психологии психологическая психологически психологические психологический психологическим психологическое психологической психологическую психологию психология психологов психологом психопат псом псу псы птенцов птиц птица птицами птицах птице птицей птицу птицы птичек птичий птичка птички птичку птичницы птичьего птичьем птичьи пуантов публика публике публики публикой публику публичная публичного публичное публичной публичную публичные публичный публичных пугает пугаете пугается пугаешь пугай пугайся пугайтесь пугал пугала пугали пугало пугался пугать пугаться пугаюсь пугают пугающая пугающий пугающим пугаясь пугливая пугливее пугливо пугливого пугливые пугливых пуговиц пуговицами пуговицы пуговкой пуд пуда пудами пудинг пудов пудрится пудрой пужливые пузатый пузатых пузырек пузырь пуль пульс пульса пульхерией пульхерии пульхерию пульхерия пулю пуля пулями пулярку пункт пункта пунктам пунктах пункте пунктов пунктом пункту пункты пуншем пуншу пуп пуританский пурпурово пускает пускаете пускаешь пускай пускайте пускал пускала пускалась пускали пускался пускать пускаться пускают пуст пуста пустая пустейшем пустейших пусти пустив пустил пустила пустилась пустили пустились пустило пустился пустим пустит пустите пустить пуститься пусто пустого пустое пустой пустом пустому пустота пустоте пустоту пустоты пустоши пустошь пустою пустую пусты пустые пустым пустыми пустыне пустыней пустыни пустынна пустынного пустынной пустынном пустынную пустынные пустынный пустынным пустынь пустыню пустырей пустырем пустырь пустых пусть пустякам пустяками пустяках пустяки пустяков пустят пустячков пустяшная пустяшное пустяшным путал путали путались путаница путаницу путанно путаться путаю путаются путая путеводителем путеводительный путеводной путей путем путешественники путешественников путешествие путешествии путешествий путешествия путешествиях путешествует пути путилки путилкой путилку путник путнику 
путное путной путь путями путях пуф пух пуха пухленькие пухлое пухлых пуховые пуховым пухом пучат пучине пучину пучком пушечный пушистые пушистый пушка пушки пушкин пушкина пушкинское пушкиным пушкиньянца пушку пушок пущай пуще пущенное пущенными пущу пхайло пчелка пчеловодство пчелы пшеница пшеницын пшеницына пшеницыной пшеницыну пшеницыных пшла пшол пыжикова пыл пылает пылал пылала пылают пылающим пылающими пылая пыли пылили пылинки пылинку пылким пылко пылкой пылом пылу пыль пыльная пыльного пыльной пыльном пыльную пыльные пыльный пыльными пылью пылят пытала пытался пытая пытаясь пытка пытки пыткой пытку пытливо пытливого пытливости пытливый пыток пыхтя пышная пышнейшее пышно пышной пышном пышную пышные пышных пышут пьан пьедестал пьем пьер пьеса пьесах пьесе пьесу пьесы пьет пьете пьешь пью пьют пьющей пьющий пьющих пьян пьяна пьяная пьяненькие пьяненький пьяненькой пьянею пьянея пьяниц пьяница пьяницей пьяницу пьяницы пьяного пьяной пьяном пьяному пьянства пьянство пьянствовал пьянствовать пьянствует пьянствуй пьяную пьяны пьяные пьяный пьяным пьяных пэб пялит пялить пясту пятак пятака пятаками пятаки пятаков пятам пятачка пятачку пятачок пятая пятен пятеро пяти пятидесяти пятидесятилетнему пятидесятилетнею пятидесятирублевую пятидесятью пятилетнего пятилетняя пятиминутным пятипроцентные пятипроцентный пятипроцентных пятипудовая пятисот пятистах пятить пятиэтажного пятками пятки пятна пятнадцати пятнадцатилетнюю пятнадцатом пятнадцать пятнам пятнами пятнах пятница пятнице пятницу пятницы пятно пятном пятнышка пятнышко пятого пятое пятой пятом пяточками пятою пятую пятый пять пятьдесят пятьсот пятью пятясь р ра раб раба рабам рабами рабой рабом работ работа работавшая работавши работавший работаем работает работаете работаешь работай работал работала работали работам работами работать работах работаю работают работе работник работника работниках работники работников работнику работой работу работы рабочие рабочий рабочим рабочих рабочую рабская рабски рабских 
рабстве рабы равенна равеннских равенства равенство равна равнина равнины равно равновесие равновесии равновесия равнодушен равнодушие равнодушием равнодушии равнодушию равнодушия равнодушна равнодушная равнодушно равнодушного равнодушное равнодушны равнодушный равное равномерно равносильное равны равным равных равняется равняла равняли равнять равняю рагулиным рад рада радехонек ради радикально радикальный радиус радищев радо радовавшемуся радовались радовался радоваться радостей радости радостн радостная радостно радостного радостном радостный радостным радостных радость радостью радостями радостях радуется радуешься радужного радужные радушие радушием радушия радушно радушный радуюсь радуясь рады раз раза разбавлять разбегаются разбей разберет разберешь разбери разберу разберут разберутся разбивал разбивалась разбиваются разбивающее разбившимся разбил разбила разбилась разбили разбился разбирал разбирала разбиранья разбирать разбирая разбит разбито разбитого разбитое разбитой разбитою разбитую разбитым разбить разблаготворились разбогатевший разбогател разбогатеть разбой разбойник разбойника разбойниках разбойники разбойников разбойником разбойничий разбойную разболелась разболелся разболтал разболтали разболтался разбор разборчив разборы разбранил разбрасывается разбредутся разбрелись разбросала разбросались разбросанные разбросанный разбросанных разбросано разбросаны разбуди разбудил разбудила разбудили разбудило разбудит разбудить разбудишь разваливается развалившуюся развалилась развалились развалилось развалился развалинами развалинах развалине разве разведу разведывать развел развели развелось разверзается разверзалась развернув развернувший развернул развернула развернулись развернутое развернутую развернуты развернутые развернуть развертывает развертывая развеселившись развеселился развеселить развеселясь развести развешивает развешивал развешивать развивает развиваете развивается развивал развивалась развивалось развивать развиваю развиваются развившись 
развилась развились развился развинтился развит развитие развитием развитии развитий развитию развития развитой развитость развитый развитым развить развлекайтесь развлекать развлекись развлечения развлечь разводить разводя развозимыми разворчатся разврат разврата разврате развратен развратник развратника развратничает развратной развратный развратным развратятся развращение развращенный развяжешься развяжите развязал развязала развязался развязаны развязать развязен развязка развязке развязки развязку развязная развязнее развязно развязности развязным развязных развязывать разгадал разгадать разгадка разгадке разгадки разгадку разгаре разгибать разгладились разгладился разглаживается разглядев разглядел разглядела разглядели разглядеть разгляди разглядит разглядишь разглядывавший разглядывает разглядываете разглядывал разглядывать разглядывая разгневанная разговаривавшие разговаривает разговаривал разговаривала разговаривали разговаривать разговаривают разговаривая разговеемся разговенья разговор разговора разговорам разговорами разговорах разговоре разговорился разговоров разговором разговору разговорчивым разговоры разговорят разгонял разгорался разгоревшееся разгорелось разгорится разгороженную разгорячаясь разгорячились разгорячился разгула разгуливала разгульная разгульного разгулялась разгулялось раздавались раздавалось раздавался раздаваться раздавил раздавила раздавили раздавлен раздавлена раздавленного раздавленный раздается раздала раздалась раздались раздалось раздался раздарить раздастся раздаются раздвигать раздвигаются раздвигая раздвинув раздвинул раздвинулась раздвинулись раздвинуто раздев раздевала раздевался раздевать раздевают раздеваясь раздевшись раздел разделаешься разделался разделенная разделив разделил разделила разделит разделить раздельнее раздельно разделявший разделяет разделял разделяла разделялась разделяю разделяются разделяющего разденусь раздетого раздетые раздирающим раздольем раздольи раздор раздоры раздосадованный раздражает 
раздражается раздражайтесь раздражал раздражались раздражался раздражать раздражают раздражаются раздражающие раздражающим раздражаясь раздражен раздражена раздражение раздражением раздражении раздражению раздражения раздраженная раздраженнее раздраженно раздраженного раздраженное раздраженную раздраженным раздражившись раздражил раздражилась раздражило раздражит раздражителен раздражительная раздражительнее раздражительно раздражительное раздражительном раздражительностию раздражительную раздражительны раздражительные раздражительный раздражительным раздражить раздразнить раздроблен раздроблено раздробляется раздувает раздувала раздувающий раздувая раздуем раздулась раздумал раздумался раздумчиво раздумывал раздумывали раздумываться раздумывая раздумье раздумью раздумья раздуть раздушенный разевая разжал разжалобил разжалобясь разжалована разжигает раззадорило разик разини разинув разинул разинули разинутым разинутыми разиня разит разлакомившийся разлакомившись разлакомились разлегся разлейся разлетясь разлетятся разлив разливалась разливались разливался разливах разливающейся разливе разливных разлилась разлились разлилось разлился разлито разлитого разлитому разлитый различал различала различать различаю различен различил различить различия различно различные различным различными различных разложения разложенными разложено разложенье разломал разлука разлуке разлуки разлучаться разлучен разлучимся разлучиться разлюбил разлюбите разлюбить разлюбишь разлюблю разлюбя размазывать размарать размах размаха размахи размахивается размахивай размахивать размахиваются размахивая размахнувшись размахнулся размаху размашисты размен разменивать разменявший разменявшись разменял разменялся разменянного разменять размер размера размерам размере размеренно размеров размеры разметает размечтался разминается разминать размножится размозжу размолвки размучен размышление размышлении размышления размышляет размышлял размышлять размышляя размягчило размягчилось разная разнежась 
разнежил разнесет разнесется разнесла разнесшейся разнился разнимались разница разницей разницу разновековой разного разное разнокалиберное разноличного разнообразие разнообразило разнообразия разнообразна разнообразная разнообразно разнообразное разнообразные разнообразных разнообразят разнородной разнорядицу разносились разносилось разносится разносить разностями разносчик разносчики разнофасонное разнохарактерный разноцветные разноцветным разную разные разный разным разными разных разо разобидел разобижен разобрал разобрала разобрало разобрано разобраны разобрать разобраться разобьют разовьют разогнавший разогнать разодета разодетая разодетой разодетых разодранной разодраны разожженным разозлившись разозлил разозлился разойдемся разойтись разольет разольется разом разомкнуть разорвал разорвала разорвали разорвалось разорванного разорванное разорванной разорванные разорванным разорвано разорвать разорвусь разорение разорены разорила разорили разорились разорился разоряет разорять разостлали разостлались разочаровался разочарование разочарования разочарованный разочаровать разочароваться разочаровываться разочек разошлась разошлись разработана разработкою разработывания разразилась разразились разразился разразит разразится разразиться разрежены разрезал разрешается разрешаешь разрешалась разрешать разрешаться разрешаю разрешение разрешения разрешено разрешенье разрешил разрешила разрешили разрешился разрешите разрешится разрешить разрешиться разрешишь разрозненных разронял разрослась разрушает разрушаете разрушаешь разрушал разрушать разрушают разрушающий разрушающими разрушая разрушение разрушений разрушения разрушенное разрушено разрушила разрушители разрушительного разрушительными разрушителями разрушиться разрыв разрыва разрывала разрывались разрывать разрывающим разрывая разрыве разрыву разряд разряда разрядится разрядов разряду разряды разряженные разряженных разу разубедить разубеждали разубран разуверило разуверит разуверить разуверять разудалого 
разузнавать разузнал разузнали разузнамши разузнать разузнаю разукрашенную разукрашенные разукрашенный разум разума разумеет разумеется разумеешь разумел разумела разумели разумения разумихин разумихина разумихине разумихину разумихиным разумнее разумно разумного разумное разумною разумные разумом разучился разъединения разъезде разъездов разъезды разъезжать разъезжая разъехавшись разъехались разъешься разъяснением разъяснений разъяснения разъяснено разъяснил разъяснилось разъяснится разъяснить разъяснял разыграется разыгралась разыгрались разыгрался разыграть разыгрывавшиеся разыгрывается разыгрываешь разыгрывайте разыгрывалась разыгрывались разыгрывался разыскав разыскал разыскали разыскать разыскивал разыскиваний разыскивать разыскиваю разыщу разящий рай райские райского райское рак раковина раковинами рама раме рамках рамке рамкой рамой рамочку рампе рамы ран ранах ране раневская раневской ранее ранен ранена раненого ранешенько ранил ранили ранит ранить раннего ранней раннею ранние ранний ранним ранняя рано рану раны раным раньше ранят рапортом раскаетесь раскаешься раскаивался раскаиваться раскаиваюсь раскаиваясь раскаленные раскаленный раскаленными раскат раскаты раскачнувшись раскаялся раскаяние раскаянием раскаянии раскаяния раскаяться раскидал раскинется раскинув раскинул раскинулась раскинулся раскинутыми раскладывает раскладывать раскладывают раскланивается раскланялся расклю расколдуешь расколот расколотом раскольников раскольникова раскольникове раскольниковой раскольникову раскольниковым раскольничьи раскрасавицу раскрасневшаяся раскрасневшимися раскраснелся раскричалась раскричится раскрывал раскрывались раскрываться раскрывают раскрывая раскрыл раскрыла раскрыли раскрылись раскрылся раскрытая раскрытым раскрытыми раскрыть раскуси раскусивший раскусить распался распарит распахивает распахнувшийся распахнул распахнулась распахнулись распахнут распашку распечатал распечатали распечатывай распечатывать распечатывая распивочная распивочной распивочные 
распивочными распивочных распивошной распирать расписалась расписался расписание расписки расписок расписочки расписывались расписываться расплакалась расплата расплатившись расплатился расплатиться расплатишься расплачутся расплевывался расплескав расплести расплодилось расплывается расплываться расплываясь расплылось расплюеве распни распознавать распознать располагается располагаешь располагал располагать располагаются располагающий расползается расползлась располнел расположен расположение расположении расположения расположены расположил распомаженный распорядилась распорядился распорядись распорядительнице распорядительности распорядительность распорядится распорядиться распорядок распорядятся распоряжаетесь распоряжается распоряжалась распоряжался распоряжение распоряжений распоряжению распоряжения распоряжениям распоряжусь расправил расправила расправиться расправляя расправу распределить распри распродал распростерла распростерлось распростертый распростершийся распространение распространенное распространены распространилась распространилось распространился распространить распространяется распространять распрях распугала распускается распускал распускала распускались распускать распустив распустившийся распустил распустила распустили распустился распустить распутает распутица распухшее распущенность распущенные распущены распылавшийся распяв распятие распять рассадить рассажены рассвет рассвета рассвете рассветет рассветом рассвирепев рассвирепел рассеет расселись рассердившийся рассердившись рассердилась рассердили рассердились рассердился рассердитесь рассердится рассердиться рассердятся рассержен рассерженный рассержены рассечь рассеялось рассеян рассеянна рассеянно рассеянности рассеянность рассеянностью рассеянны рассеянный рассеянным рассеяно рассеять рассеяться расскажет расскажете расскажешь расскажи расскажит расскажите расскажу расскажут рассказ рассказа рассказав рассказал рассказала рассказали рассказам рассказами рассказана рассказанный 
рассказать рассказах рассказе рассказов рассказом рассказу рассказчика рассказчиц рассказы рассказывает рассказываешь рассказывай рассказывайте рассказывал рассказывала рассказывали рассказывать рассказываю рассказывают расслабленного расслабленный расслабленным расследован расслушала расслушать расслышав расслышал расслышала расслышать рассматривавшего рассматривает рассматривается рассматривал рассматривала рассматривали рассматривания рассматривать рассматривают рассматривая рассмеетесь рассмешат рассмешил рассмешила рассмешили рассмешит рассмеявшись рассмеялся рассмеяться рассмотрев рассмотрение рассмотреть рассориться расспрашивайте расспрашивал расспрашивала расспрашивали расспрашивать расспрашивая расспросами расспросила расспросит расспросить расспросов расспросы расставаться расставив расставил расставила расставили расставлено расставляет расстаемся расстается рассталась расстались расстался расстанемся расстановисто расстановками расстановкой расстаться расстаются расстегнул расстегнуть расстелет расстилалася расстилались расстилаться расстояние расстоянии расстояния расстраивались расстроен расстроена расстроенного расстроенной расстроенную расстроенные расстроенный расстроено расстроены расстроивает расстроил расстроили расстроить расстроиться расстроишь расстроишься расстройства расстройстве расстройство расступись рассуди рассудив рассудил рассудила рассудилось рассудит рассудите рассудительна рассудительная рассудительнее рассудительный рассудить рассудка рассудке рассудкина рассудком рассудок рассуждает рассуждаете рассуждаешь рассуждал рассуждать рассуждаю рассуждают рассуждая рассуждение рассуждений рассуждению рассуждения рассуждениями рассчитав рассчитайте рассчитал рассчитали рассчитаны рассчитать рассчитываем рассчитывает рассчитываете рассчитывай рассчитывал рассчитывали рассчитывать рассчитываю рассчитывая рассыпается рассыпала рассыпалась рассыпались рассыпался рассыпчатые расталкивает расталкивать растаял растворена растворенных растворил 
растворилась растворились растворить растворяя растение растениями растерзанном растерявшаяся растерявшимся растерявшись растеряется растеряла растеряли растерялся растерян растерянно растерянные растеряют растет расти растлелось растолкала растолковал растолковать растолковывать растолкуйте растолкую растопалась растопчет растопыренных расторопного расторопный расточаемые расточает расточаю расточилась расточители расточительности растравлять растревожили растреклятого растреклятым растрепанная растрепанного растрепанную растроган растроганная растроганной растроганный растут растущего растущей растягивает растянет растянулись растянулся расфранченную расфуфыренных расхаживаешь расхаживают расхищения расхлебывают расхлябанные расход расхода расходились расходился расходится расходными расходы расходясь расходятся расхохоталась расхохотался расцвела расцвело расцвет расцветала расцветало расцветание расцветания расцветая расцветшей расчесанные расчесанный расчесывает расчесывая расчет расчета расчетам расчетах расчетливый расчетов расчетом расчету расчеты расчищаем расчувствовался расшалясь расшаталось расшевелили расшевелить расшиб расшибанья расшибет расшибется расширением расширены расширялась расширялся расширяя расщедрилась расщедрился ратую рать раут рафаэлевой рафаэлевских рафаэля рацею рационально рационального рациональный рачителен рачительно раю рая рвал рвало рвался рвание рваного рваной рванула рванулась рванулся рвануться рваные рвать рваться рвение рвет рвется рвешь рвешься рву рдеет рдели рдеют рдея реализме реализмом реальное ребенка ребенке ребенком ребенку ребенок ребеночек ребер ребра ребре ребят ребята ребятишек ребятишкам ребятишками ребятишки ребяческая ребяческий ребячества ребячий ребячьих рев ревель реверанс реви ревизор ревматизм ревматизмом ревнивая ревнивее ревниво ревнивой ревнивый ревновал ревновала ревновать ревности ревность ревностью ревнует ревнуя револьвер револьвером революции революционных революция ревом ревущих регистраторша 
регистраторши регре регулировать регулярно редактора редакции редакций редакция редеет редеть редкая редки редкие редкий редким редкими редких редко редкое редкой редкости редкость редька редьки редьку реет реже режет режу резались резанная резать резв резва резвая резвому резвость резвы резвые резвясь резвятся резедой резеду резеды резиденция резиновых резкая резкие резкий резким резкими резких резко резкой резком резкость резной резню резок резоны резоня результат результатах результате результатов результату результаты резцом резче резюме рейнгольду рейт рек река рекам рекамье реке реки рекой рекомендации рекомендацию рекомендация рекомендовал рекомендовать рекомендует рекомендуется рекомендую рекомендуя рекорд рекою ректор реку религией религии религий религиозно рельефностью рельс рельсах рельсы реляции реляцию ремень ремесла ремесленник ремесленники ремесленное ремеслу ремне ремни ренегате реникса ренную рент реомюра репетитором реплики репой репу репутацией репутации репутаций репутацию репы ресниц ресницами ресницах ресницы ресслих рессорную ресторан ресторана ресторане ретивое ретировавшегося реторту ретроград ретроградна рефлексом реформа реформы рехнулись рехнулся рецепт рецепта рецептов речами речах речей речи речисто речка речке речки речкой речку речной речь речью решает решаете решается решаешься решай решайтесь решал решала решалась решались решался решатся решать решаюсь решают решаясь решен решена решение решением решении решений решения решениям решенном решенный решено решетку решетчатого решетчаты решетчатые решетчатым реши решив решившегося решившей решившийся решившись решил решила решилась решили решились решился решим решимости решимость решимостью решит решите решителем решительная решительнее решительно решительного решительное решительном решительности решительную решительный решительным решится решить решиться решоткой решусь решь реяли ржавой ржавые ржавый ржавых ржаной ржи ризами ризой рим рима римляне римские ринется ринулся ринутся 
ринуться рис риск риски рискнул рискнула рискнули риском рисовал рисовала рисовало рисовалось рисовался рисования рисовать рисовой рису рисует рисуешь рисунка рисунков рисунок рисую рисуюсь рисуя ритм ритма ритмическое ритмы риторика риторики рифм рифмованную ро робел робела робели робенок роберте робеют робея робка робкая робкие робкий робким робко робкого робкой робком робок робостию робость робостью робче ровесник ровна ровнее ровнехонько ровно ровное ровной ровны ровные ровный ровным ровняет рог рога рогам рогами рогатого рогатые рогов роговой рогожею род рода родах роде родей роденьку роди родила родилась родились родилось родился родименький родимой родимый родина родине родиной родину родины родион родиона родионом родиону родионыч родителей родители родитель родительская родительский родительского родительском родителя родителями родится родиться родная родне роднейший родни роднила родного родное родной родном родную родные родным родными родных родню родня родов родом родства родственник родственника родственнике родственники родственников родственнику родственница родственницей родственницу родственницы родственного родственное родственной родственному родственные родственный родственных родство роду родька родьке родькин родэ родю родя родясь родятся роем роется роешься рожа рожами рождает рождается рождал рождали рождалось рождался рождают рождающейся рождающимся рождающихся рождение рождением рождении рождения рожденные рожденья рождества рождестве рождественном рождество рождеством рождеству роже рожи рожком рожок рожу рожь рожью роз роза розан розах розни розно розные розовеет розовеют розовея розовое розовой розовом розовые розовый розовыми розовых розой розоперстая розу розы роится рой рок рока роковая рокового роковое роковой роковые роковым рокот рокотание рокотов рокотом рокоты роланда ролей роли роль ром роман романа романах романе романистов романический романическим романического романов романович романовича романовичу романовна романовне 
романовной романовну романовны романс романсов романчик романы романыч романыча романычем романычу ромео рому роняет ронял ронять роняя ропот ропота роптал роптали роптать ропщу рос роса росах росинки росистую роскоши роскошная роскошнее роскошно роскошного роскошные роскошный роскошным роскошных роскошь роскошью росла росли росло росный росписные россией россии российский россию россия россияда россиянину рост роста росте ростовщика ростовщица ростом росту росы рот рошую роща рощах роще рощей рощи рощу роют роются рояле роялем рояль рояля роясь роящихся рт рта ртами ртом рту рты рубах рубахе рубаху рубашек рубашечку рубашечный рубашка рубашках рубашке рубашки рубашкой рубашку рубашонке рубежей рубежи рубец руби рубивших рубикон рубила рубили рубин рубини рубинштейн рубит рубить рубища рубище рубиянить рублев рублевым рублей рубликов рубль рублю рубля рублям рублями рублях рубнуть рубцы ругает ругается ругал ругали ругались ругался ругань ругательски ругательским ругательств ругательства ругательствами ругать ругаться ругают ругая ругаясь ругнул руда рудник ружье ружьем ружья рук рука рукав рукава рукавами рукавом рукам руками руках руке руки руководитель руководителя руководить руководств руководства руководствовался руководством руководствуясь рукой рукомесло рукомойнику рукописи рукопись рукоплесканий рукопожатий рукопожатья рукою руку руля румяна румяная румянец румянил румянит румянится румяное румяную румянца румянцем румяный румяными руне рус русака русалке русалкой русалкою русалочья руси русло русская русски русские русский русским русскими русских русско русского русской русском русскому русскую руссо русые русыми русь русью рутина рутине рутинеры рутину рухлядь рухнул рухнулась рухнуло рухнулся рухнут руце ручаетесь ручайтесь ручаюсь ручей рученьку ручищи ручка ручками ручки ручкой ручку ручном ручным ручонками ручонкой ручонку ручьем ручьи ручья ручьями рушатся рушились рушилось рыба рыбак рыбаками рыбачьих рыбинскую рыбой рыбу рыбы рыбьим рыдает рыдал 
рыдала рыдали рыдало рыдалось рыданием рыданий рыдания рыданиями рыданиях рыдать рыдают рыдающие рыдая рыжая рыжеватыми рыжие рыжики рыкающих рыло рылом рылся рылу рынкам рынках рынке рынком рынок рысак рытвину рыться рыхлый рыцарей рыцари рыцарь рычага рычание рычаньем рыщет рыщу рьяно рю рюмка рюмке рюмки рюмкой рюмку рюмочки рюмочку рюшем рябая рябило рябину рябого рябчика рябчики рябчиков рябь ряд ряда рядах рядов рядом ряду ряды рядышком рядят ряженые ряженых рязанские с саблей сабли саблю саван саване савиновых савич савича савишна саврасая савраске савраской савраску сад сада садами садик садике садику садилась садился садись садитесь садится садить садиться садки садов садовник садовника садовнику садовой садовского садовскому садовую садовый садовых садом саду садясь садятся садящееся сажает сажал сажала сажать сажаю сажают сажая сажен сажени сажень саженях сажусь сайкой сайку саксонии саксонских саксонского салазки салат сале сало салом саломеи саломея салон салоне салонов салоном салоп салопа салопе салопов салопом салопы салфетке салфетку сальная сальной сальный сам сама самая сами самим самих само самовар самовара самоваре самоваров самоваром самоварчик самовлюбленностию самовольно самого самодержавный самодовольное самодуров самое самозабвением самозабвения самозабвенья самой самойлу самолично самолюбив самолюбивая самолюбивой самолюбивы самолюбивый самолюбивых самолюбие самолюбием самолюбия самом самомалейшему самомнение самому самонаслаждением самонужнейшее самоотвержении самоотвержения самоотверженным самопожертвования самосознания самосохранения самостоятельная самостоятельно самостоятельного самостоятельной самостоятельности самостоятельностию самоубийств самоубийства самоубийство самоубийством самоубийца самоуверенная самоуверенно самоуверенности самоуверенность самоуверенностью самоунижения самоуправство самою самую самые самый самым самыми самых сана сандалий саней санки санкт сановником сапог сапога сапогам сапогами сапогах сапоге сапоги 
сапогов сапогом сапожникам сапожной сарае сараев сараи сарай саранча саратов сарафане сарая сараям сараях сарказма сарказмами сарказмов саркастически саркастическую саркофаги саркофагом сатанинский сатир сатира сатире сатиров сатиру сатиры сафьянные сафьяном сахар сахарная сахарница сахарницу сахарные сахару сбавка сбавки сбалмошные сбегает сбегаешь сбегай сбегала сбегались сбегать сбегаю сбежал сбежала сбежались сбежался сбережения сбереженные сбережет сберечь сбивает сбиваете сбивается сбиваешься сбивали сбивался сбивать сбивают сбиваются сбивающих сбиваясь сбивчиво сбивчивую сбившейся сбившеюся сбившись сбил сбилась сбили сбило сбился сбирались сбирался сбит сбитая сбитый сбитых сбить сближались сближался сближение сближению сближения сблизил сбоку сболтнул сборник сборнике сборное сбрасывали сбрасывая сбредить сбрил сбрили сброду сбросил сбросила сбросили сбросить сбрось сбрую сбруя сбудется сбываться сбывают сбывающихся сбывшимся сбыл сбылось сбыта сбыть сбыться сва свадеб свадьба свадьбе свадьбой свадьбу свадьбы сваливается сваливал свалил свалилась свалился свалит свалится свалить свалял свалят сварили сварить сватался сватают сватаясь сватовства сватовство све сведений сведения сведениями сведенная сведете сведи сведущих свеж свежа свежая свежего свежее свежей свежем свежему свежеодетым свежепросоленные свежести свежесть свежестью свежею свежие свежий свежим свежими свежих свежо свезти свезу свезут свел свели свергнуть сверка сверкавшие сверкает сверкала сверкали сверканий сверканьи сверкать сверкающие сверкающим сверкающими сверкнет сверкнув сверкнувшее сверкнувший сверкнул сверкнула сверкнули сверкнуло сверкнут сверлящих свернет свернется свернула свернули свернулись свернулся свернутого свернуть свертков свертывает свертывали свертывают сверх сверху сверхъестественное сверхъестественный сверчок свершай свершать свершая свершив свершил свершилось свершился свершится свершу сверь свесив свести свет света светает светать свете светел светелке свети светик 
светил светила светилась светились светило светилось светит светится светить светла светлая светлее светлей светлела светленькие светленькое светлице светлицу светло светловыбритого светлого светлое светлой светлом светлою светлую светлы светлые светлый светлым светлыми светлых светляка световой светозарный светом светопреставление светская светский светскими светских светской светском светскому свету светы светятся светящими свеч свеча свече свечей свечи свечка свечке свечки свечкой свечку свечными свечой свечу свешивалась свивает свидание свидании свиданий свиданию свидания свиданиях свиданья свидетеле свидетелей свидетелем свидетели свидетель свидетельница свидетельский свидетельства свидетельстве свидетельство свидетельствовала свидетельствовать свидетельством свидетельствую свидетельствуют свидетельствуя свидетеля свидетелями свидетелях свидригайлов свидригайлова свидригайлове свидригайлову свидригайловы свидригайловым свидригайловых свинец свинкин свинства свинство свинтус свинцовый свинцовым свинцом свиным свиньи свинья свирбею свирели свирель свирепо свирепость свирепою свирепствует свисают свист свисте свистела свисти свистит свистки свистнет свистнула свисток свистя свита свиток свиты свищешь свобода свободе свободен свободна свободная свободнее свободно свободного свободное свободной свободном свободною свободную свободны свободный свободным свободу свободы свод свода своде сводил сводила сводит сводить сводная сводя свое своевольного своеволья своевременно своевременным своего своей своем своему своею свозили свои своим своими своих свой свойски свойств свойства свойствам свойствах свойстве свойственен свойственная свойственно свойственной свойственную свойственным свойство сворачивал своротит своротишь сворочен свою своя свысока свыше свяжем свяжись связав связал связали связался связан связана связанную связанных связать связаться связей связи связке связных связывала связывало связываться связь связью связям связями святая святейшая святках святки 
свято святого святое святой святок святому святую святые святым святыми святынею святыни святынь святых священная священник священника священником священнику священной священную священный священным священных сгиб сгибался сгинешь сгинула сгинуть сгинь сглаживался сглазим сглазить сглупа сглупил сгнил сгниют сговаривались сговариваются сговорившись сговорилась сговорились сговоримся сговориться сговорятся сгонит сгоняли сгоняют сгорал сгорало сгорая сгорбленные сгорбленный сгорел сгорела сгорело сгореть сгорит сгоряча сготовила сгреб сгруппировались сгубил сгубила сгубили сгубило сгущавшейся сгущавшихся сгущались сгущающейся сдав сдавило сдает сдается сдал сдано сдать сдач сдача сдачи сдачу сдаю сдаются сдвигала сдвинешься сдвинув сдвинул сдвинулась сдвинулись сдвинулся сдвинусь сдвинуты сдвинутых сделав сделаем сделает сделаете сделается сделаешь сделай сделайте сделал сделала сделалась сделали сделались сделалось сделался сделан сделанная сделанное сделанный сделано сделаны сделать сделаться сделаю сделают сделаются сделку сдержав сдержал сдержали сдержанная сдержанно сдержанной сдержанностью сдержанны сдержанный сдержанным сдержанных сдержать сдерживает сдерживал сдерживала сдерживали сдерживались сдерживался сдерживать сдерживаться сдерживая сдерживаясь сдержу сдернет сдернул сдерут сдесь сдобные сдружусь сдует сдурил се себе себя себятешение севастополе севастополь севастьяныч севе север севера северного северной северный северу севшей сегидилья сего сегодня сегодняшней сегодняшнему сегодняшние сегодняшний сегодняшняя седая седеть седеют седея седла седлах седого седое седой седок седоков седому седою седые седым седыми седых седьмая седьмого седьмом седьмую сеет сезон сей сейчас секи секирой секрет секретами секретарем секретарше секретаршу секретарь секретарю секретаря секрете секретного секретных секретов секрету секреты сектант секунд секундант секунду секунды секут секучим секущих сел села селадон селам селе селедка селедкой селедку селения селенья сели 
селиться село селу сельди сельский сельской сельтерскую сельцо селянку сем семафор семафоре семгой семейного семейное семейной семейном семейною семейные семейный семейным семейных семейств семейства семействе семейство семейством семейству семен семена семени семенила семенит семенов семенович семеновича семеновиче семеновичем семеновичу семеновна семеновне семеновной семеновну семеновны семеном семену семеныч семеныча семи семидесяти семидесятых семик семика семилетний семинарии семинарист семинариста семинаристов семицветной семнадцати семнадцать семой семом сему семь семьдесят семье семьей семьи семьсот семью семья семьянин семя сен сена сенат сенате сенатора сенатору сенатским сенатского сенату сеней сени сенная сенной сенную сенным сенных сено сеновал сеновала сеновале сеновалу сенокос сеном сентенции сентенций сентенциозно сентенцию сентенциями сентиментальности сентябре сентябрь сентября сень сенью сенях септимия сера серафимы серая сервирован сервированный сергевна сергевне сергевну сергеевич сергеевна сергеевне сергеевной сергеевну сергеич сергию сердечко сердечнее сердечно сердечного сердечной сердечные сердечный сердечными сердечных сердилась сердился сердимся сердись сердит сердита сердитая сердитесь сердито сердится сердитый сердиться сердишься сердобольная сердолики сердца сердцами сердцах сердце сердцем сердцу сердятся серебра серебре серебристо серебристые серебро серебром серебряков серебрякова серебрякову серебряная серебряника серебряной серебряную серебряные серебряный серебряным серебряных серег середи середина середине серединных середину середины середы сережками сережках серей серенькое сержусь серий серо серовато серого серое серой сером серпами серую серчал серы серые серый серыми серых серьгами серьги серьезная серьезнее серьезно серьезного серьезное серьезной серьезном серьезною серьезную серьезны серьезные серьезный серьезным серьезных сестер сестра сестрам сестре сестрино сестрину сестриц сестрица сестрице сестрицей сестрицу сестрицы 
сестрой сестру сестры сесть сети сетка сетке сетку сетовал сеттиньяно сеть сетях сечет сечи сечь сею сеют сея сжав сжавшись сжал сжала сжалась сжалилось сжались сжалится сжалиться сжало сжалось сжался сжато сжатую сжаты сжатые сжатый сжатыми сжевать сжег сжечь сжигающем сжимал сжимала сжималась сжимали сжимались сжимаю сжимают сжимая сзади сибарит сибариты сибири сибирке сибирь сивиллины сиволапым сигар сигарету сигарой сигару сигары сигать сигнал сидевшая сидевшего сидевшему сидевший сидевшим сидел сидела сидели сидело сиделось сиденье сидеть сиди сидим сидит сидите сидится сидишь сидя сидят сидящая сидящей сие сиена сиенский сиенского сижу сизая сизой сизые сикстинской сил сила силам силами силах силача силе силен силилась силился силится силой силою силу силушки силуэтом силы сильва сильна сильная сильнее сильней сильнейшее сильно сильного сильное сильной сильном сильною сильную сильны сильные сильный сильным сильных силясь сим символ символами символизма символизмом символизму символистов символических симеонов симеонова симеоновых сими симметрически симметрично симпатией симпатии симпатичен симпатичная симпатичной симпатичны симпатичный симпатию симпатия симплон симптомы синевой синеву синевы синего синее синей синем синенькие синеньким синенькою синеокая синечерною синеющих синие синий синим синими синих синодальный синодальных синонимы синтаксис синь синьора синьоры синюю синяках синяя сипел сипенье сипеньем сипеть сиплая сипло сиплую сиплым сиплыми сиреневая сиреневой сиреневых сирени сиреной сирень сирины сириус сиропа сирот сирота сиротами сиротка сиротливо сиротливой сиротские сиротских сиротском сироты систем система систематическом системе систему системы сите ситху ситцевое ситцевом ситцевую ситцевыми ситцевых ситцем сих сию сия сияет сияешь сиял сияла сияли сияло сияние сиянием сиянии сияния сияньем сияньи сиянья сиятельство сиять сияют сияющей сияющие сияющий сияющими сияющих сияющую скабрезный скажем скажет скажете скажешь скажи скажите скажу 
скажут сказ сказав сказавшись сказал сказала сказали сказались сказалось сказаль сказаниям сказаниями сказанного сказанное сказанном сказанные сказанный сказано сказаны сказать сказка сказкам сказками сказках сказке сказки сказку сказок сказочник сказочный сказывай сказывайте сказывал сказывала сказывали сказывать сказываться скакал скакала скаканье скаканьем скаканья скакать скакнет скакнуть скал скалами скалдырничество скале скалистый скалить скалишь скалкой скалой скалою скалу скалы скальда скалясь скамеечка скамеечке скамеечку скамей скамейка скамейке скамейки скамейку скамье скамьи скамью скамья скамьях скандал скандалезнейшем скандалезное скандализируя скандально скандальному скандируя сканированный скаредной скаредном скаредную скатерти скатерть скатертью скатилась скача скачет скачка скачков скважине скважину скверен скверная сквернее сквернейшего сквернейшим скверно скверное скверном скверность скверные скверный скверным скверных сквозила сквозит сквозить сквозного сквозняках сквозь сквозя сквозящий скворцов скворцом скворцы скелет скептик скидывать ский скинет скинуть скиньте скит скитайся скитались скитался скитальцам скитальцев скитальцу скитанья скитаться скитов скиты склад склада складам складенные складка складках складке складки складкой складку складно складной складок складываете складывались складывая склепе склона склоне склонен склоненный склони склонив склонившейся склонившись склонил склонила склонилась склонили склонилось склонился склоним склонится склонить склонность склонны склону склоню склоняет склоняется склоняла склонялись склонясь склоняюсь склоняют склоняющийся склоняясь склубясь склянка склянками склянки скобками скобках сковород сковорода сковороде сковородке сковороду сковороды сковывал ского ской сколотились сколоченной сколоченных сколь скользила скользит скользить скользкие скользких скользкой скользнет скользнул скользнула скользнуло скользнуть скользя скользят скользящей скользящий скольких сколько скольку ском скомкала 
скомканная скомканное скомпрометировать сконфуженно сконфуженный сконфужены сконфузившаяся сконфузившийся сконфузившись сконфузилась сконфузился сконфузит сконфузитесь скончался скопившегося скопившейся скопить скопленных скоплено скорбей скорбеть скорби скорбит скорбная скорбного скорбное скорбной скорбную скорбь скорбя скорбями скорее скорей скоренько скорлупами скорлупу скорлупы скорняки скоро скорого скороговорке скороговоркой скорое скорой скором скорому скоропостижно скорчившись скорый скорым скорыми скосил скот скота скотина скотининых скотины скотнице скотницу скотный скотоводство скотта скошенную скошено скрадывала скрасить скребет скребла скрежет скрежета скрежетал скрежеща скрежещущий скрепил скрепилась скрепился скрестив скрестил скривив скривил скривило скривя скрижали скрижалями скрип скрипач скрипел скрипели скрипи скрипит скрипка скрипкам скрипками скрипке скрипки скрипкой скрипнула скрипок скрипом скрипу скрипучие скрипучий скрипучим скрипят скроем скроен скроет скроется скромен скромная скромно скромной скромности скромною скромную скромные скромный скрутило скручена скрыв скрываемая скрываемого скрываемых скрывает скрываете скрываешься скрывайся скрывал скрывалась скрывали скрывались скрывать скрываться скрываю скрываются скрывая скрываясь скрыл скрыла скрылась скрыли скрылись скрылся скрыпя скрыт скрыта скрытна скрытном скрыто скрытой скрытому скрытою скрытую скрыты скрытый скрытых скрыть скрыться скрючившись скрючился скряга скуден скудной скудном скудную скудны скудный скует скука скуке скуки скукой скуку скулах скулистое скульптору скуп скупа скупает скупал скупо скупости скупость скупцом скупы скучает скучаете скучаешь скучай скучал скучала скучать скучаю скучают скучающему скучая скучен скученное скучища скучна скучная скучнее скучнейшие скучно скучного скучное скучной скучном скучную скучны скучные скучный скушали скую слаб слаба слабая слабее слабеет слабел слабенькая слабенькие слабенькою слабея слабо слабого слабое слабой слабом 
слабонервная слабосилие слабости слабостию слабость слабостями слабою слабы слабые слабый слабым слабыми слабых слава славе славит славить славная славненькая славно славном славны славные славный славным славных славой славу славы славянофил славянский славянском слагает слагается сладить сладишь сладкая сладкие сладкий сладким сладких сладко сладкое сладкой сладкою сладок сладостно сладостное сладостной сладостных сладострастие сладострастием сладострастного сладострастный сладость сладчайший сладчайшую слаж слаженный сластию сласть слать слаще слева слег слегка слегла след следа следам следами следившего следил следила следили следим следит следите следить следов следовал следовала следовали следовало следователей следователи следователь следовательно следователю следователя следователями следовать следственно следственных следствие следствии следствия следует следующее следующей следующем следующие следующий следующих следующую следуя следы следя следят следящий слежу слез слеза слезам слезами слезах слезинками слезинки слезла слезно слезой слезши слезы слезьми слеп слепа слепнуть слепо слепого слепой слепому слепота слепоте слепы слепых слепя слепящей слесарное слесаря слесарями слетает слетала слетаю слетел слетели слетело слететь слетит слив сливаешься сливались сливалось сливался сливами сливках сливки сливок сливочками слившиеся слилась слились слипаются слитых слить слиться сличить слишком слиянием слов слова словам словами словаре словах слове словесная словесное словечка словечкам словечки словечко словно слово словом словоохотливо словоохотливый словоохотнее слову словцо слог слога слоге слогом слоев слоем сложа сложен сложена сложение сложения сложенного сложенную сложенный сложенными сложены сложив сложившиеся сложил сложилась сложили сложились сложился сложиль сложим сложить сложиться сложнее сложно сложного сложный сложных слоистому слоистые сломал сломалась сломали сломались сломана сломать сломают сломила сломили сломит сломя слоновой слоняюсь 
слою слоями слуг слуга слугам слугами слуги слугой служака служаки служанка служанкам служанке служанки служат служащего служащий служб служба службе службой службу службы служебные служение служения служи служившая служившей служивший служившую служивый служил служила служили служились служим служит служите служить служишь служу слух слуха слухи слухом слуху случа случае случаев случаем случается случаи случай случайная случайно случайного случайное случайной случайному случайностей случайности случайность случайностью случайностями случайною случайные случайный случайным случайных случались случалось случался случаю случаются случая случаям случаях случившееся случившемся случившимся случилась случились случилось случился случись случится случиться слушавшая слушавшей слушавшие слушавший слушает слушаете слушается слушаешь слушай слушайся слушайте слушал слушала слушалась слушали слушался слушателе слушателей слушатель слушательниц слушателя слушателями слушать слушаю слушаюсь слушают слушающей слушая слывя слыл слыла слыхав слыхал слыхала слыхали слыханное слыхано слыхать слыхивал слыхивала слыхивали слыхом слыша слышал слышала слышалась слышали слышались слышалось слышался слышамши слышанное слышанные слышанный слышат слышатся слышать слышен слышим слышит слышите слышится слышишь слышна слышнее слышней слышно слышны слышный слышным слышу слышь слюбится слягу слякоть смазанные смазано смазаны смазать смазлива смазные смак смастерила сматывалось смахнет смахнул смахнула смеет смеете смеетесь смеется смеешь смеешься смеженные смежил смежную смей смейся смейте смейтесь смекали смекнул смел смела смелая смелее смелей смели смело смелой смелость смелостью смелую смелы смелые смелый смелым смель смене смени сменил сменились сменилось сменился сменится сменить смену сменяемых сменяла сменялась сменялись сменялось сменять сменяются смердит смерить смеркается смеркаться смерклось смертей смертельно смертельною смертельную смертельных смерти смертию смертная смертного 
смертной смертном смертную смертный смертным смертоубийству смерть смертью смерч смести смесь сметаешь сметал сметания сметаной сметаны смети сметливая смету сметы сметь смех смеха смехе смехом смеху смешавшись смешал смешала смешалась смешались смешанные смешанный смешения смешивал смешивала смешивался смешиваясь смешил смешит смешить смешлив смешливая смешливого смешливых смешная смешнее смешно смешного смешное смешной смешную смешны смешные смешным смешными смешон смею смеюсь смеют смеются смеющееся смеющейся смеющиеся смеющимися смея смеялась смеялись смеялось смеялся смеясь смеяться смигнув смирен смиренен смирением смиренная смиренней смиреннейший смиренно смирит смирительный смирится смириться смирно смирному смирные смирный смирных смирю смирялась смирялся смиряются смог смогла смогли смогу смоет сможем сможет смоле смоленском смолист смолк смолкает смолкай смолкали смолкло смолкнет смолой смолол смолчал смольный сморил сморила сморкается сморкался смородинной смородинном смородинную смородинные смородинным смородиновку смородиновой смородиновую сморозил сморщенной сморщены сморщившись сморщится смотр смотревшего смотревшее смотревшей смотревшую смотрел смотрела смотрели смотрело смотрель смотреть смотри смотрим смотрит смотрите смотрителя смотрится смотришь смотрю смотря смотрят смочена смоченную смочены смою смрад смрадною смрадную смрадом смуглая смугловатый смуглое смуглом смуглою смуглые смуглый смуглым смуглых смутен смутившись смутила смутилась смутился смутить смутно смутное смутной смутном смутные смутный смутным смутных смутясь смущавшей смущает смущается смущайся смущал смущала смущали смущались смущался смущать смущаться смущаю смущаюсь смущаясь смущен смущена смущение смущением смущении смущения смущенная смущенно смущенный смущенным смущеньи смущенья смываешь смывать смывая смыкала смыкая смысл смысла смысле смыслит смыслишь смыслом смыслу смысль смысля смыслят смытый смыть смычкам смычки смычок смягчают смягчающие смягчая смягчению 
смягчившись смягчился смягчить смял смят смятение смятенная смятенье смятеньи смятой смятые смятый сна снабжает снадобья снам снами снаружи снастью снах сначала сне снег снега снегами снеговая снеговой снеговою снеговую снегом снегу снедает снедающий снедая снежинок снежки снежком снежку снежная снежно снежной снежном снежному снежную снежные снежный снежных снес снесем снесешь снеси снесите снесла снести снизойти снизошла снизу снилась снились снилось снился снимает снимаете снимаешь снимала снималась снимали снимать снимаю снимают снимая снимет снимитесь сниму снисходителен снисходительна снисходительная снисходительно снисходительного снисходительной снисходительном снисходительностью снисходительною снисходительную снисходительным снисхождение снисхождением снисхождения снится сниться снов снова сновал сновать сновидений сновидцу сном сноп снопы сноровки сносили сносить сносное сносному сносные сносный сношение сношений сношения сну снурке снурку снурок снуют снующих сны снюхались снял сняла сняли снят снята снято снятся снять со собак собака собакам собаками собаке собаки собаку собачий собачкой собачонка собачонку собачонок собачьей собачьи собачьих соберет соберется соберу соберусь соберутся собеседника собеседниками собеседников собеседником собираемся собирает собираетесь собирается собираешь собираешься собирала собиралась собирали собирались собирался собирательным собирателя собирать собираться собираюсь собираются собираясь соблазненная соблазнитель соблазнительная соблазнительно соблазнительное соблазнительною соблазнительную соблазнительные соблазнительным соблазнительных соблазнителям соблазнить соблазнял соблазняю соблазняют соблазняя соблюдал соблюдаю соблюдения соблюсти собой соболезнованием соболезнуя собор собора соборе собору собою собрав собравшийся собравшимися собравшись собравшихся собрал собралась собрали собрались собрался собран собрание собрании собранию собрания собранье собрат собрать собраться собственная собственно собственного 
собственное собственной собственном собственному собственноручно собственноручное собственность собственную собственные собственный собственным собственными собственных событие событии событий события событиям событиями событиях собьет собьешь собьешься собьют сова совал соваться совершаемой совершает совершается совершал совершала совершались совершалось совершать совершаются совершающиеся совершения совершенная совершеннейшем совершеннейший совершенно совершенного совершенное совершеннолетия совершенном совершенному совершенною совершенный совершенным совершенными совершенных совершено совершенства совершенство совершенствуя совершившийся совершил совершила совершилось совершит совершить совести совестился совестится совестливо совестно совесть совестью совет совета советами совете советник советника советники советником советнику советов советовал советовала советовались советовался советовать советоваться советом совету советует советуйте советуют советы совещались совещание совещаться совиные совладал совладеть совлечет совмещаются совокупного совокупность совпадает совпадать совпадение совпадением совпадений совпало соврав соврал совралась соврать современно современного современное современный современными соврешь совсем совьется согбен согбенный согласен согласие согласии согласилась согласились согласился согласись согласитесь согласится согласиться согласишься согласия согласна согласная согласно согласны согласный согласятся соглашаетесь соглашается соглашалась соглашались соглашался соглашаться соглашаюсь соглашение соглашении соглашусь соглядатай согнать согнется согнув согнувшись согревать согреваться согрелась согрет согрето согреться согрешил согрешишь согрею содержал содержание содержанием содержании содержания содержатель содержать содержит содом содрал содрогалась содрогался содроганием содрогания содрогнется содрогнулся содрогнуться соды соединенную соединенным соединил соединилась соединилось соединимся соединить соединяющая сожалел сожаление 
сожалением сожалению сожаления сожаленья сожалеть сожалею сожалея сожгло сожжен сожжена сожженные сожженный сожжено сожителе сожительницы сожителя сожмет созвал созваны созвездиям созвучий создавали создавать создал создала создан создана создание созданием созданию создания созданная созданного созданной созданном созданную созданный созданным создано созданы созданьем созданья создательница создать создают создающими созерцание созерцании созерцания созерцательной созерцать сознав сознавал сознавала сознавалось сознавать сознаваться сознавая сознает сознаетесь сознается сознайся сознайтесь сознал сознала сознался сознание сознанием сознании сознанию сознания сознанье сознаньем сознанья сознательная сознательнее сознательно сознательные сознательным сознательных сознать сознаться сознаю сознаюсь сознаются созревает созревало созревшей созреет созрел созрела созрели созрело созреть созывали сойдемся сойдет сойдется сойдешь сойду сойдусь сойдутся сойдясь сойти сойтись сок сока соки сокол соколиными соколов соколы сократить сокращают сокращение сокращения сокращены сокровенные сокровенный сокровенных сокровищ сокровища сокровище сокройся сокрушаться сокрушением сокрытый солгал солгать солгу солдат солдата солдатам солдатка солдатом солдатскими солдатское солдатской солдаты солений соленого соленому соленый соленым соленье соленья солжет соли солиднее солиднейший солидно солидной солидному солидностью солидный солидным солидными солнечно солнечного солнечной солнечный солнечным солнечных солнца солнце солнцем солнцу солнышко соловей соловьев соловьи соловьиная соловьиного соловьином соловьиный соловья солома соломе соломенной соломинку солонами солонину солонины солонка солонкой солонку соль сольнес сольнеса солью сольются сом сомкнет сомкни сомкнутых сомкнуть сомневаетесь сомневаешься сомневайтесь сомневался сомневаться сомневаясь сомнение сомнением сомнений сомнению сомнения сомнениями сомнителен сомнительная сомнительно сомнительного сон соне соней сонетку сонечка 
сонечке сонечки сонечкин сонечкина сонечкиной сонечкой сонечку сони сонина сонливости сонливые сонм сонмы сонная сонник сонно сонного сонное сонной сонному сонною сонную сонные сонный сонным сонными сонных сонь соню сонюшка соня соображает соображал соображали соображать соображаю соображают соображая соображение соображений соображения соображениям соображениях сообразив сообразившись сообразил сообразила сообразит сообразите сообразить сообразишь сообразно сообразные сообразный сообразят сообщает сообщается сообщала сообщались сообщать сообщаю сообщая сообщению сообщено сообщества сообщи сообщил сообщила сообщили сообщился сообщим сообщите сообщительнее сообщительный сообщить сообщу соответственного соответствовала соответствует соответствующими соперник соперника соперников соперницы соперничать сопеть сопит сопля сопляк сопоставить сопоставление сопоставлении сопоставлять соприкасались соприкосновений соприкосновения сопричастные сопровождаемая сопровождаемое сопровождаемый сопровождает сопровождается сопровождала сопровождалась сопровождался сопровождать сопровождаться сопровождают сопровождении сопротивление сопротивлений сопротивлялась сопротивлялся сопротивляться сопряженные сопутников сопутствовал сопутствующих сопят сор соразмеряла соратник сорвал сорвала сорвалась сорвали сорвалось сорвался сорвана сорванец сорванным сорвать сорвет сорвется сорившему сорила сорин сорина сорину сориным сорить сорная сорок сорока сорокалетний сороковой сороковых сором соромники сорта сортировок сортов сору сосал сосед соседа соседей соседи соседнего соседней соседних соседняя соседстве соседство соседству соседу соседям сосен сосет соси соскандалит соскочил соскочить соскучились соскучился соскучитесь соскучишься сослаться сословия сослуживец сослуживцев сослуживцу сослуживцы сосновка сосновке сосновки сосновый сосну соснул соснуть сосны сосредоточеннее сосредоточенное сосредоточенной сосредоточенности сосредоточенность сосредоточенный сосредоточенным сосредоточены 
сосредоточивалось сосредоточивался сосредоточивая сосредоточившихся сосредоточилась сосредоточилось сосредоточится сосредоточить сосредоточиться состав составе составил составила составили составит составится составить составишь составлен составлял составляли составлялись составляло составлять составляю составляют составляющих составов состареюсь состарившимся состарилась состарилось состарился состариться состоим состоит состоите состоится состоишь состою состоявшийся состоял состояла состоялись состояло состоялось состоялся состояние состоянием состоянии состояний состоянию состояния состоять состоящая сострадай сострадание состраданием сострадания сострить состряпала состряпали состязались сосуд сосуде сосульках сосчитав сосчитает сосчитаешь сосчитал сосчитала сосчитать сот сотая сотворил соткать соткутся сотни сотню сотри сотрясался сотрясающий сотрясая сотрясения сотый соус соусниками соусов соусу софа софе софи софистика софой софочка софочкой софочку софье софьей софьи софью софья сохнут сохранение сохранения сохрани сохранившее сохранившиеся сохранившийся сохранившими сохранившихся сохранил сохранилась сохранились сохранился сохранить сохранишь сохранявшегося сохраняет сохраняете сохраняется сохранял сохраняло сохраняют сохраняя соху социалистов социальная социального социальную социальные социальный социальных соцьялизме сочетаешь сочетались сочинена сочинение сочинении сочинений сочинения сочинениях сочинился сочиниль сочинителей сочинители сочинитель сочинителя сочинить сочиняли сочинять сочиняю сочиняют сочла сочли сочная сочной сочтены сочтет сочтите сочувственно сочувствий сочувствию сочувствия сочувствовал сочувствовать сочувствую сочувствующий сочувствуя сошедшихся сошел сошелся сошла сошлась сошлется сошли сошлись сошло сошлось сошлются сошьет сошью сощуришь сощуря спавшего спавший спадать спадающая спадая спадет спазму спал спала спалена спаленных спали спало спалось спальне спальней спальни спальной спальню спальня спанье спанья спаржа спаржи 
спаржу спас спасаешь спасала спасалась спасем спасен спасение спасением спасения спасенный спасенных спасены спасенье спасенья спасет спаси спасибо спасите спасительнейшее спасительницы спасительно спасительного спасительной спасительный спасся спасти спастись спасу спасши спать спаяешь спб спектакль спектакля спекулянт спела спеленутое спели спелое сперва спереди сперлось спермацетной спертый спесивая спеть специальности специальный спеша спешат спешащих спеши спешил спешила спешили спешит спешите спешить спешишь спешу спи спим спина спине спинка спинке спинкой спинку спинного спинном спиной спиною спину спины спирается спиралось спиралью спиридоныч спиридоныча спирт спирта спиртами спирту спирты списки списков список спит спите спитой спится спитым спицей спицы спичей спичек спичка спички спишь сплав сплело сплелось сплести сплетен сплетенных сплетеньи сплетней сплетни сплетник сплетня сплетутся сплин сплошным сплошь сплю спляши сподручница сподряд споем споемте споет спой спойте спокоен спокой спокойна спокойная спокойнее спокойно спокойного спокойное спокойной спокойном спокойною спокойный спокойным спокойствие спокойствием спокойствии спокойствию спокойствия сполетском сполз сползает сползаю сползая сполна спор спора спорах споре спорил спорили спорим спорит спорится спорить споришь спорную споров спору споры спорю споря спорят способ способами способе способен способна способнее способно способное способностей способности способность способностью способностям способностями способностях способны способные способный способным способных способом способствовал способствовала способствовало способствовать способствует способу споспешествования споткаюсь споткнулась споткнулись спотыкаясь спохватившись спохватился спою справа справедлива справедливая справедливее справедливей справедливо справедливое справедливом справедливости справедливость справедливостью справедливые справится справить справиться справишься справки справляйтесь справлялась справлялся 
справляться справляю справок справочку справьтесь справят спрашивает спрашиваете спрашиваешь спрашивай спрашивайте спрашивал спрашивала спрашивали спрашивать спрашиваю спрашивают спрашивая спро спровадить спрос спроси спросив спросил спросила спросили спросим спросит спросите спросить спросишь спросонья спросят спрошу спруты спрыгну спрыгнувших спрыгнул спрятав спрятал спрятала спряталась спрятали спрятались спрятался спрятана спрятанный спрятано спрятаны спрятать спрятаться спрячется спрячусь спрячьте спугнет спугнув спугнул спугнуть спуск спускавшая спускавшаяся спускает спускал спускала спускалась спускать спускаться спускаю спуская спускаясь спустившись спустил спустила спустились спустился спустим спустить спуститься спустя спутал спутан спутанных спутник спутника спутники спутнику спущены спущу спьяна спьяну спят спятил спящего спящей спящие спящий спящим спящих спящую сравнен сравнение сравнении сравнения сравнивать сравнила сравнительно сравниться сражает сражается сражениях сраженный сразу срам срама срамил срамила срамится срамить срамишься сребреников сребристой сребристый сребристым среда средам среде среди средиземном средине средины среднего средней среднем средний средним средних среднюю средой средств средства средствами средствах средствие средство средством средству среду среды средь срезал срезала срезана срезать сродни срок срока сроках сроке сроки сроком сруб срубить срывает срываешь срывался срывая срываясь сряду ссадил ссиня ссор ссора ссорами ссоре ссорилась ссорились ссорился ссоримся ссорится ссориться ссору ссорятся ссылалась ссылался ссылают ссылаясь ссылка ссылками ссылке ссылки ссылкой ссыльно ссыльные ссыпано ста став ставало ставил ставила ставились ставило ставит ставите ставить ставка ставлю ставней ставни ставнями ставших ставь ставьте ставя ставят ставятся стад стада стадвадцатипятирублевой стадом стай стакан стакана стаканами стакане стаканов стаканом стаканчик стаканчика стаканчиками стаканчики стаканчику стаканы стакнулся стал 
стала стали сталкивала сталкиваться сталкиваются сталкивающихся сталкиваясь стало сталось сталь стальная стальном стальным стальными стальных сталью стами стан стана стане станем станет станете станется станешь станислава станке становившиеся становилась становились становилось становился становимся становитесь становится становитца становиться становишься становлюсь становьем становясь становятся станом стану станут станцией станции станцию станция стань станьте стар стара стараетесь старается старалась старались старался старанием старания стараниям стараниями стараться стараюсь стараются старая стараясь старее стареем стареемся стареньком старец стареющей стареющий старик старика старикам стариками старики стариков старику старилась старина старине старинная старинное старинной старинном старинному старинную старинные старинный старинным старинными старину старины старится старичка старичке старички старичок старичонка старо старого старое старой старом старому староста старосте старости старостой старосту старосты старость старою старух старуха старухе старухи старухина старухиной старухиному старухиною старухину старухины старухиными старухой старуху старушечий старушечьим старушка старушками старушке старушки старушкой старушку старушонка старушонке старушонки старушонку старушонок старую старца старцем старцы старческий старческом старческому старшая старше старшей старшенькая старшею старшие старший старшим старших стары старые старый старым старыми старых стаскивал статей статейка статейки статейку статен стати статистикой статная статочное статские статский статским статского статуй статую статуя стать статье статьи статься статью статья статьях сташная стащил стащила стащит стащить стаю стая ствола стволов стеблях стеганые стегнула стезя стекавшую стекла стеклами стеклах стекле стекло стеклом стеклу стеклянная стеклянную стеклянные стеклянный стеклянным стеклярус стелется стелющемся стелятся стемнело стен стена стенам стенами стенанье стенах стене стенке 
стенкой стенку стенные стенных стеной стену стены степанида степаниду степанов степеней степени степенно степень степи степная степной степном степную степным степных степь степью степями степях стер стерва стерегла стерегли стерегут стережет стереотипном стереотипных стереть стеречь стерляжьей стерпел стерт стертых стеснений стесненная стесненно стесненное стесненной стеснены стеснило стеснилось стеснительно стеснить стесняет стесняйтесь стеснял стеснялись стеснялся стеснять стесняя стесняясь сти стиле стилистические стиль стимер стирает стирал стирался стирать стираю стирка стискивал стиснув стиснул стиснула стих стиха стихает стихами стихах стихе стихи стихии стихий стихийных стихию стихия стихли стихов стихотворений стихотворения стихотворца стлал стлалась стлив сто стога стогами стоил стоила стоили стоило стоим стоит стоите стоить стоическою стоишь стой стойке стойкой стойло стойте стол стола столами столах столб столба столбик столбняк столбняка столбов столбовой столе столетии столетий столетний столечко столик столика столиках столике столики столиков столиком столица столице столицу столицы столичный столкновений столкнувшись столкнулась столкнулись столкнулся столковались столов столовая столовой столовую столовый столовых столом столпами столпившихся столпилась столпился столпы столу столы столыпин столь стольких столько стольку столяра стон стона стонал стонала стонам стонами стонет стонов стоном стонут стоны стопами стоптанные стоптанных стопы сторговались сторож сторожа сторожевой сторожей сторожило сторожит сторожу сторон сторона сторонам стороне сторонись сторонитесь сторонке сторонкой стороной сторону стороны сторублевого сторублевый стотысячной стотысячную стошнило стою стоя стоявшая стоявшего стоявшей стоявшему стоявший стоявших стоявшую стоял стояла стояли стояло стоят стоять стоячею стоячим стоящего стоящими стравить страдаем страдает страдай страдал страдала страдали страдало страдальнее страдальчески страдальческого страдание страданием 
страдании страданий страданию страдания страданиях страданье страданьем страданьице страданью страданья страдать страдаю страдающего страдающий страдая страж страже стражей стражем стражу стран страна странах стране странен страниц страница страницам странице страницу страницы странная страннее странник страннику странно странного странное странной странном странному странностей странности странность странною странную странны странные странный странным странными странных страной странствий странствовали странствуя страны страстей страсти страстишек страстию страстная страстно страстного страстное страстной страстном страстною страстную страстные страстный страстным страстными страстных страсть страстью страстям страстями страусового страусовым страх страха страхами страхе страхи страхов страхом страху страшась страшен страшишься страшна страшная страшнее страшней страшно страшного страшное страшной страшном страшною страшную страшны страшные страшный страшным страшных стращай стращал стрекоз стрекозу стрекочет стрел стрела стреле стрелка стрелой стрелочника стрелы стрельба стрельбы стреляет стреляй стреляйте стреляли стрелялся стрелять стремглав стремился стремит стремительно стремительной стремится стремиться стремление стремлении стремлений стремления стремлениями стремя стремят стремятся стриженая стриженых стриндберга стричь строга строгая строги строгие строгий строгим строгими строгих строго строгого строгое строгой строгом строгому строгости строгостию строгость строгостью строгою строгую строевой строевых строем строен строже строитель строить строиться строй стройка стройкой стройку стройная стройно стройной стройные стройный строк строкам строках строки строку строптивый строчить строчка строчки строчку строя строят строящегося струею струи струилась струится струиться струйках струйкой струн струна струнам струне струнно струну струны струсив струсившего струсил струсила струсили струсит струю струя струями струящей струящийся стряслась стряхивала 
стряхнул стряхнуть сту студенистой студеной студент студента студенте студентиком студентом студенту студенты студенческим студенческих студенческое студенческом студень стужа стуже стужи стужу стук стука стукавшее стукал стукало стуканием стуканьем стукаюсь стукаясь стукнет стукнешься стукнув стукнувшись стукнул стукнула стукнуло стукнулся стукнут стуком стуку стул стула стуле стулом стульев стулья стульям стульями стульях ступает ступай ступайте ступал ступающей ступеней ступени ступень ступенькам ступеньках ступеньке ступеньки ступенькой ступеням ступил ступила ступили ступит ступить ступкой ступку стуча стучавших стучал стучалась стучали стучались стучало стучался стучась стучат стучатся стучать стучащаяся стучи стучись стучит стучится стучишь стучусь стушевался стушевываться стыд стыда стыдил стыдилась стыдился стыдись стыдитесь стыдится стыдиться стыдлив стыдлива стыдливая стыдливо стыдливого стыдливое стыдливой стыдливом стыдливости стыдливость стыдливый стыднее стыдно стыдом стыду стыдя стыдясь стыдящийся стыдящуюся стыжу стыжусь стыл стынет стяг стяга стягивает стягивать стяжал стянул стянута стянутая су суббота субботе субботу субъект субъектами субъектом сугробе сугробы сугубо сугубым суд суда судака сударыня сударь суде судеб судебной судебному судебные судебными судейски судейские судейский суди судил судите судить судишь судия судки судом судопроизводства судопроизводство судорог судорога судорогами судороги судорогой судорожно судорожного судорожной судорожным суды судырь судьба судьбами судьбе судьбинский судьбой судьбою судьбу судьбы судьей судьи судьям судя суеверен суеверия суеверна суеверней суеверным сует суета суете суетилась суетилось суетился суетится суетливо суетливой суетливую суется суету суеты суетясь суешь суешься суждена суждение суждении суждения суждениями сужденная сужденный суждено суждены суживая сужу суздальцев сузились суйся сукна сукно суконной сулил сулила сулит султаны сулят сулящих сумарокова сумасброд сумасбродом 
сумасшедний сумасшедшая сумасшедшего сумасшедшей сумасшедшие сумасшедший сумасшедшим сумасшедшими сумасшедших сумасшедшую сумасшествие сумасшествии сумасшествию сумасшествия сумасшествовали сумасшествует суматоха суматохе суматохи суматохой сумеет сумеешь сумел сумела сумели сумерках сумерки сумею сумеют сумки сумкой сумление сумлении сумления сумм сумма суммам сумму суммы сумочкой сумрак сумраке сумраком сумрачен сумрачно сумрачной сумрачным сундук сундука сундуке сундуки сундуком сундуку сундучок сунет сунув сунул сунула сунуть суп супа супом супруг супруга супруге супруги супругой супругу супружеского супружеское супружеском супружества супружествах супружество супу сургуч сургуча сургучом сурдиной сурмится суров сурова суровая суровее сурово суровое суровой суровом суровости суровость суровою суровую суровые суровый суровым сурок сусальный суслов суткам сутки суток суточной суточными суточных сутуловатый суть сутяжничеством суфлеры сух сухарей сухари сухариками сухариков сухарь сухаря сухарями сухая сухи сухими сухих сухо сухого сухое сухой сухости сухую сучок сучьях сушеная сушеную сушеными сушили сушилось сушильню сушит сущая сущее существ существа существе существенного существенной существенными существо существовавшего существовали существование существованием существований существования существованья существовать существом существу существуем существует существуете существую существуют существующим существующих существующую сущности сущность сфер сфера сферах сфере сферой сферою сферу сферы сфинкс сфинкса схватив схватившись схватил схватила схватилась схватили схватился схватит схватить схватиться схватку схватывает схватывается схватывают схватывая схватят схвачена схвачу схимник схитрила схитрить схлебнув схода сходи сходил сходила сходили сходились сходим сходит сходите сходится сходить сходиться сходишь сходками сходках сходки сходную сходны сходов сходства сходство сходствовал сходством сходы сходя сходясь сходят сходятся схож схожи схожу 
схоластикой схоронены схоронив схоронил схоронила схоронить сцен сцена сцене сценой сцену сцены сцепившиеся сцепились сцеплений счастие счастием счастии счастию счастия счастлив счастлива счастливая счастливее счастливей счастливейшим счастливец счастливо счастливого счастливое счастливой счастливому счастливою счастливцев счастливы счастливые счастливый счастливым счастливыми счастливых счастье счастьем счастьи счастью счастья счел счесть счет счета счетам счетах счете счетов счетом счету счеты считаем считаемся считает считаете считается считаешь считай считайте считал считала считали считались считался считать считаться считаю считают считая сшедший сшиб сшил сшит сшитом съедает съедаешь съедаю съедем съедет съеду съежившийся съежившись съезди съездил съездили съездит съездить съездов съезжаете съезжай съезжайте съезжал съезжаль съезжать съезжаться съезжаю съезжу съел съели съем съест съестно съестного съестным съестных съесть съехал съехали съехать съешь сыворотка сыграет сыграй сыграл сыграла сыграли сыграна сыграть сыграю сызмалетства сызнова сын сына сынам сыне сынков сынов сыновних сыновняя сыном сыну сыны сыпал сыпала сыпались сыплется сыплющихся сыпной сыр сырая сыро сырого сырое сырой сыром сырости сырость сыростью сыру сырую сыры сырые сыскал сыскали сыскать сыскное сыт сытое сытой сытости сыты сытые сытый сычиха сычуга сыщем сыщет сыщешь сыщи сыщика сыщики сыщности сыщу сыщут сь сю сюда сюжет сюрприз сюрпризик сюрпризы сюртук сюртука сюртуке сюртучишка сюртучок сядем сядет сяду сядут сядь сядьте сякая сякой сям т та табак табакерки табакеркой табакерку табаком табаку табачного табачной табачок таблиц таблица таблицы табуны таварищество таверны тает таз таи таившейся таил таила таилась таился таим таин таинствам таинственно таинственного таинственное таинственному таинственность таинственны таинственные таинственный таинственным таинственных таинство таится тайком тайн тайна тайнами тайная тайне тайниках тайно тайного тайное тайной тайною тайну тайную 
тайны тайные тайный тайным тайными тайных тайте так така такая также таки такие таким такими таких таков такова таковая таково таковое таковскою таковую таковы таковые такого такое такой таком такому такою таксомотор такт такта тактика тактом таку такую талант таланта талантами талантлив талантлива талантливее талантливо талантливый талантливым талантливых талантов талантом таланты талеров талией талии талию талия талый талым таль тальки тальма тальмочку тальму талью там тама тамаре тамошнего тамошней тамошние танец танца танцам танцами танце танцев танцевала танцевали танцевальный танцевать танцует танцуют танцуя танцы тара таракана тараканами тараканах тараканов тараканы тарантас тарантьев тарантьева тарантьеву тарантьевым тарантьевыми тарара тарас тараса тараска тараску тараща таращатся таращил тарелка тарелками тарелках тарелке тарелки тарелкой тарелку тарелок таскаемый таскает таскал таскаль таскаться таскают тасуйте татар татарвою татарин татарская татарские татарским татарской тать татьяна тафты таща тащи тащил тащила тащили тащило тащит тащить тащиться тащу таюсь тают тающий тая таял таяли таянье таясь таятся твари тварь тварью тверд твердая твердеют тверди твердил твердила твердили твердим твердит твердите твердить твердишь твердо твердого твердое твердости твердостию твердостью твердым твердых твердь твердя твердят тверезый тверже тверь твое твоего твоей твоем твоему твоею твои твоим твоими твоих твой творенья творец творил творилось творим творимый творится творить творогом творца творческие творческий творческого творческою творческую творчества творят твою твоя те театр театра театре тебе тебя текла текли текло текст текста текут текущей текущею тел тела теле телег телега телеге телеги телегин телегину телеграмма телеграмму телеграммы телеграф телеграфа телеграфе телеграфировали телеграфировать телеграфирует телеграфисткой телеграфные телеграфный телеграфных телегу тележка тележке тележку тележный телемака телесная телефон телефона тело телом телу 
тельца телята телятина телятиной телятину телятины тем тема темами темени темень теми темляком темна темная темневших темневшую темнее темней темнело темненькое темнеют темнеющего темнеющем темно темновласый темного темное темной темноликий темном темнота темноте темноту темноты темною темную темны темные темный темным темными темных температура тему темы темя теней тени тенист тенистой тенистом тенистый тенистых тенор тень тенью теодорих теоретиком теоретически теорией теории теорий теорийка теорию теория теориям теориях тепел теперешнего теперешнее теперешней теперешнем теперешние теперешний теперешним теперешних теперешнюю теперешняя теперича теперь тепла теплая тепле теплее теплилась теплился теплится теплице тепло теплое теплой теплом теплому теплота теплоту теплоты теплые теплый теплым теплых теплятся тер теребила теребьева теребьеву теребя терем тереть терзает терзается терзаешь терзал терзала терзалась терзали терзало терзался терзание терзаний терзать терзаю терзая терминах терпевшая терпел терпелив терпеливая терпеливо терпеливость терпение терпением терпении терпения терпеть терпи терпит терпите терпится терплю терпят терраса террасах террасе террасой террасу террасы теряет теряете теряется теряешь терял теряла терялась теряли теряло терялся терять теряю теряюсь теряют теряющая теряя тесемками тесемочкой тесен теснее теснившихся теснила теснились теснилось теснит теснитесь тесно тесной тесноте тесную тесные тесный тесных теснюсь теснят теснящим тесовый теста тесто тестова тесьму тесьмы тете тетеньки тетивою тетиву тетка тетке тетки теткой тетку теток тетрадей тетради тетрадка тетрадке тетрадки тетрадкой тетрадку тетрадь тетрадях тетушка тетушки тетя тех техники технолог технологию течение течению течения теченье течет тешат тешатся тешили тешит тешится тешу теща тещи тиатр тигров тикая тил тимофеевна тип типографий типографию типы тираду тирады тиран тиранила тироль тирольской тисках тиски тит титаны титулярный титулярным тиф тифа тих тиха тихая тихие 
тихий тихим тихими тихих тихо тихого тихое тихой тихом тихоновну тихоньким тихонько тихою тихую тициана тише тиши тишина тишине тишиной тишиною тишину тишины тишь тка ткал ткани ткань тканью ткет ткнул тко тлеет тлеющая то тобой тобою товар товаре товарищ товарища товарищам товарищами товарище товарищей товарищем товарищества товарищи товарищу товарный товаров товаром товару товары тогда тогдашнего тогдашнее тогдашнему тогдашний тогдашнюю того тоже той токаря токмо толк толкает толкал толкала толкало толкаль толками толкать толкаться толках толкаются толкая толки толкла толкли толкнув толкнул толкнулся толкнуть толков толковал толковали толкования толковать толково толковый толковым толкотня толку толкует толкуете толкусь толкучего толкучем толкучий толкую толкуют толочь толпа толпами толпе толпившимися толпившихся толпилась толпились толпилось толпился толпится толпой толпою толпу толпы толст толста толстая толстеет толстенькая толстенькие толстоваты толстого толстогубый толстой толстоты толстою толстую толстые толстый толстым толстыми толстяков толчеи толченье толчет толчки толчков толчком толчок толщиной только том тома томах томен томи томик томила томилась томили томило томился томим томите томительною томительные томитесь томится томить томиться томишь томление томлений томления томленье томленьем томленьи томленья томлюсь томна томно томного томной томнооких томную томные томный томным томными томов тому томясь томят томятся томящихся тон тона тоне тоненькая тоненькие тоненький тоненьким тоненькими тоненькой тоненькою тоненькую тонет тонкая тонкие тонкий тонким тонкими тонких тонко тонкого тонкое тонкой тонкости тонкость тонкостью тонкую тонная тонок тоном тону тонула тонуло тонут тончайшая тончайшее тончайшей тончайший тоны тоню топ топать топил топили топит топить топливо топнув топнул топнули тополевой тополевую тополей тополем тополи тополями топор топора топорами топоре топорной топоров топором топорщиться топот топочет топтал топтать топчешь топчу 
топчут топью топят торгах торги торговал торговала торговали торговать торговаться торговка торговку торговле торговли торгового торговое торговой торговца торговцы торговый торгует торгуй торгуют торжеств торжества торжестве торжественно торжественного торжественное торжественной торжественном торжественностию торжественную торжественные торжественный торжественным торжество торжествовал торжествовала торжеством торжествует торжествую торжествуют торжествующе торжествующею торжествуя тороват торопил торопила торопилась торопились торопился торопит торопитесь торопится торопить торопиться торопливо торопливого торопливой торопливость торопливостью торопливою торопливый торопливыми тороплюсь торопясь торопят торопятся торт тортом торфом торчавшей торчавший торчавшими торчал торчала торчат торчит торчком тоска тосканские тосканских тоске тоски тоскливо тоскливой тоскливом тоскливый тоскливым тосковал тосковала тосковать тоской тоскою тоску тоскуем тоскует тоскуешь тоскую тоскуют тоскующего тоскующий тоскуя тот тотчас точа точек точил точит точить точка точками точке точки точкой точку точнее точнейшим точнехонько точно точного точное точном точности точностию точностью точны точные точных точь тошней тошно тошноты тощая тощий тощими тощую тою тра трав трава травами траве травила травке травку травля травой траву травы трагедии трагедий трагедия трагик трагиков трагически трагические трагического трагическое трагической трактат трактир трактира трактирам трактире трактиришке трактиришко трактирное трактирной трактирный трактиров трактиром трактиры трам трамваи транспарант трата трате тратил тратилось тратит тратите тратится тратить траты тратят тратятся траур траура трауре траурная траурного траурной траурною траурные траурный траурными трахтир трачу требовавшие требовавших требовал требовала требовали требование требовании требований требованию требования требовать требу требуемые требует требуется требуешь требуй требуйте требухой требую требуют требующее требующие 
требуя тревог тревога тревогам тревогах тревоге тревоги тревогой тревогою тревогу тревожа тревожась тревожат тревожил тревожила тревожилась тревожили тревожило тревожился тревожит тревожится тревожить тревожиться тревожишь тревожная тревожнее тревожно тревожного тревожное тревожном тревожные тревожный тревожных тревожусь тревожь тревожься тревожьте треволнения треволненьям трезв трезва трезво трезвой трезвонят трезвый трезвым трезор треклятая треклятые трельяже тремя тремястами тренькала трепала трепаном трепанье трепать трепет трепета трепетавшая трепетавшем трепетавших трепетал трепетала трепетали трепетания трепетанье трепете трепетно трепетного трепетным трепетом трепету трепеща трепещет трепещут трепещущего трепещущей трепещущие трепещущий трепещущим трепещущих трепещущую треплев треплева треплеву треплевым треплет треплют треплются трепля треск трескает треске треском трескотней трескотня трескучей трет третий третного треть третье третьего третьегодичные третьегодичных третьегодни третьей третьем третьему третьи третьим третьих третью третья трефовую трех трехлетней трехлетний трехлетняя трехрублевому трехрублевую трехрыбное трехсот трехстах трехударный трехэтажный треща трещавших трещал трещала трещали трещат трещать трещина трещине трещину трещит три трибуне трибуной тригорин тригорина тригорину тригориным тридцати тридцатипятилетний тридцатирублевую тридцать тридцатью трижды тризне тринадцати тринадцатилетнего тринадцатого тринадцать триста триумф трихины трогает трогается трогаешь трогай трогайте трогал трогала трогали трогались трогательнее трогательно трогательные трогательным трогать трогаться трогаю трогаясь трое троек троекратно троекратного троекратное троекратным трожь трои троими троих троица троицын тройка тройками тройке тройная тронет тронется тронешь тронул тронулся тронут тронуты тронутый тронуть тронуться тронь троньте тропами тропе тропинка тропинке тропинки тропинку тропой тропою тропу тросточку трость тростью троттуар троттуары тротуар 
тротуара тротуарам тротуарами тротуаре тротуаром тротуару трофимов трофимова трофимович трофимову трофимовым трофимыч трофимыча троюродные тррреклятые труб труба трубах трубе трубит трубка трубками трубки трубкой трубкою трубку трубные трубный трубочист трубочка трубу трубы труд труда трудам трудами трудах труде труди трудилась трудимся трудись трудится трудиться трудная труднее трудно трудного трудное трудной трудности трудность трудную трудные трудный трудным трудных трудов трудовая трудовое трудовой трудовую трудолюбивой трудолюбивые трудолюбивый трудолюбие трудолюбию трудом труду труды трудясь трудятся труженическое тружусь труп трупный трупом трупы трус труса трусил трусить трусишки труслив труслива трусливо трусливое трусоват трусости трын трюфелями тряпка тряпки тряпкой тряпку тряпок тряпье тряпьем трясется трясла тряслась тряслись трясся трясутся тряся трясясь тряхнул тс тсс тся ту туалет туалета туалетную туалеты туго тугой туда тужит тужить туз тузенбах тузенбаха тузенбахе тузенбаху тузил тузили тузить туловище туловищем тулон тулона тулуп тулупе тульские туман тумана туманам туманами туманах тумане туманился туманить туманное туманной туманном туманную туманные туманный туманным туманными туманных туманов туманом туманы туманят тумбе тумбу тунеядства тупая тупеет тупею тупик тупицы тупо тупой тупом тупость тупоумия тупоумного тупую тупым тур тура тургенев тургенева тургеневская турецки турецкий турецким турецкой туриста турку тускло тусклое тусклой тусклые тусклый тусклым тусклыми тускнело тут туфлей туфли туфлю туфлях тухлая тухлой туч туча тучами тучах туче тучей тучи тучки тучкой тучкою тучных тучу туш тушат тушит тушу тш тщательнее тщательно тщеславен тщеславие тщеславии тщеславия тщеславные тщета тщетно тщетным ты тый тыкать тыква тысяч тысяча тысячами тысячах тысяче тысячей тысячелетием тысячелетий тысячелетней тысячи тысячная тысячу тысячью тысящи тычет тычут тьма тьме тьмой тьму тьмы тьфу тюков тюлевом тюменев тюменева тюремном тюремные тюремный 
тюрьма тюрьмами тюрьме тюрьмой тюрьму тюрьмы тютчева тюфяки тягла тягой тягостию тягостно тягостное тягостную тягостны тягость тяготеют тяготили тяготились тяготился тяготит тяготить тяготишься тяготящее тягу тяжбам тяжбу тяжебным тяжел тяжела тяжелая тяжеле тяжелее тяжеленько тяжелит тяжело тяжеловеса тяжелого тяжелое тяжелой тяжелом тяжелую тяжелы тяжелые тяжелый тяжелым тяжелыми тяжелых тяжести тяжесть тяжкие тяжкий тяжким тяжких тяжко тяжкое тянет тянется тянул тянулась тянулись тянуло тянулось тянутся тянуть тяп у уа уайльда убавив убавить убаюканные убаюкивала убаюкивают убегает убегал убегающей убегающим убегая убегу убедилась убедился убедим убедительнейшая убедительно убедительностью убедительную убедится убедить убедиться убедишься убежал убежала убежали убежать убеждаюсь убеждая убежден убеждена убеждение убеждением убеждении убеждений убеждению убеждения убеждениями убеждениях убежденнее убежденно убежденный убежденным убеждены убежденьях убежит убежите убей убейте убережешь уберет уберечь уберешь уберите уберу убив убиваем убивает убиваете убивается убивал убивалась убивали убиваль убивать убивают убиваясь убивец убивство убийств убийства убийстве убийство убийству убийца убийцам убийцей убийцу убийцы убил убила убили убирает убирай убирайся убирайтесь убирал убирала убирании убирать убираю убирают убирая убит убита убитая убито убитого убитой убитою убитую убитые убитый убитым убитыми убить убогая убогий убор убора уборе уборка уборке уборки убоюсь убоялись убрал убрала убралась убрали убрались убран убранная убранной убранною убранные убранных убранстве убранство убрать убыло убытка убыткам убытками убытку убыток убьем убьет убьете убьешь убьешься убью уважавший уважаем уважаемой уважаемому уважаемым уважает уважаете уважаешь уважай уважал уважала уважали уважать уважаю уважают уважающие уважение уважением уважении уважения уварился уведи уведите уведомил уведомили уведомить уведомлен уведомление уведомлял уведомляла увез увезен увезите увезла увезли 
увезти увезу увезшей увековечить увел увела увеличивается увеличивалось увеличиваются увеличилась увеличились увеличился увеличить увенчали увенчались уверен уверена уверение уверений увереннее уверенно уверенности уверенность уверенностью уверенные уверенный уверенным уверены уверенья уверил уверился уверит уверительно уверить уверишь уверовала уверовали уверовать увертел увертка увертки увертывайся уверует уверуют уверяет уверяйте уверял уверяла уверяли уверять уверяю уверяют уверяя увеселения увеселительного увеселительные увеселительный увеселяли увесистых увести увесть увечному увещевая увидав увидавши увидал увидала увидали увидать увидев увидел увидела увидели увидеть увидим увидимся увидит увидите увидишь увидишься увидя увидят увижу увижусь увитое увлек увлекаем увлекал увлекалась увлекался увлекательный увлекаться увлекают увлекающаяся увлекаясь увлеклась увлекли увлекло увлекся увлекусь увлечен увлечение увлечением увлечений увлечению увлечения увлеченный увлечены увлечь увлечься уводила уводит увози уволили уволь увольняли увольте увражами увы увядать увядающей увядая увядшей увядшие увяз увязаться увязнув увязывает увял увяла угадав угадает угадаете угадай угадал угадала угадали угаданная угадано угаданы угадать угадывает угадываете угадываешь угадывал угадывало угадывать угадываю угадывая угар угарно угас угасающего угасла угасли угасло угаснет угаснувшем угасшим угла углам углах угле углов угловатое угловатую угловатым угловатыми угловом углом углу углубилась углубился углубится углубиться углублен углубление углублении углы угль угнетает угнетал угнетена уговаривай уговаривал уговаривала уговаривая уговор уговора уговорив уговорил уговорите уговорить уговориться уговором угодить угодишь угодливости угодно угождений угождения угол уголка уголке уголки уголков уголку уголовное уголовных уголок уголь угольев угольком угольной уголья угорелая угорелый угорят угостить угощает угощал угощать угощение угощением угощения угощу угрожайте угрожал угрожающее 
угрожающею угрожающий угрожающую угроза угрозами угрозой угрозою угрозу угрозы угрызать угрызение угрызения угрюм угрюма угрюмо угрюмого угрюмое угрюмой угрюмому угрюмость угрюмство угрюмые угрюмый угрюмым угрюмых угрях удавалось удавался удавившеюся удавившиеся удавился удавиться удается удалась удалении удали удалились удалилось удалился удалиться удалось удался удалюсь удалявшейся удалявшиеся удаляется удаляться удар удара ударам ударами ударе ударение ударившего ударил ударила ударилась ударило ударит ударится ударить удариться ударов ударом удару удары ударь ударю ударяя ударяясь удастся удаче удачнее удачно удвоенное удвоенною удвоенным удвоивается удвоил удвоилась удвоилось удвоишь удел удержал удержала удержалась удержали удержались удержался удержанность удержать удержаться удерживает удерживается удерживайте удерживал удерживала удерживали удерживать удерживая удерживаясь удержись удержит удержите удержу удивившею удивившийся удивившись удивил удивилась удивили удивились удивило удивился удивительная удивительно удивительного удивительное удивительном удивительною удивительные удивительный удивительным удивить удивлена удивление удивлением удивлении удивлению удивления удивленная удивленно удивленного удивленной удивленный удивленным удивленными удивленья удивлю удивляетесь удивляется удивляешься удивлял удивлялась удивлялись удивлялся удивлять удивляться удивляюсь удивляясь удивясь удивятся удил удит удить удобнее удобно удобного удобном удобную удобопонятную удобств удобство удовлетворен удовлетворена удовлетворение удовлетворения удовлетворенного удовлетворенное удовлетворенной удовлетворенный удовлетворенных удовлетворил удовлетворилась удовлетворительно удовлетворительного удовлетворительном удовлетворительною удовлетворительные удовлетворительный удовлетворительным удовлетворить удовлетворят удовлетворяясь удовольствие удовольствием удовольствии удовольствия удовольствиях удовольствовался удостаивают удостоверения удостоверившись удостовериться 
удостоивая удостоила удостоили удостоился удочками удочки удочку удрал удрученному удрученный удушливая удушливого удушливый удушьем удушья удят уедем уедет уедете уедешь уединение уединении уединения уединеннее уединенно уединенное уединенной уединенном уединился уединиться уеду уедут уезд уезда уезде уездили уездном уездную уездные уездный уездным уезду уезжавшей уезжаем уезжает уезжаете уезжаешь уезжай уезжайте уезжал уезжала уезжали уезжать уезжаю уезжают уезжая уехал уехала уехали уехать уж ужаленного ужалило ужаль ужас ужаса ужасались ужасался ужасам ужасами ужасают ужасаясь ужасе ужасен ужасная ужаснее ужасней ужаснейшая ужаснейшее ужаснейший ужаснейшим ужасно ужасного ужасное ужасной ужасном ужаснувшею ужаснулась ужаснулись ужасную ужасный ужасным ужасными ужасных ужасов ужасом ужасу ужасы уже ужели ужель уживается уживался ужин ужина ужинаем ужинает ужинаете ужинайте ужинал ужинали ужинать ужиная ужином ужину ужо уз узаконенной уздцы узел узелком узелок узенькая узенькой узенькую узкая узкие узкий узкое узкой узкому узкую узла узлами узле узлом узлы узнав узнавал узнавала узнавали узнавать узнавая узнавшим узнаем узнает узнаете узнаешь узнай узнайте узнал узнала узнали узнать узнаю узнают узор узора узоре узорном узорные узорный узорными узоров узором узоры узость узрит узрят узы уйдем уйдемте уйдет уйдете уйдешь уйди уйдите уйду уйдут уймите уймитесь уйти укажет укажете укажите укажу указав указал указала указали указание указанием указаний указанию указания указанного указательном указательным указать указки указывает указывал указывала указывали указывать указывают указывая укачивая уклада укладка укладке укладки укладкой укладкою укладку укладок укладывает укладывала укладывался укладывать укладываться уклонился уклонитесь уклончивость уклонюсь уклоняете уклоняется уклонялась уклонялись уклоняюсь уклоняясь укокошил укор укора укоренился укоризненно укоризной укоризны укоров укором укоряемый укорял укоряла укоряли укорять укоряю укоряют украденный 
украденным украдкой украл украли украсишь украсть укрась украшают украшение украшением украшенной укрепился укрепит укрепится укрепляет укреплялась укрепляться укроете укрощая укрываться укрыла укрыться уксуса уксусом уксусу укусил укусила уладил уладилось уладится уладить уладь уланом улеглись улеглось улегся улегшись улетает улетайте улетал улетающему улетевший улетело улететь улетим улетит улетят улизнет улизнул улизнуть улик улика улике улики уликой улиссов улиц улица улицам улицами улицах улице улицу улицы уличать уличку уличная уличное уличной уличном уличному уличною уличную уличные уличный уличным уловив уловила уловимого уловит уловить уловка уловками уловляли уложено уложив уложила уложить уложу улучить улучшение улучшений улучшения улучшились улыбавшееся улыбаетесь улыбается улыбалась улыбались улыбалось улыбался улыбаться улыбаюсь улыбаются улыбающегося улыбающийся улыбающимся улыбающихся улыбаясь улыбка улыбкам улыбками улыбке улыбки улыбкой улыбкою улыбку улыбнетесь улыбнется улыбнешься улыбнувшись улыбнулась улыбнулись улыбнулся улыбнусь улыбнуться улыбок улыбочкой ум ума умаливала умаливания умалчиваю умалялось умаялись умбрии умбрских уме умевший умеем умеет умеете умеешь умей умел умела умели умельчились умен умению уменье уменьшение уменьшения уменьшить уменья умер умеренная умеренности умеренностью умеренную умеренны умеренный умеренным умереть умерла умерли умерло умертвила умертвить умершая умершего умершее умершей умершем умерший умершим умершую уметь умещается умею умеют умея умиление умилением умилении умиления умилялась умирает умирал умирала умирали умирать умираю умирают умирающего умирающий умирающим умирающих умирая умирит умна умная умнее умней умнейший умненько умненькой умник умники умников умниц умница умницей умничает умничанья умничать умно умного умной умны умные умный умным умными умных умов умозрений умозрительная умозрительным умозрительных умолить умолк умолка умолкает умолкала умолкать умолкающая умолкая умолкло умолкшими 
умолот умолоте умолочено умолчать умолчу умолю умолявшего умоляет умолял умоляла умолять умоляю умоляюще умоляющим умоляющими умоляя умом умопомешательства умопомешательстве уморил уморить умоюсь умрем умрет умрете умрешь умри умрите умру умрут умственно умственного умственные умственными умственных уму умчались умчало умчит умы умывает умывайтесь умывался умывальном умываться умылась умылся умысел умысла умысле умыслом умыться умышленно умышленной умышленному унес унесенная унесено унесет унесите унесли унеслось унести унесут университет университета университете университетские университетской университетскому университету университеты унижается унижал унижающая унижен унижение унижения униженная униженный униженным унижены униженье униженьи унизился унизительно унизительного унизительны унизывались унимал унимала унимали унимались унимать унимают уничижения уничиженный уничтожают уничтожаясь уничтожен уничтоженный уничтожив уничтожил уничтожило уничтожим уничтожит уничтожится уничтожить уничтожиться уносил уносила уносился уносит уноситься уносишь уносящая унтер уныл унылая уныло унылого унылом унылый уныние унынием унынии уныния уняли унять упавшего упавший упавшим упадал упадала упадет упадете упади упадка упадку упадок упаду упадут упал упала упали упало упасла упасть упер уперлась уперлася уперся упечь упивался упиваясь упираете упирал упиралось упирался упираться упирая упираясь уписала уписалась упиться уплате уплатил уплатили уплатить уплатою уплату уплаты уплывешь уплыли упование уповать уподоблению упоена упоением упоительно упокоил упокой уполномочил уполномочила упоминает упоминается упоминайте упоминал упоминала упоминать упоминаю упомнишь упомню упомянет упомяните упомяну упомянул упомянула упомянули упомянута упомянуто упомянуть упор упорная упорно упорное упорной упорному упорную упорный упорным упорство упорством употебите употребил употребит употребить употребление употреблении употребления употреблю употребляется употреблял употреблялись 
употреблять употребляя управе управимся управитель управится управиться управление управлении управления управлюсь управляет управлял управлять управляющего управляющему управляющий управляющим управляющих управляя управу управы упразднена упрашивал упрашивала упрашивали упрашивания упрашиванья упрашивать упрашивая упрек упрека упрекал упрекала упрекали упрекать упрекаю упреке упреки упрекнешь упрекнул упрекнули упрекнуть упреков упреком упрет упрется упросил упросит упросить упрочились упрочить упругих упругой упругость упрыгаетесь упрямец упрямо упрямой упрямстве упрямство упрямству упрямый упрямься упрятал упускай упускал упускать упускаю упущение упущений упущения упущу упыри упьется ура ураган уразумела уразумения урезонить урной урод уродился уродится уродливое уродливы уродуется урожаем урожай урожденная урок уроками уроки уроков уроком уроненную уроненным уронены урони уронив уронил уронила уронили уронит уроню урочные урывками урывочками ус усадеб усадив усадил усадили усадит усадьба усадьбе усадьбу усадьбы усаживает усаживал усаживалась усаживался усаживать усаживаются усаживая усаживаясь усами усатые усахарил усач усача усачу усвоил усвоить уселась уселись уселся усердием усердию усердия усердная усердно усердные усердным усидел усидела усидеть усидит усидишь усиками усиления усиленно усиленное усиленной усиленному усиленным усиливается усиливалась усиливался усиливать усиливаясь усилившейся усилие усилием усилий усилиться усилия усилиям усилиях усильем усилья ускользает ускользала ускользая ускользнет ускользнул ускользнуть ускоренному ускоряла ускоряя услада усладой услады услаждает услаждать услал уследить условием условии условились условиться условию условиях условно условный услуг услуга услуги услугу услужливая услужливого услыхав услыхал услыхала услыхали услыхать услыша услышав услышал услышали услышанный услышат услышать услышая услышим услышит услышите услышишь услышу усмехается усмехаясь усмехнется усмехнувшись усмехнулась усмехнулись 
усмехнулся усмешка усмешке усмешки усмешкой усмотреть усмотришь уснете уснешь усните усну уснул уснула уснут уснуть усов усовещивал усомниться усопшая усопшем усопших успев успеваем успевает успевал успевая успевший успевшую успеем успеет успеете успеешь успел успела успели успение успеньев успеть успех успеха успехами успехе успехи успехов успехом успеху успею успеют успокаивал успокаивался успокоен успокоения успокоенная успокоено успокоивал успокоивала успокоивали успокоивался успокоивать успокоил успокоила успокоилась успокоили успокоились успокоилось успокоился успокоим успокоит успокоительно успокоительное успокоительном успокоительные успокоительным успокоительных успокоитесь успокоится успокоить успокоиться успокой успокойся успокойтесь успокою успокоюсь успокоятся уст уста уставай уставала уставали уставало уставать уставая уставившись уставил уставила уставилась уставились уставился уставит уставится уставленных уставом уставу уставши уставший уставшим уставясь устал устала усталая устали устало усталого усталой усталом усталости усталость усталостью усталую усталые усталый усталым усталых устанете устанешь установил установила установилась установились установителей установители установится установить установлен установленное установлено устану устанут устаревших устареете устарелым устах устающими устилал устлала устланная устоит устоят устоять устраивает устраивается устраивать устраиваются устранит устранить устраняйте устранять устраняя устраняясь устремив устремил устремила устремились устремлен устремлена устремленной устремленные устремленный устремленным устремлены устремляется устремлялись устремляющий устриц устроен устроена устроенная устроенный устроенных устроено устроены устроил устроилась устроили устроилось устроим устроит устроится устроить устроиться устройства устройстве устройство устройством устройству устрою устроятся уступает уступал уступали уступаю уступают уступая уступил уступила уступило уступить уступом уступчива устыдясь 
усумнится усчитать усы усыпанной усыпаны усыпите усыпить усыпленные усыплять усядется усядусь утайки утаскивая утащил утащить утвердилась утвердилось утвердительно утверждал утверждать утверждаю утверждают утверждая утвержден утекло утер утерев утерпел утерпела утерпеть утечет утешает утешал утешала утешался утешать утешаюсь утешают утешение утешений утешения утешенная утешенный утешенным утешенье утешенья утешились утешился утешительнее утешительное утешится утешить утешишь утешься утирает утирал утирала утирать утирая утихал утихала утихать утихла утихли утихло утихнет утки уткнув утлые утоли утолить утомилась утомили утомился утомит утомится утомлен утомлена утомление утомлении утомления утомленная утомленно утомленного утомленное утомленные утомленный утомленным утомлено утомлены утомлюсь утомляет утомлялись утомляло утонувшая утонувший утонул утонула утонут утонуть утонченности утонченность утонченный утопает утопала утопало утопающей утопающий утопая утопил утопилась утопится утопить утопиться утопия утопленница утопленницей утопленницу уторопленно уторопленною утр утра утрам утрат утрата утратив утративший утратил утратила утраты утраченного утраченное утреет утреннего утренней утреннем утренние утренний утренник утренним утренними утренних утречком утри утро утром утру утруждайте утружденным утучнялись утыканными уф ух уха ухаживавшего ухаживает ухаживал ухаживали ухаживанье ухватил ухватилась ухватит ухватится ухватить ухватиться ухватках ухе ухи ухитрился ухитрись ухитряется ухитрялся ухищрений ухлопал ухлопали ухлопаны ухмыляется ухмыляясь ухну ухо уход уходе уходи уходил уходила уходили уходило уходим уходит уходите уходить уходишь уходом уходя уходят уходящего уходящий ухожу ухом уху уцелевшей уцелевшие уцелел уцелела уцелели уцепились уцепился уцепиться уцепясь уча участвовала участвовали участвовать участвует участи участие участием участии участий участия участки участник участниках участница участок участь участья учат учатся учащенными учащенных 
учебное учебные ученая учение учений ученик учеников ученику ученические ученого ученой ученом учености ученую ученые ученый ученым ученых ученье ученью ученья учетверю учи учившей учившийся учил учила училась учили учились учился учинить учись учит учителей учителем учитель учительницу учительницы учителя учителях учится учить учиться учишь учишься учнет учреждают учтиво учтивости учу учусь ушам ушами ушата ушах ушей ушел уши ушиб ушиба ушибется ушибла ушиблена ушибленное ушибли ушки ушла ушли ушло ущельям ущерб ущербной ущипнет ущипнула ую уют уюта уютно уютный уюты уязви уязвило уязвит уязвлена уязвленной уязвлять уяснил уяснили уяснить ф фабрик фабрикам фабриках фабрики фабрику фабричные фабричными фабричных фаддеевна фаддеевны фазисы факел факелом факт факта фактам фактами фактах факте фактически фактические фактическую фактов фактом факты факультет факультета факультете фаланстера фаланстере фаланстеры фалду фальшив фальшива фальшивая фальшиво фальшивое фальшивой фальшивом фальшивую фальшивы фальшивые фальшивый фальшивым фальшивых фамилии фамилию фамилия фамильной фамильные фамильный фамильярен фамильярно фамильярное фамильярности фамильярностию фамильярность фанатизм фанатизмом фант фантазерка фантазией фантазии фантазий фантазию фантазия фантазиям фантазиях фантаст фантастичен фантастические фантастическим фантастического фантастическое фантастическому фантастическую фантастична фантастичностью фанфарон фанфаронишки фанфаронства фанфароню фартуке фартуком фартучек фарфор фарфоровые фарфоровых фаршированные фасона фат фата фатальное фатер фатера фатере фатеришку фатеру фатеры фатой фатом фатою фаты фауну фауст фаустами феврале февраль февраля федераций федор федора федоровна федоровной федоровны федосья федот федотик федотика федотиком федя федяева фейерверк фейерверков фельдшер феномен феноменальным ферапонт ферапонта ферапонтом ферапонту фермах фермера фермуар фермы ферулой фески фестонами фестонов фестоны фет фефелой фи фиалки фигляр фигура фигурам фигуре 
фигурка фигуркой фигурой фигуру фигуры физиологически физиологию физиология физиономией физиономии физиономий физиономию физиономия физически физические физическими физического физическое физическом физическую филе филин филипп филиппа филиппе филиппики филиппо филиппом филистера философ философии философистика философию философия философом философская философских философствовал философствовать философствуем философствует философствуете философствуй философствуют философы филька фимиам финансах финансист финансовой финик финифтяный финляндии финляндию финляндия финская финский фиолетовая фирс фирса фирсом фирсу фистулу флакон фламандки фланги фланелевое фланера флегматически флейта флер флеровое флером флигеле флигель флигеля флигелям флобера флобером флорентийский флорентийской флоренции флоренцию флоренция флору флот фокус фокусы фома фоминой фомич фомича фомиче фомичем фомичом фомичу фон фонаре фонарей фонарем фонари фонарик фонарном фонарь фонаря фонд фондов фоне фонтан фонтанов фонтаном фону форелей форели форель форм форма формалистики формально формальностей формам формами формах форме форменно форменное форменной форменном формировалась формироваться формируется формой форму формулирован формулярных формы фортепианах фортепиано фортепьян фортепьяно форточек форточки форточку фортуной фортуну фотографии фотографию фотография фотографом фра фраз фраза фразами фразах фразе фразер фразу фразы фрак фрака фраке фраком франс франт франта франтов франтом франты франц францевна францевне францевны франции франциска францию француженке француженки француженок француз француза французах французов французски французские французский французским французского французское французской французском французскому французы фрегате фреской фри фривольное фруктах фрукты фу фуй фунт фунта фунтов фуражечку фуражка фуражках фуражке фуражки фуражкой фуражку фурор фурье футляр футляра футлярах футляре футляров футляры футуризм футуризма футуриста фуфайки фуфайку фыркает фыркать фыркнул 
фыркнули фье фьезоле х ха хаживал халат халата халатах халате халатом хам хандра хандрил хандрит хандрить хандрой ханжу ханской хаос хаоса хаосе характер характера характере характеристике характеристики характеристику характерная характерно характерность характерные характерный характером характеру характеры харей хари харкает харламов харламова харчевен харчевне харчевни харчевню харчевня харьков харькова харькове харьковские харькоеве хата хвалил хвалилась хвалили хвалился хвалимому хвалит хвалится хвалить хвалу хвалы хвалят хвастался хвастаться хвастают хвастливостей хвастун хватавшемуся хватает хватаете хватается хватал хватало хватать хватаю хватаются хватающий хватая хватаясь хватил хватили хватились хватило хватит хватится хватишься хвать хвораете хворал хвораю хворая хвост хвостатой хвостом хе хений хераскова херес хересу херувима херувимов херувимову хижин хижина хижине химерой химеры хины хирург хитер хитон хитра хитрая хитрее хитрей хитрейшего хитрейшим хитреца хитрецы хитрила хитрить хитришь хитро хитрого хитрое хитрости хитростная хитрость хитростью хитростями хитрые хитрый хитрым хитрят хихикайте хихикал хихикали хихикание хихиканье хихикать хихикая хищник хищница хищной хищный хлад хлада хладен хладно хладнокровен хладнокровие хладнокровием хладнокровнее хладнокровно хладном хладные хладных хлам хламом хлеб хлеба хлебе хлебника хлебнику хлебнувшая хлебные хлебом хлебцем хлестала хлестнул хлещет хлещи хлой хлоп хлопает хлопали хлопаньем хлопанья хлопать хлопнул хлоповых хлопот хлопотавшей хлопотал хлопотала хлопотали хлопотам хлопотами хлопотать хлопотах хлопотливо хлопотливой хлопотливые хлопотун хлопоты хлопочем хлопочет хлопочете хлопочи хлопочу хлопочут хлопьями хлороформом хлынет хлынувшую хлынул хлынула хлынули хлынуло хлынут хлынь хлыст хлыста хлыстиком хлыстом хме хмелел хмелен хмель хмельней хмельной хмелю хмеля хмурилась хмурился хмуритесь хмурится хмуришься хмурые хмурьтесь хмурясь хныкать хнычет хнычешь хнычущих хо ход хода ходатай ходе 
ходи ходил ходила ходили ходило ходиль ходим ходит ходите ходить ходишь ходу ходьба ходьбы ходя ходят ходячей ходящее хожалые хождение хождением хождении хождения хожу хозе хозяев хозяева хозяевам хозяин хозяина хозяином хозяину хозяйка хозяйке хозяйки хозяйкина хозяйкиной хозяйкиною хозяйкины хозяйкой хозяйку хозяйничает хозяйничала хозяйничать хозяйская хозяйские хозяйский хозяйским хозяйского хозяйское хозяйской хозяйскую хозяйства хозяйстве хозяйственная хозяйственно хозяйственном хозяйственные хозяйственным хозяйственных хозяйство хозяйством хозяйству хозяюшка холера холеру холм холма холмам холмами холмах холмик холмов холмы холод холода холоде холодеет холодело холоден холодеющим холодея холодке холодна холодная холоднее холодно холодного холодное холодной холодном холодности холодность холодною холодную холодный холодным холодными холодных холодок холодом холоду холодя холопов холостой холостою холостых холста холстинные холстинными холстины хомут хомуты хор хора хорватов хоре хорек хоровод хороводе хоровую хором хоронит хоронить хоронишь хорош хороша хорошая хорошего хорошее хорошей хорошем хорошенькая хорошенькие хорошенький хорошенько хорошенького хорошенькое хорошенькой хорошенькою хорошенькую хороши хорошие хороший хорошим хороших хорошо хорошую хору хорунжина хотевшему хотевшую хотел хотела хотели хотело хотелось хотель хотеть хотим хотите хоть хотя хотят хохлами хохлом хохлушка хохот хохота хохотал хохотала хохотали хохотать хохоте хохотом хохоча хохочет хохочешь хохочут хочет хочется хочешь хочу хошь храбрая храбреца храбрится храбро храбрости храм храма храни хранил хранилась хранили хранилось хранит хранится хранить храню храня храп храпенье храпенья храпеть храпит храпя хребту хрен хреном хрену хрипел хрипенье хрипеть хрипит хрипло хриплый хриплым хрипят христа христе христианин христиански христианской христов христовых христом христос христу хром хромая хромой хроники хроническою хрупкие хрупко хрупкого хруст хруста хрустали хрусталь 
хрустальная хрустального хрустальной хрустальном хрустальные хрустальный хрустальным хрустел хрусти хрустит хрычовки худ худа худая худенькая худенькое худенькой худенькому худо худое художественная художественно художественного художественное художественной художественность художественный художество художник художника художники художников худой худосочный худощав худощавый худую худы худые хуже хулений хулу хуторков хуторок хуторочки цапли царе царевна царевнах царевне царевной царем царили цариц царица царицей царский царских царское царской царств царственный царствие царствии царство царствовал царствовали царством царствует царствуют царь царьграда царьградских царю царя цвел цвела цвели цвело цвет цвета цветам цветами цветах цвете цветенья цветет цветешь цветка цветками цветки цветком цветная цветник цветникам цветники цветной цветном цветным цветных цветов цветок цветочками цветочки цветочков цвету цветут цветущая цветущие цветы цезаря цел целая целебным целей цели целию целковому целковый целковыми целковых целовал целовала целовали целовало целовался целовании целовать целоваться целого целое целой целом целому целомудрен целомудренна целомудренно целомудренность целомудренные целомудрие целомудрием целомудрии целостности целость целою целу целует целуется целуешь целую целуют целуются целующуюся целуя целы целые целый целым целыми целых цель цельная цельного цельный целью целям целями цена цене цени ценил ценим ценит цените ценить ценишь ценными ценою центр центра центров цену цены ценю ценя ценят цепей цепенея цепи цепкими цеплялся цепной цепочка цепочками цепочках цепочке цепочки цепочку цепь цепями церемонией церемонии церемонится церемониться церемонно церемонятся церквах церквей церкви церковной церковные церковь церковью цеховое цивилизованной цикорию цикория цилиндра цилиндре циммермана циммермановская цинизм цинизме циник циников циником циническим циническое цинично циничный цирках циркуляция цирфа цирюльник цитат цитуется цифр цифрах цифру цифры 
цицикар цоканья цссс цугундер цуниги цусимой цыганка цыганкой цыганская цыганские цыганских цыганского цыганской цып цыпленка цыпленок цыплят цыплята цыплятами цыплятах цыпочка цыпочках цыпочки ча чад чада чадила чадин чадры чаду чае чаем чаи чаишко чай чайка чайки чайкой чайку чайная чайник чайнике чайников чайницы чайного чайное чайной чайном чайным чалма чалме чар чародейную чародею чарой чару чарует чарым час часа часам часами часах часик часика часов часовая часовенка часовни часовню часовня часовой часок часом частенько части частию частного частной частном частному частности частную частные частный частных часто частые частым частыми часть частью частям частях часу часы чахнет чахнут чахнуть чахотка чахотке чахотки чахоткой чахотку чахоточная чахоточное чахоточной чахоточном чахоточному чахоточную чахоточный чаша чашей чашек чашечку чаши чашка чашками чашки чашкой чашку чашу чащ чаще чащу чаю чая чаями че чебаров чебарова чебарову чебаровым чебутыкин чебутыкина чебутыкину чебутыкины чебутыкиным чего чей чекменев челе челках челки челн челноке чело человек человека человекам человеками человеке человеком человеку человечек человеческая человечески человеческие человеческий человеческим человеческих человеческого человеческое человеческой человеческому человеческую человечества человечество человечеству человечка человечности человечность человечьем челом чем чемодан чемодана чемоданов чемоданом чемоданы чему чепец чепраках чепуха чепухою чепуху чепца чепце чепцов чепцом чепцу чепцы чепчик чепчика чепчике чепчиком чер червовый червонно червонного червь червяки чердак чердаке черед чередовалась чередой через черемухи черемша черемши черен черенками череп черепа черепаха черепка черепки черепок черепом черепу чересчур черна черная чернее чернеет чернеется чернейшей чернелись чернелось черненький чернеют черни чернил чернила чернилах чернильница чернильнице чернильницей чернильницу чернильницы черно черноброва черноволосый черноглаза черного черное черноземным 
черной черном черному черноокая чернорабочий чернота черноте черноту черною черную черные черный черным черными черных чернь черпай черпают черпнуть черство черствый черствым черт черта чертам чертами чертах черте чертежа чертеже чертежи чертей черти чертиков чертил чертила чертит чертить чертовски чертог чертой черточек черточка черточками черточку чертою черту черты чертя чесал чесался чесать чеснока чествует честей честен чести честит честна честная честнее честнейшая честнейшее честнейшие честно честного честное честной честности честность честную честны честные честный честным честными честных честолюбие честчым честь честью чет чета четверг четвергам четвергу четверо четвертаки четвертая четвертей четверти четвертого четвертое четвертой четвертом четвертую четвертый четвертым четвертых четверть четверым четко четкости четыре четырем четыреста четырех четырехугольника четырехэтажного четырехэтажный четырнадцати четырнадцатилетний четырнадцатилетнюю четырнадцать чехартма чехартмы чехла чехлами чехов чеченская чешет чешется чешутся чешую чи чижами чижи чик чиканием чин чина чинах чинила чинит чинить чиннее чинно чинной чинною чинные чинный чинных чинов чиновник чиновника чиновниками чиновники чиновников чиновником чиновнику чиновница чиновницу чиновницы чиновничеством чиновничье чиновничьи чиновного чиновной чиновный чиновных чином чину чины чирикала чирикали чисел числа числе численно число числом числю числюсь чист чиста чистая чистейшего чистейший чистенькие чистенький чистеньким чистил чистит чистить чистишь чистки чисто чистого чистое чистой чистом чистому чистоплотен чистоплотно чистосердечно чистосердечное чистота чистоте чистотой чистоту чистоты чистою чистую чисты чистые чистый чистым чистыми чистых чисть чистюлька чит читаем читает читаете читаешь читай читайте читал читала читалась читали читалось читаль читальни читальню читателем читатель читателю читателя читать читаю читают читая читка читу чиханье чихать чихнул чихнуть чище чищены чищенье чищенья 
чищу член члена членам членах членом члену члены чмокаться чокается чопорно чопорного чопорный чорт чортом чревовещательница чреду чрез чрезвычайная чрезвычайно чрезвычайного чрезвычайное чрезвычайной чрезвычайном чрезвычайною чрезвычайный чрезвычайным чрезмерно чрезмерный чт чтение чтением чтении чтений чтения чтимый чтит чтить что чтоб чтобы чтой чту чу чубуки чубы чув чувств чувства чувствам чувствами чувствах чувстве чувствительна чувствительно чувствительного чувствительное чувствительной чувствительном чувствительные чувствительный чувствительными чувство чувствовал чувствовала чувствовали чувствовать чувством чувству чувствуем чувствует чувствуете чувствуется чувствуешь чувствую чувствуют чувствующий чувствуя чугун чугуна чугуне чугунке чугунное чугунной чугунный чугунными чуда чудак чудака чудаками чудаки чудаком чудачество чудачка чуде чуден чудес чудеса чудесен чудесная чудеснее чудеснейший чудесно чудесного чудесное чудесные чудесный чудесным чудесных чудится чудище чудищу чудная чудно чудной чудном чудные чудный чудным чудо чудовищ чудовища чудовищам чудовищами чудовище чудовищная чудовищное чудовищной чудовищности чудовищные чудовищный чудом чуду чудь чует чуешь чужая чужд чужда чуждается чуждался чуждо чуждом чужды чуждые чуждый чужие чужим чужими чужих чужого чужое чужой чужом чужому чужую чулана чулане чуланы чулка чулкам чулках чулки чулков чулок чулочки чулочную чуме чумичка чумички чурбан чутки чуткие чуткий чуткими чутко чуткость чутошном чуть чухонец чухонка чухонки чухонца чухонцы чучелами чучело чушь чуют чуя чуять чье чьего чьей чьи чьим чьими чьих чья шабаш шаг шага шагает шагал шагала шагали шагами шагания шагах шагают шагающая шагая шаге шаги шагни шагнул шагнула шагнуть шагов шагом шагу шажищами шайка шайку шакалы шали шалил шалит шалить шаловлив шаловливость шаловливый шалости шалун шалунья шалый шаль шалью шаля шалят шаман шампанским шампанского шампанское шампанском шамраев шамраева шамраеву шамшевки шансы шапка шапке шапки шапку 
шапочка шапочки шар шара шарах шаре шарил шарить шаркнула шаркнуло шарлатанства шарлотта шарлотте шарлотту шарманка шарманки шарманкой шарманку шарманщик шарманщика шарманщики шарманщику шармера шаро шаров шароварах шаром шарфик шаршавого шары шаря шатается шатаешься шатайся шаталось шатался шатанья шататься шатающимся шатающихся шатаясь шатер шатка шаткими шафером шахматово шахтах швам швах швейцар швейцарии швейцарию швейцарского швейцарском швейцары швыряет швыряла швыряя ше шевели шевелил шевелилась шевелилось шевелился шевелись шевелит шевелить шевелиться шевелишься шевельнет шевельнувшись шевельнулась шевельнулось шевельнулся шевельнуться шевеля шевелясь шее шеей шеи шейного шекспир шекспира шел шелест шелестит шелесты шелестят шелк шелка шелками шелковистые шелковое шелковом шелковою шелковые шелковый шелковым шелковых шелком шеловек шеломами шелопаева шелохнутся шельма шельмой шельму шепнет шепнешь шепнул шепнула шепот шепота шепотливые шепотом шепоту шептал шептала шепталась шептались шептаний шептать шепча шепчем шепчет шепчут шепчутся шепчущие шерстку шерсть шерстяное шерстяной шерстяном шерстяную шест шестая шествие шестеро шести шестидесяти шестнадцати шестнадцатилетнего шестнадцатилетний шестнадцать шестого шестое шестой шестом шестому шесть шестьдесят шестьсот шею шея ши шиканье шила шиллер шиллера шиллеровских шило шилось шиля шинели шинель шипевших шипел шипенье шипит шипка шиповник шипы шипящую шире шириной шириною ширину ширины ширится ширм ширмами ширмой ширму ширмы широк широка широкая широкие широкий широким широкими широких широко широкого широкое широкой широком широкостью широкою широкую ширь ширятся шитом шитую шитые шить шитье шитьем шитья шиш шиша шишка шкалик шкандаль шкапа шкапом шкапу шкатулка шкатулке шкатулку шкаф шкафа шкафах шкафик шкафом шкафу шкафчика шкафы школа школах школе школу школы школьник школьника шкуре шла шлафрок шлафрока шлезвиг шлеи шлейф шлем шлепохвосткам шлепохвостниц шлет шлешь шли шло шлю шлют шляешься шлялись 
шлялся шляпа шляпах шляпе шляпка шляпке шляпки шляпку шляпник шляпой шляпу шляпы шляркой шляться шмель шмыгали шмыгать шмыгнул шнурка шнурочек шныряли шныряю шопенгауэр шопот шопота шопоте шопотом шоркает шоркнуло шорох шоссе шоссейная шоссейный шотландский шотландском шпаги шпал шпиле шпилек шпильки шпор шпорою шпоры шпынял шта штаб штабных штабс штанами штаны штат штатам штатах штатские штатский штатскими штатских штатского штатское штату штейн штиблетах што штольне штольц штольца штольце штольцев штольцем штольцу штопать штор штора шторами штору шторы штоф штофа штраф штрих штрихом штрюком штук штука штукатурка штуки штукой штуку штучка штучки штучку штык штыки штыком шуб шубе шубенке шуберта шубка шубки шубкой шубу шубы шулер шулерами шулером шум шума шумана шуме шумел шуметь шуми шумиловой шумиловское шумит шумна шумней шумно шумного шумной шумный шумных шумом шуму шумят шумящих шурин шурша шуршит шут шути шутил шутила шутили шутим шутит шутите шутить шутишь шутка шутками шутке шутки шуткой шутку шутливо шутник шутовский шутовского шуточки шуточное шуты шутя шутят шучу шушуканье шхун шьет шьют щадил щадила щадит щадите щадя ще щебет щебечут щеголевато щеголиха щегольская щегольски щегольские щегольский щегольских щегольское щегольской щегольском щеголял щеголять щедр щедрина щедрость щедрот щей щек щекам щеками щеках щеке щеки щекотал щекотала щекотлив щекотливенькая щекотливо щекотливое щекотливость щеку щелей щели щелкает щелканье щелканья щелкать щелкают щелкнул щелкнуть щелку щелочка щелочке щелочки щелочку щелчки щель щемящей щемящему щемящие щенка щенке щенком щенок щепоточку щербатый щетина щетинистыми щетину щетками щетки щеткой щетку щечкам щечке щечки щи щина щипал щипала щиплет щипцами щит щите щитом щуки щупать щупленькая щурят ы э эа эва эвона эвося эге эгоизм эгоизма эгоизме эгоист эгоистка эгоистки эгоистом эгофутуризм эдак эдакая эдгар эдгара эй эйфелевой эк эка экая экзамен экзаменовать экзекутор экземпляра экземплярах экземпляру экзотический 
экие экий экипаж экипажа экипажах экипаже экипажи эко экое экой экономией экономии экономическая экономические экономическим экономических экономической экономию экономия экономка экономки экономку экс экспансивности экспансивностями экспедицию экстраординарная эксцентричен эксцентрическая эксцентрический эксцентрических эксцентрическом эксцентричнее элегий электрическую электричества электронная электронной электронный элементами элементом элементы эмалью эмансипации эмансипацию эмблема эмеритуру эмс энергией энергии энергически энергический энергическим энергично энергия энтузиазм энтузиазма энтузиазмом энциклопедическом эпи эпиграфом эпидемию эпизод эпизодическим эпизодическое эпизодов эпикуреец эпилог эпилога эпилоге эпилогом эпитафия эпитет эполеты эпопеи эпох эпоха эпохам эпохи эпоху эрара эрлангене эрмитаж эру эры эскамильо эско эспаньолкой эссенция эстет эстетика эстетике эстетики эстетическая эстетически эстетическое эстраде эстрадой эстраду эстрады эта этаж этажа этажах этаже этажерками этажерках этажерке этажерки этажерку этажи этажом этажу этак этака этакая этакие этакий этаким этакими этаких этакого этакое этакой этаком этакому этаку этакую эти этим этими этих это этого этой этом этому этот этою этта эту этюдов эфемерная эфемерное эфес эфир эфирной эфиром эффект эффекта эх эхо эшафот эшафоте эшафоту эшелон ю юбилей юбка юбке юбки юбкой юбку ювелира ювелирская ювелирский ювенал юг юга юдоли южной южный южных юла юлишь юмора юная юнкер юного юной юности юность юностью юноша юношам юношами юношей юношеские юношеский юношескою юношественные юношеством юноши юношу юную юный юным юных юпитер юридистике юридические юридический юридическими юридического юридическое юридической юридическом юридическому юриспруденции юристов юристом юристы юрию юркнула юродивая юродивой юродивые юродивым юрты юсупов юсупова юсуповом юшин ющинского я яблок яблокам яблоками яблоки яблоко яблоку яблони яблонь явившийся явившись явилась явились явилось явился явись явитесь явится 
явиться явка явки явку явление явлением явлений явления явлениями явлениях явленьем явлюсь является являйся являлась являлись являлось являлся являться являются являющаяся являясь явная явно явное явною явный явным явственнее явственно явь явятся ягод ягоды яд яде ядовитая ядовитей ядовито ядовитое ядовитой ядовиты ядовитый ядовитым ядовитых ядом ядра ядро яду язва язвами язвах язве язвила язвили язвите язвительно язвительного язвительное язвительность язвительный язвы язык языка языками языке языки языком языку яиц яичка яичницей яичницу яичными яйца яйцами яйцах яйцом яко якобы яков якову якорей якорь якоря ямайский ямайском ямах ямб ямба ямбы яме ямка ямку ямой ямочками ямской яму ямщик ямщика ямы январе январь января янтарная янтарной янтарь ярка яркая ярки яркие яркий ярким ярких ярко яркого яркой ярком яркость яркостью яркую ярлык ярлычки ярмарка ярмаркам ярмарке ярмарку ярмо яровое ярок ярославль ярославская ярости яростно яростное ярость яростью ярче ярый ясен ясеневыми ясли ясна ясная яснее яснеет ясно ясного ясное ясной ясном ясности ясность ясную ясны ясные ясный ясным ясных яств ястреб ястреба ят яхонтовым ячмени ячменный ячмень ячменях яша яше яшей яши яшнево яшу ящик ящика ящикам ящике LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/000077500000000000000000000000001217574114600237675ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/subdirectory/000077500000000000000000000000001217574114600265055ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/subdirectory/testfile4.txt000066400000000000000000000000061217574114600311450ustar00rootroot00000000000000test 4LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/subdirectory/testfile5.txt000066400000000000000000000000061217574114600311460ustar00rootroot00000000000000test 
5LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/subdirectory/testfile6.txt000066400000000000000000000000061217574114600311470ustar00rootroot00000000000000test 6LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/testfile1.txt000066400000000000000000000000061217574114600264240ustar00rootroot00000000000000test 1LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/testfile2.txt000066400000000000000000000000061217574114600264250ustar00rootroot00000000000000test 2LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/testfile3.txt000066400000000000000000000000061217574114600264260ustar00rootroot00000000000000test 3LucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/testfilesize1.txt000066400000000000000000000000351217574114600273210ustar00rootroot00000000000000This file is 29 bytes in sizeLucenePlusPlus-rel_3.0.4/src/test/testfiles/testdirectory/testfilesize2.txt000066400000000000000000000000351217574114600273220ustar00rootroot00000000000000This file is 29 bytes in sizeLucenePlusPlus-rel_3.0.4/src/test/testfiles/testdoc1.txt000066400000000000000000000000331217574114600233460ustar00rootroot00000000000000This is the first test fileLucenePlusPlus-rel_3.0.4/src/test/testfiles/testdoc2.txt000066400000000000000000000000341217574114600233500ustar00rootroot00000000000000This is the second test fileLucenePlusPlus-rel_3.0.4/src/test/testfiles/testfile_text.txt000066400000000000000000000000711217574114600245050ustar00rootroot00000000000000test file that contains multiple lines of text 1 2 3 4 LucenePlusPlus-rel_3.0.4/src/test/testfiles/testfile_uft8.txt000066400000000000000000000016171217574114600244160ustar00rootroot00000000000000中华人民共和国 نداشته абиссинию абонемента абонировался абонируйся абонируюсь абрикосы август سراسر خياه ايشان وي تاكنون بيشتري دوم پس ناشي وگو يا داشتند سپس هنگام هرگز پنج австрийский автобиографию автографом автомобили автомобиль автор авторам авторитет авторитета авторитеты авторов چطور ده و دو نخستين ولي چرا چه وسط ه كدام قابل 
يك رفت هفت همچنين در هزار بله بلي شايد اما شناسي گرفته دهد داشته دانست داشتن خواهيم ميليارد وقتيكه امد خواهد جز اورده شده بلكه خدمات شدن برخي نبود بسياري جلوگيري englishLucenePlusPlus-rel_3.0.4/src/test/util/000077500000000000000000000000001217574114600200365ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.4/src/test/util/AttributeSourceTest.cpp000066400000000000000000000115271217574114600245340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "AttributeSource.h" #include "TermAttribute.h" #include "TypeAttribute.h" #include "FlagsAttribute.h" #include "OffsetAttribute.h" #include "PayloadAttribute.h" #include "PositionIncrementAttribute.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(AttributeSourceTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testCaptureState) { // init a first instance AttributeSourcePtr src = newLucene(); TermAttributePtr termAtt = src->addAttribute(); TypeAttributePtr typeAtt = src->addAttribute(); termAtt->setTermBuffer(L"TestTerm"); typeAtt->setType(L"TestType"); int32_t hashCode = src->hashCode(); AttributeSourceStatePtr state = src->captureState(); // modify the attributes termAtt->setTermBuffer(L"AnotherTestTerm"); typeAtt->setType(L"AnotherTestType"); BOOST_CHECK_NE(hashCode, src->hashCode()); src->restoreState(state); BOOST_CHECK_EQUAL(L"TestTerm", termAtt->term()); BOOST_CHECK_EQUAL(L"TestType", typeAtt->type()); BOOST_CHECK_EQUAL(hashCode, src->hashCode()); // restore into an exact configured copy AttributeSourcePtr copy = newLucene(); copy->addAttribute(); copy->addAttribute(); copy->restoreState(state); 
BOOST_CHECK_EQUAL(src->hashCode(), copy->hashCode()); BOOST_CHECK(src->equals(copy)); // init a second instance (with attributes in different order and one additional attribute) AttributeSourcePtr src2 = newLucene(); typeAtt = src2->addAttribute(); FlagsAttributePtr flagsAtt = src2->addAttribute(); termAtt = src2->addAttribute(); flagsAtt->setFlags(12345); src2->restoreState(state); BOOST_CHECK_EQUAL(L"TestTerm", termAtt->term()); BOOST_CHECK_EQUAL(L"TestType", typeAtt->type()); BOOST_CHECK_EQUAL(12345, flagsAtt->getFlags()); // init a third instance missing one Attribute AttributeSourcePtr src3 = newLucene(); termAtt = src3->addAttribute(); BOOST_CHECK_EXCEPTION(src3->restoreState(state), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } BOOST_AUTO_TEST_CASE(testCloneAttributes) { AttributeSourcePtr src = newLucene(); TermAttributePtr termAtt = src->addAttribute(); TypeAttributePtr typeAtt = src->addAttribute(); termAtt->setTermBuffer(L"TestTerm"); typeAtt->setType(L"TestType"); AttributeSourcePtr clone = src->cloneAttributes(); Collection attributes = clone->getAttributes(); BOOST_CHECK_EQUAL(2, attributes.size()); BOOST_CHECK(MiscUtils::typeOf(attributes[0])); BOOST_CHECK(MiscUtils::typeOf(attributes[1])); TermAttributePtr termAtt2 = clone->getAttribute(); TypeAttributePtr typeAtt2 = clone->getAttribute(); BOOST_CHECK(termAtt2 != termAtt); BOOST_CHECK(typeAtt2 != typeAtt); BOOST_CHECK(termAtt2->equals(termAtt)); BOOST_CHECK(typeAtt2->equals(typeAtt)); } BOOST_AUTO_TEST_CASE(testToStringAndMultiAttributeImplementations) { AttributeSourcePtr src = newLucene(); TermAttributePtr termAtt = src->addAttribute(); TypeAttributePtr typeAtt = src->addAttribute(); termAtt->setTermBuffer(L"TestTerm"); typeAtt->setType(L"TestType"); BOOST_CHECK_EQUAL(L"(" + termAtt->toString() + L"," + typeAtt->toString() + L")", src->toString()); Collection attributes = src->getAttributes(); BOOST_CHECK_EQUAL(2, attributes.size()); 
BOOST_CHECK(attributes[0]->equals(termAtt)); BOOST_CHECK(attributes[1]->equals(typeAtt)); } BOOST_AUTO_TEST_CASE(testDefaultAttributeFactory) { AttributeSourcePtr src = newLucene(); BOOST_CHECK(MiscUtils::typeOf(src->addAttribute())); BOOST_CHECK(MiscUtils::typeOf(src->addAttribute())); BOOST_CHECK(MiscUtils::typeOf(src->addAttribute())); BOOST_CHECK(MiscUtils::typeOf(src->addAttribute())); BOOST_CHECK(MiscUtils::typeOf(src->addAttribute())); BOOST_CHECK(MiscUtils::typeOf(src->addAttribute())); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/Base64Test.cpp000066400000000000000000000067261217574114600224410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "Base64.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(Base64Test, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testEncodeSmall) { SingleString testBinary = "this is test binary"; String encode = Base64::encode((uint8_t*)testBinary.c_str(), testBinary.length()); BOOST_CHECK_EQUAL(encode, L"dGhpcyBpcyB0ZXN0IGJpbmFyeQ=="); } BOOST_AUTO_TEST_CASE(testEncodeLarge) { SingleString testBinary = "This is a larger test string that should convert into base64 " "This is a larger test string that should convert into base64 " "This is a larger test string that should convert into base64 " "This is a larger test string that should convert into base64 " "This is a larger test string that should convert into base64"; String encode = Base64::encode((uint8_t*)testBinary.c_str(), testBinary.length()); String expected = L"VGhpcyBpcyBhIGxhcmdlciB0ZXN0IHN0cmluZyB0aGF0IHNob3VsZCBjb252ZXJ0IGludG8gYmFz" 
L"ZTY0IFRoaXMgaXMgYSBsYXJnZXIgdGVzdCBzdHJpbmcgdGhhdCBzaG91bGQgY29udmVydCBpbnRv" L"IGJhc2U2NCBUaGlzIGlzIGEgbGFyZ2VyIHRlc3Qgc3RyaW5nIHRoYXQgc2hvdWxkIGNvbnZlcnQg" L"aW50byBiYXNlNjQgVGhpcyBpcyBhIGxhcmdlciB0ZXN0IHN0cmluZyB0aGF0IHNob3VsZCBjb252" L"ZXJ0IGludG8gYmFzZTY0IFRoaXMgaXMgYSBsYXJnZXIgdGVzdCBzdHJpbmcgdGhhdCBzaG91bGQg" L"Y29udmVydCBpbnRvIGJhc2U2NA=="; BOOST_CHECK_EQUAL(encode, expected); } BOOST_AUTO_TEST_CASE(testDecodeSmall) { String testString = L"dGhpcyBpcyB0ZXN0IGJpbmFyeQ=="; ByteArray decode = Base64::decode(testString); SingleString decodeBinary((char*)decode.get(), decode.size()); BOOST_CHECK_EQUAL(decodeBinary, "this is test binary"); } BOOST_AUTO_TEST_CASE(testDecodeLaarge) { String testString = L"VGhpcyBpcyBhIGxhcmdlciB0ZXN0IHN0cmluZyB0aGF0IHNob3VsZCBjb252ZXJ0IGludG8gYmFz" L"ZTY0IFRoaXMgaXMgYSBsYXJnZXIgdGVzdCBzdHJpbmcgdGhhdCBzaG91bGQgY29udmVydCBpbnRv" L"IGJhc2U2NCBUaGlzIGlzIGEgbGFyZ2VyIHRlc3Qgc3RyaW5nIHRoYXQgc2hvdWxkIGNvbnZlcnQg" L"aW50byBiYXNlNjQgVGhpcyBpcyBhIGxhcmdlciB0ZXN0IHN0cmluZyB0aGF0IHNob3VsZCBjb252" L"ZXJ0IGludG8gYmFzZTY0IFRoaXMgaXMgYSBsYXJnZXIgdGVzdCBzdHJpbmcgdGhhdCBzaG91bGQg" L"Y29udmVydCBpbnRvIGJhc2U2NA=="; ByteArray decode = Base64::decode(testString); SingleString decodeBinary((char*)decode.get(), decode.size()); SingleString expected = "This is a larger test string that should convert into base64 " "This is a larger test string that should convert into base64 " "This is a larger test string that should convert into base64 " "This is a larger test string that should convert into base64 " "This is a larger test string that should convert into base64"; BOOST_CHECK_EQUAL(decodeBinary, expected); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/BitVectorTest.cpp000066400000000000000000000157471217574114600233210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "BitVector.h" #include "RAMDirectory.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(BitVectorTest, LuceneTestFixture) static const int32_t subsetPattern[] = {1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1}; static bool compareBitVectors(BitVectorPtr bv, BitVectorPtr compare) { for (int32_t i = 0; i < bv->size(); ++i) { // bits must be equal if (bv->get(i) != compare->get(i)) return false; } return true; } static void doTestConstructOfSize(int32_t n) { BitVectorPtr bv = newLucene(n); BOOST_CHECK_EQUAL(n, bv->size()); } /// Test the default constructor on BitVectors of various sizes. BOOST_AUTO_TEST_CASE(testConstructSize) { doTestConstructOfSize(8); doTestConstructOfSize(20); doTestConstructOfSize(100); doTestConstructOfSize(1000); } static void doTestGetSetVectorOfSize(int32_t n) { BitVectorPtr bv = newLucene(n); for (int32_t i = 0; i < bv->size(); ++i) { BOOST_CHECK(!bv->get(i)); bv->set(i); BOOST_CHECK(bv->get(i)); } } /// Test the get() and set() methods on BitVectors of various sizes. BOOST_AUTO_TEST_CASE(testGetSet) { doTestGetSetVectorOfSize(8); doTestGetSetVectorOfSize(20); doTestGetSetVectorOfSize(100); doTestGetSetVectorOfSize(1000); } static void doTestClearVectorOfSize(int32_t n) { BitVectorPtr bv = newLucene(n); for (int32_t i = 0; i < bv->size(); ++i) { BOOST_CHECK(!bv->get(i)); bv->set(i); BOOST_CHECK(bv->get(i)); bv->clear(i); BOOST_CHECK(!bv->get(i)); } } /// Test the clear() method on BitVectors of various sizes. 
BOOST_AUTO_TEST_CASE(testClear) { doTestClearVectorOfSize(8); doTestClearVectorOfSize(20); doTestClearVectorOfSize(100); doTestClearVectorOfSize(1000); } static void doTestCountVectorOfSize(int32_t n) { BitVectorPtr bv = newLucene(n); // test count when incrementally setting bits for (int32_t i = 0; i < bv->size(); ++i) { BOOST_CHECK(!bv->get(i)); BOOST_CHECK_EQUAL(i, bv->count()); bv->set(i); BOOST_CHECK(bv->get(i)); BOOST_CHECK_EQUAL(i + 1, bv->count()); } bv = newLucene(n); // test count when setting then clearing bits for (int32_t i = 0; i < bv->size(); ++i) { BOOST_CHECK(!bv->get(i)); BOOST_CHECK_EQUAL(0, bv->count()); bv->set(i); BOOST_CHECK(bv->get(i)); BOOST_CHECK_EQUAL(1, bv->count()); bv->clear(i); BOOST_CHECK(!bv->get(i)); BOOST_CHECK_EQUAL(0, bv->count()); } } /// Test the count() method on BitVectors of various sizes. BOOST_AUTO_TEST_CASE(testCount) { doTestCountVectorOfSize(8); doTestCountVectorOfSize(20); doTestCountVectorOfSize(100); doTestCountVectorOfSize(1000); } static void doTestWriteRead(int32_t n) { DirectoryPtr d = newLucene(); BitVectorPtr bv = newLucene(n); // test count when incrementally setting bits for (int32_t i = 0; i < bv->size(); ++i) { BOOST_CHECK(!bv->get(i)); BOOST_CHECK_EQUAL(i, bv->count()); bv->set(i); BOOST_CHECK(bv->get(i)); BOOST_CHECK_EQUAL(i + 1, bv->count()); bv->write(d, L"TESTBV"); BitVectorPtr compare = newLucene(d, L"TESTBV"); // compare bit vectors with bits set incrementally BOOST_CHECK(compareBitVectors(bv, compare)); } } /// Test writing and construction to/from Directory. 
BOOST_AUTO_TEST_CASE(testWriteRead) { doTestWriteRead(8); doTestWriteRead(20); doTestWriteRead(100); doTestWriteRead(1000); } static void doTestDgaps(int32_t size, int32_t count1, int32_t count2) { DirectoryPtr d = newLucene(); BitVectorPtr bv = newLucene(size); for (int32_t i = 0; i < count1; ++i) { bv->set(i); BOOST_CHECK_EQUAL(i + 1, bv->count()); } bv->write(d, L"TESTBV"); // gradually increase number of set bits for (int32_t i = count1; i < count2; ++i) { BitVectorPtr bv2 = newLucene(d, L"TESTBV"); BOOST_CHECK(compareBitVectors(bv, bv2)); bv = bv2; bv->set(i); BOOST_CHECK_EQUAL(i + 1, bv->count()); bv->write(d, L"TESTBV"); } // now start decreasing number of set bits for (int32_t i = count2 - 1; i >= count1; --i) { BitVectorPtr bv2 = newLucene(d, L"TESTBV"); BOOST_CHECK(compareBitVectors(bv, bv2)); bv = bv2; bv->clear(i); BOOST_CHECK_EQUAL(i, bv->count()); bv->write(d, L"TESTBV"); } } /// Test r/w when size/count cause switching between bit-set and d-gaps file formats. BOOST_AUTO_TEST_CASE(testDgaps) { doTestDgaps(1, 0, 1); doTestDgaps(10, 0, 1); doTestDgaps(100, 0, 1); doTestDgaps(1000, 4, 7); doTestDgaps(10000, 40, 43); doTestDgaps(100000, 415, 418); doTestDgaps(1000000, 3123, 3126); } static BitVectorPtr createSubsetTestVector() { int32_t length = SIZEOF_ARRAY(subsetPattern); BitVectorPtr bv = newLucene(length); for (int32_t i = 0; i < length; ++i) { if (subsetPattern[i] == 1) bv->set(i); } return bv; } /// Compare a subset against the corresponding portion of the test pattern static void doTestSubset(int32_t start, int32_t end) { BitVectorPtr full = createSubsetTestVector(); BitVectorPtr subset = full->subset(start, end); BOOST_CHECK_EQUAL(end - start, subset->size()); int32_t count = 0; for (int32_t i = start, j = 0; i < end; ++i, ++j) { if (subsetPattern[i] == 1) { ++count; BOOST_CHECK(subset->get(j)); } else BOOST_CHECK(!subset->get(j)); } BOOST_CHECK_EQUAL(count, subset->count()); } /// Tests BitVector.subset() against a pattern 
BOOST_AUTO_TEST_CASE(testSubset) { doTestSubset(0, 0); doTestSubset(0, 20); doTestSubset(0, 7); doTestSubset(0, 8); doTestSubset(0, 9); doTestSubset(0, 15); doTestSubset(0, 16); doTestSubset(0, 17); doTestSubset(1, 7); doTestSubset(1, 8); doTestSubset(1, 9); doTestSubset(1, 15); doTestSubset(1, 16); doTestSubset(1, 17); doTestSubset(2, 20); doTestSubset(3, 20); doTestSubset(4, 20); doTestSubset(5, 20); doTestSubset(6, 20); doTestSubset(7, 14); doTestSubset(7, 15); doTestSubset(7, 16); doTestSubset(8, 15); doTestSubset(9, 20); doTestSubset(10, 20); doTestSubset(11, 20); doTestSubset(12, 20); doTestSubset(13, 20); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/BufferedReaderTest.cpp000066400000000000000000000134261217574114600242550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FileReader.h" #include "BufferedReader.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(BufferedReaderTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testBufferedReaderChar) { BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt"))); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L't'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'e'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L's'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L't'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L' '); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'f'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'i'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'l'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'e'); } BOOST_AUTO_TEST_CASE(testBufferedReaderRead) { BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt"))); wchar_t buffer[80]; int32_t length = reader->read(buffer, 0, 80); String bufferString(buffer, length); boost::replace_all(bufferString, L"\r\n", L"\n"); // account for windows newline characters BOOST_CHECK_EQUAL(bufferString, L"test file\nthat contains\nmultiple lines of text\n\n\n1 2 3 4\n"); BOOST_CHECK_EQUAL(reader->read(buffer, 0, 1), FileReader::FILE_EOF); } BOOST_AUTO_TEST_CASE(testBufferedReaderReadLine) { BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt"))); Collection readLines = Collection::newInstance(); String line; while (reader->readLine(line)) readLines.add(line); BOOST_CHECK_EQUAL(reader->read(), FileReader::FILE_EOF); BOOST_CHECK_EQUAL(readLines.size(), 6); BOOST_CHECK_EQUAL(readLines[0], L"test file"); BOOST_CHECK_EQUAL(readLines[1], L"that contains"); BOOST_CHECK_EQUAL(readLines[2], L"multiple lines of text"); 
BOOST_CHECK_EQUAL(readLines[3], L""); BOOST_CHECK_EQUAL(readLines[4], L""); BOOST_CHECK_EQUAL(readLines[5], L"1 2 3 4"); } BOOST_AUTO_TEST_CASE(testBufferedReaderReset) { BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt"))); wchar_t buffer[20]; BOOST_CHECK_EQUAL(reader->read(buffer, 0, 9), 9); BOOST_CHECK_EQUAL(String(buffer, 9), L"test file"); reader->reset(); BOOST_CHECK_EQUAL(reader->read(buffer, 0, 9), 9); BOOST_CHECK_EQUAL(String(buffer, 9), L"test file"); } BOOST_AUTO_TEST_CASE(testBufferedReaderCharsSmallBuffer) { static const int32_t bufferSize = 5; BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt")), bufferSize); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L't'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'e'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L's'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L't'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L' '); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'f'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'i'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'l'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'e'); } BOOST_AUTO_TEST_CASE(testBufferedReaderReadSmallBuffer) { static const int32_t bufferSize = 5; BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt")), bufferSize); wchar_t buffer[80]; int32_t length = reader->read(buffer, 0, 80); String bufferString(buffer, length); boost::replace_all(bufferString, L"\r\n", L"\n"); // account for windows newline characters BOOST_CHECK_EQUAL(bufferString, L"test file\nthat contains\nmultiple lines of text\n\n\n1 2 3 4\n"); BOOST_CHECK_EQUAL(reader->read(buffer, 0, 1), FileReader::FILE_EOF); } BOOST_AUTO_TEST_CASE(testBufferedReaderResetSmallBuffer) { static const int32_t bufferSize = 5; BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt")), bufferSize); wchar_t buffer[20]; 
BOOST_CHECK_EQUAL(reader->read(buffer, 0, 9), 9); BOOST_CHECK_EQUAL(String(buffer, 9), L"test file"); reader->reset(); BOOST_CHECK_EQUAL(reader->read(buffer, 0, 9), 9); BOOST_CHECK_EQUAL(String(buffer, 9), L"test file"); } BOOST_AUTO_TEST_CASE(testBufferedReaderReadLineSmallBuffer) { static const int32_t bufferSize = 5; BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt")), bufferSize); Collection readLines = Collection::newInstance(); String line; while (reader->readLine(line)) readLines.add(line); BOOST_CHECK_EQUAL(reader->read(), FileReader::FILE_EOF); BOOST_CHECK_EQUAL(readLines.size(), 6); BOOST_CHECK_EQUAL(readLines[0], L"test file"); BOOST_CHECK_EQUAL(readLines[1], L"that contains"); BOOST_CHECK_EQUAL(readLines[2], L"multiple lines of text"); BOOST_CHECK_EQUAL(readLines[3], L""); BOOST_CHECK_EQUAL(readLines[4], L""); BOOST_CHECK_EQUAL(readLines[5], L"1 2 3 4"); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/CloseableThreadLocalTest.cpp000066400000000000000000000026521217574114600254030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "CloseableThreadLocal.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CloseableThreadLocalTest, LuceneTestFixture) static const String TEST_VALUE = L"initvaluetest"; BOOST_AUTO_TEST_CASE(testInitValue) { class InitValueThreadLocal : public CloseableThreadLocal { public: virtual ~InitValueThreadLocal() { } protected: virtual boost::shared_ptr initialValue() { return newInstance(TEST_VALUE); } }; InitValueThreadLocal tl; String str = *(tl.get()); BOOST_CHECK_EQUAL(TEST_VALUE, str); } /// Tests that null can be set as a valid value. BOOST_AUTO_TEST_CASE(testNullValue) { CloseableThreadLocal ctl; ctl.set(boost::shared_ptr()); BOOST_CHECK(!ctl.get()); } /// Make sure default get returns null, twice in a row BOOST_AUTO_TEST_CASE(testDefaultValueWithoutSetting) { CloseableThreadLocal ctl; BOOST_CHECK(!ctl.get()); BOOST_CHECK(!ctl.get()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/CompressionToolsTest.cpp000066400000000000000000000015231217574114600247250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "CompressionTools.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(CompressionToolsTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testCompressDecompress) { ByteArray compress(CompressionTools::compressString(L"test compressed string")); BOOST_CHECK(compress.size() > 0); String decompress(CompressionTools::decompressString(compress)); BOOST_CHECK_EQUAL(decompress, L"test compressed string"); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/FieldCacheSanityCheckerTest.cpp000066400000000000000000000117321217574114600260320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "IndexReader.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "WhitespaceAnalyzer.h" #include "MultiReader.h" #include "FieldCache.h" #include "FieldCacheSanityChecker.h" #include "Document.h" #include "Field.h" using namespace Lucene; class FieldCacheSanityCheckerTestFixture : public LuceneTestFixture { public: FieldCacheSanityCheckerTestFixture() { RAMDirectoryPtr dirA = newLucene(); RAMDirectoryPtr dirB = newLucene(); IndexWriterPtr wA = newLucene(dirA, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); IndexWriterPtr wB = newLucene(dirB, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); int64_t theLong = LLONG_MAX; double theDouble = DBL_MAX; uint8_t theByte = UCHAR_MAX; int32_t theInt = INT_MAX; for (int32_t i = 0; i < NUM_DOCS; ++i) { DocumentPtr doc = newLucene(); doc->add(newLucene(L"theLong", StringUtils::toString(theLong--), 
Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"theDouble", StringUtils::toString(theDouble--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"theByte", StringUtils::toString(theByte--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"theInt", StringUtils::toString(theInt--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED)); if (i % 3 == 0) wA->addDocument(doc); else wB->addDocument(doc); } wA->close(); wB->close(); readerA = IndexReader::open(dirA, true); readerB = IndexReader::open(dirB, true); readerX = newLucene(newCollection(readerA, readerB)); } virtual ~FieldCacheSanityCheckerTestFixture() { readerA->close(); readerB->close(); readerX->close(); } protected: IndexReaderPtr readerA; IndexReaderPtr readerB; IndexReaderPtr readerX; static const int32_t NUM_DOCS; }; const int32_t FieldCacheSanityCheckerTestFixture::NUM_DOCS = 1000; BOOST_FIXTURE_TEST_SUITE(FieldCacheSanityCheckerTest, FieldCacheSanityCheckerTestFixture) BOOST_AUTO_TEST_CASE(testSanity) { FieldCachePtr cache = FieldCache::DEFAULT(); cache->purgeAllCaches(); Collection doubles = cache->getDoubles(readerA, L"theDouble"); doubles = cache->getDoubles(readerA, L"theDouble", FieldCache::DEFAULT_DOUBLE_PARSER()); doubles = cache->getDoubles(readerB, L"theDouble", FieldCache::DEFAULT_DOUBLE_PARSER()); Collection ints = cache->getInts(readerX, L"theInt"); ints = cache->getInts(readerX, L"theInt", FieldCache::DEFAULT_INT_PARSER()); Collection insanity = FieldCacheSanityChecker::checkSanity(cache->getCacheEntries()); BOOST_CHECK_EQUAL(0, insanity.size()); cache->purgeAllCaches(); } BOOST_AUTO_TEST_CASE(testInsanity1) { FieldCachePtr cache = FieldCache::DEFAULT(); cache->purgeAllCaches(); Collection ints = cache->getInts(readerX, L"theInt", FieldCache::DEFAULT_INT_PARSER()); Collection strings = cache->getStrings(readerX, L"theInt"); // this one is ok Collection bytes = cache->getBytes(readerX, L"theByte"); Collection insanity = 
FieldCacheSanityChecker::checkSanity(cache->getCacheEntries()); BOOST_CHECK_EQUAL(1, insanity.size()); BOOST_CHECK_EQUAL(FieldCacheSanityChecker::VALUEMISMATCH, insanity[0]->getType()); BOOST_CHECK_EQUAL(2, insanity[0]->getCacheEntries().size()); // we expect bad things, don't let tearDown complain about them cache->purgeAllCaches(); } BOOST_AUTO_TEST_CASE(testInsanity2) { FieldCachePtr cache = FieldCache::DEFAULT(); cache->purgeAllCaches(); Collection strings = cache->getStrings(readerA, L"theString"); strings = cache->getStrings(readerB, L"theString"); strings = cache->getStrings(readerX, L"theString"); // this one is ok Collection bytes = cache->getBytes(readerX, L"theByte"); Collection insanity = FieldCacheSanityChecker::checkSanity(cache->getCacheEntries()); BOOST_CHECK_EQUAL(1, insanity.size()); BOOST_CHECK_EQUAL(FieldCacheSanityChecker::SUBREADER, insanity[0]->getType()); BOOST_CHECK_EQUAL(3, insanity[0]->getCacheEntries().size()); // we expect bad things, don't let tearDown complain about them cache->purgeAllCaches(); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/FileReaderTest.cpp000066400000000000000000000042721217574114600234110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FileReader.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(FileReaderTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testFileReaderChar) { FileReaderPtr reader = newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt")); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L't'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'e'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L's'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L't'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L' '); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'f'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'i'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'l'); BOOST_CHECK_EQUAL((wchar_t)reader->read(), L'e'); } BOOST_AUTO_TEST_CASE(testFileReaderRead) { FileReaderPtr reader = newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt")); wchar_t buffer[80]; int32_t length = reader->read(buffer, 0, 80); String bufferString(buffer, length); boost::replace_all(bufferString, L"\r\n", L"\n"); // account for windows newline characters BOOST_CHECK_EQUAL(bufferString, L"test file\nthat contains\nmultiple lines of text\n\n\n1 2 3 4\n"); BOOST_CHECK_EQUAL(reader->read(buffer, 0, 1), FileReader::FILE_EOF); } BOOST_AUTO_TEST_CASE(testFileReaderReset) { FileReaderPtr reader = newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt")); wchar_t buffer[20]; BOOST_CHECK_EQUAL(reader->read(buffer, 0, 9), 9); BOOST_CHECK_EQUAL(String(buffer, 9), L"test file"); reader->reset(); BOOST_CHECK_EQUAL(reader->read(buffer, 0, 9), 9); BOOST_CHECK_EQUAL(String(buffer, 9), L"test file"); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/FileUtilsTest.cpp000066400000000000000000000207071217574114600233100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// 
// Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(FileUtilsTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testFileExists) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"testfile1.txt")); BOOST_CHECK(FileUtils::fileExists(fileDir)); } BOOST_AUTO_TEST_CASE(testFileModified) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"testfile1.txt")); uint64_t fileModified = FileUtils::fileModified(fileDir); BOOST_CHECK_NE(fileModified, 0); struct tm *fileTime = localtime((const time_t*)&fileModified); BOOST_CHECK(fileTime != NULL); } BOOST_AUTO_TEST_CASE(testInvalidFileModified) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"invalid")); BOOST_CHECK_EQUAL(FileUtils::fileModified(fileDir), 0); } BOOST_AUTO_TEST_CASE(testTouchFile) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"testfile1.txt")); BOOST_CHECK(FileUtils::touchFile(fileDir)); uint64_t fileModified = FileUtils::fileModified(fileDir); BOOST_CHECK_NE(fileModified, 0); struct tm *fileTime = localtime((const time_t*)&fileModified); BOOST_CHECK(fileTime != NULL); time_t current = time(NULL); struct tm *currentTime = localtime((const time_t*)¤t); BOOST_CHECK_EQUAL(fileTime->tm_year, currentTime->tm_year); BOOST_CHECK_EQUAL(fileTime->tm_mon, currentTime->tm_mon); BOOST_CHECK_EQUAL(fileTime->tm_mday, currentTime->tm_mday); } BOOST_AUTO_TEST_CASE(testInvalidTouchFile) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"invalid")); 
BOOST_CHECK(!FileUtils::touchFile(fileDir)); } BOOST_AUTO_TEST_CASE(testFileLength) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"testfilesize1.txt")); int64_t fileLength = FileUtils::fileLength(fileDir); BOOST_CHECK_EQUAL(fileLength, 29); } BOOST_AUTO_TEST_CASE(testInvalidFileLength) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"invalid")); BOOST_CHECK_EQUAL(FileUtils::fileLength(fileDir), 0); } BOOST_AUTO_TEST_CASE(testSetFileLength) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"testfilesize2.txt")); BOOST_CHECK(FileUtils::setFileLength(fileDir, 1234)); int64_t fileLengthGrow = FileUtils::fileLength(fileDir); BOOST_CHECK_EQUAL(fileLengthGrow, 1234); BOOST_CHECK(FileUtils::setFileLength(fileDir, 29)); int64_t fileLengthShrink = FileUtils::fileLength(fileDir); BOOST_CHECK_EQUAL(fileLengthShrink, 29); } BOOST_AUTO_TEST_CASE(testInvalidSetFileLength) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"invalid")); BOOST_CHECK(!FileUtils::setFileLength(fileDir, 1234)); } BOOST_AUTO_TEST_CASE(testRemoveFile) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"testdelete.txt")); std::ofstream f(StringUtils::toUTF8(fileDir).c_str(), std::ios::binary | std::ios::out); f.close(); BOOST_CHECK(FileUtils::fileExists(fileDir)); BOOST_CHECK(FileUtils::removeFile(fileDir)); BOOST_CHECK(!FileUtils::fileExists(fileDir)); } BOOST_AUTO_TEST_CASE(testInvalidRemoveFile) { String fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"invalid")); BOOST_CHECK(!FileUtils::removeFile(fileDir)); } BOOST_AUTO_TEST_CASE(testIsDirectory) { String fileDir(FileUtils::joinPath(getTestDir(), L"testdirectory")); BOOST_CHECK(FileUtils::isDirectory(fileDir)); } BOOST_AUTO_TEST_CASE(testNotDirectory) { String 
fileDir(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"testdirectory"), L"testfile1.txt")); BOOST_CHECK(!FileUtils::isDirectory(fileDir)); } BOOST_AUTO_TEST_CASE(testNotDirectoryEmpty) { BOOST_CHECK(!FileUtils::isDirectory(L"")); } BOOST_AUTO_TEST_CASE(testListDirectory) { String fileDir(FileUtils::joinPath(getTestDir(), L"testdirectory")); HashSet list(HashSet::newInstance()); BOOST_CHECK(FileUtils::listDirectory(fileDir, false, list)); Collection expectedList(Collection::newInstance(list.begin(), list.end())); std::sort(expectedList.begin(), expectedList.end()); BOOST_CHECK_EQUAL(expectedList.size(), 6); BOOST_CHECK_EQUAL(expectedList[0], L"subdirectory"); BOOST_CHECK_EQUAL(expectedList[1], L"testfile1.txt"); BOOST_CHECK_EQUAL(expectedList[2], L"testfile2.txt"); BOOST_CHECK_EQUAL(expectedList[3], L"testfile3.txt"); BOOST_CHECK_EQUAL(expectedList[4], L"testfilesize1.txt"); BOOST_CHECK_EQUAL(expectedList[5], L"testfilesize2.txt"); } BOOST_AUTO_TEST_CASE(testListDirectoryFiles) { String fileDir(FileUtils::joinPath(getTestDir(), L"testdirectory")); HashSet list(HashSet::newInstance()); BOOST_CHECK(FileUtils::listDirectory(fileDir, true, list)); Collection expectedList(Collection::newInstance(list.begin(), list.end())); std::sort(expectedList.begin(), expectedList.end()); BOOST_CHECK_EQUAL(expectedList.size(), 5); BOOST_CHECK_EQUAL(expectedList[0], L"testfile1.txt"); BOOST_CHECK_EQUAL(expectedList[1], L"testfile2.txt"); BOOST_CHECK_EQUAL(expectedList[2], L"testfile3.txt"); BOOST_CHECK_EQUAL(expectedList[3], L"testfilesize1.txt"); BOOST_CHECK_EQUAL(expectedList[4], L"testfilesize2.txt"); } BOOST_AUTO_TEST_CASE(testJoinPath) { #if defined(_WIN32) || defined(_WIN64) BOOST_CHECK_EQUAL(FileUtils::joinPath(L"c:\\test", L"\\testfile.txt"), L"c:\\test\\testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"c:\\test", L"testfile.txt"), L"c:\\test\\testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"", L"testfile.txt"), L"testfile.txt"); 
BOOST_CHECK_EQUAL(FileUtils::joinPath(L"c:\\test", L""), L"c:\\test"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"\\test", L"\\testfile.txt"), L"\\test\\testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"\\test", L"testfile.txt"), L"\\test\\testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"", L"testfile.txt"), L"testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"\\test", L""), L"\\test"); #else BOOST_CHECK_EQUAL(FileUtils::joinPath(L"/test", L"/testfile.txt"), L"/test/testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"/test", L"testfile.txt"), L"/test/testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"", L"testfile.txt"), L"testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::joinPath(L"/test", L""), L"/test"); #endif } BOOST_AUTO_TEST_CASE(testExtractPath) { #if defined(_WIN32) || defined(_WIN64) BOOST_CHECK_EQUAL(FileUtils::extractPath(L"c:\\test"), L"c:\\"); BOOST_CHECK_EQUAL(FileUtils::extractPath(L"c:\\test\\testfile.txt"), L"c:\\test"); BOOST_CHECK_EQUAL(FileUtils::extractPath(L""), L""); BOOST_CHECK_EQUAL(FileUtils::extractPath(L"\\test"), L"\\"); BOOST_CHECK_EQUAL(FileUtils::extractPath(L"\\test\\testfile.txt"), L"\\test"); #else BOOST_CHECK_EQUAL(FileUtils::extractPath(L"/test"), L"/"); BOOST_CHECK_EQUAL(FileUtils::extractPath(L"/test/testfile.txt"), L"/test"); BOOST_CHECK_EQUAL(FileUtils::extractPath(L""), L""); #endif } BOOST_AUTO_TEST_CASE(testExtractFile) { #if defined(_WIN32) || defined(_WIN64) BOOST_CHECK_EQUAL(FileUtils::extractFile(L"c:\\test"), L"test"); BOOST_CHECK_EQUAL(FileUtils::extractFile(L"c:\\test\\testfile.txt"), L"testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::extractFile(L""), L""); BOOST_CHECK_EQUAL(FileUtils::extractFile(L"\\test"), L"test"); BOOST_CHECK_EQUAL(FileUtils::extractFile(L"\\test\\testfile.txt"), L"testfile.txt"); #else BOOST_CHECK_EQUAL(FileUtils::extractFile(L"/test"), L"test"); BOOST_CHECK_EQUAL(FileUtils::extractFile(L"/test/testfile.txt"), L"testfile.txt"); BOOST_CHECK_EQUAL(FileUtils::extractFile(L""), 
L""); #endif } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/InputStreamReaderTest.cpp000066400000000000000000000100701217574114600247760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FileReader.h" #include "InputStreamReader.h" #include "BufferedReader.h" #include "FileUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(InputStreamReaderTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testInputStreamReaderChar) { InputStreamReaderPtr stream = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt"))); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L't'); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L'e'); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L's'); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L't'); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L' '); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L'f'); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L'i'); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L'l'); BOOST_CHECK_EQUAL((wchar_t)stream->read(), L'e'); } BOOST_AUTO_TEST_CASE(testInputStreamReaderCharUtf8) { InputStreamReaderPtr stream = newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_uft8.txt"))); const uint8_t chinese[] = {0xe4, 0xb8, 0xad, 0xe5, 0x8d, 0x8e, 0xe4, 0xba, 0xba, 0xe6, 0xb0, 0x91, 0xe5, 0x85, 0xb1, 0xe5, 0x92, 0x8c, 0xe5, 0x9b, 0xbd}; String expectedChinese(UTF8_TO_STRING(chinese)); BOOST_CHECK_EQUAL((wchar_t)stream->read(), expectedChinese[0]); BOOST_CHECK_EQUAL((wchar_t)stream->read(), expectedChinese[1]); BOOST_CHECK_EQUAL((wchar_t)stream->read(), expectedChinese[2]); 
BOOST_CHECK_EQUAL((wchar_t)stream->read(), expectedChinese[3]); BOOST_CHECK_EQUAL((wchar_t)stream->read(), expectedChinese[4]); BOOST_CHECK_EQUAL((wchar_t)stream->read(), expectedChinese[5]); BOOST_CHECK_EQUAL((wchar_t)stream->read(), expectedChinese[6]); } BOOST_AUTO_TEST_CASE(testInputStreamReaderReadLine) { BufferedReaderPtr reader = newLucene(newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_text.txt")))); Collection readLines = Collection::newInstance(); String line; while (reader->readLine(line)) readLines.add(line); BOOST_CHECK_EQUAL(reader->read(), FileReader::FILE_EOF); BOOST_CHECK_EQUAL(readLines.size(), 6); BOOST_CHECK_EQUAL(readLines[0], L"test file"); BOOST_CHECK_EQUAL(readLines[1], L"that contains"); BOOST_CHECK_EQUAL(readLines[2], L"multiple lines of text"); BOOST_CHECK_EQUAL(readLines[3], L""); BOOST_CHECK_EQUAL(readLines[4], L""); BOOST_CHECK_EQUAL(readLines[5], L"1 2 3 4"); } BOOST_AUTO_TEST_CASE(testInputStreamReaderReadLineUtf8) { BufferedReaderPtr reader = newLucene(newLucene(newLucene(FileUtils::joinPath(getTestDir(), L"testfile_uft8.txt")))); Collection readLines = Collection::newInstance(); String line; while (reader->readLine(line)) readLines.add(line); const uint8_t chinese[] = {0xe4, 0xb8, 0xad, 0xe5, 0x8d, 0x8e, 0xe4, 0xba, 0xba, 0xe6, 0xb0, 0x91, 0xe5, 0x85, 0xb1, 0xe5, 0x92, 0x8c, 0xe5, 0x9b, 0xbd}; const uint8_t persian[] = {0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87}; const uint8_t russian[] = {0xd0, 0xb0, 0xd0, 0xb1, 0xd0, 0xb8, 0xd1, 0x81, 0xd1, 0x81, 0xd0, 0xb8, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x8e}; BOOST_CHECK_EQUAL(readLines.size(), 80); BOOST_CHECK_EQUAL(readLines[0], UTF8_TO_STRING(chinese)); BOOST_CHECK_EQUAL(readLines[1], UTF8_TO_STRING(persian)); BOOST_CHECK_EQUAL(readLines[2], UTF8_TO_STRING(russian)); } BOOST_AUTO_TEST_SUITE_END() 
LucenePlusPlus-rel_3.0.4/src/test/util/LuceneGlobalFixture.cpp000066400000000000000000000015711217574114600244510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneGlobalFixture.h" #include "TestUtils.h" #include "TestPoint.h" #include "FileUtils.h" namespace Lucene { LuceneGlobalFixture::LuceneGlobalFixture() { FileUtils::removeDirectory(getTempDir()); FileUtils::createDirectory(getTempDir()); TestPoint::enableTestPoints(); } LuceneGlobalFixture::~LuceneGlobalFixture() { FileUtils::removeDirectory(getTempDir()); Lucene::CycleCheck::dumpRefs(); } BOOST_GLOBAL_FIXTURE(LuceneGlobalFixture); } LucenePlusPlus-rel_3.0.4/src/test/util/LuceneTestFixture.cpp000066400000000000000000000020701217574114600241630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "ConcurrentMergeScheduler.h" #include "DateTools.h" namespace Lucene { LuceneTestFixture::LuceneTestFixture() { DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); ConcurrentMergeScheduler::setTestMode(); } LuceneTestFixture::~LuceneTestFixture() { DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); if (ConcurrentMergeScheduler::anyUnhandledExceptions()) { // Clear the failure so that we don't just keep failing subsequent test cases ConcurrentMergeScheduler::clearUnhandledExceptions(); BOOST_FAIL("ConcurrentMergeScheduler hit unhandled exceptions"); } } } LucenePlusPlus-rel_3.0.4/src/test/util/NumericUtilsTest.cpp000066400000000000000000000511561217574114600240350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "NumericUtils.h" #include "OpenBitSet.h" #include "Random.h" #include "MiscUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(NumericUtilsTest, LuceneTestFixture) class CheckLongRangeBuilder : public LongRangeBuilder { public: CheckLongRangeBuilder(int64_t lower, int64_t upper, OpenBitSetPtr bits, Collection::iterator neededBoundsFirst, Collection::iterator neededBoundsLast, Collection::iterator neededShiftsFirst, Collection::iterator neededShiftsLast) { this->lower = lower; this->upper = upper; this->bits = bits; this->neededBoundsFirst = neededBoundsFirst; this->neededBoundsLast = neededBoundsLast; this->neededShiftsFirst = neededShiftsFirst; this->neededShiftsLast = neededShiftsLast; } virtual ~CheckLongRangeBuilder() { } protected: int64_t lower; int64_t upper; OpenBitSetPtr bits; Collection::iterator neededBoundsFirst; Collection::iterator neededBoundsLast; Collection::iterator neededShiftsFirst; Collection::iterator neededShiftsLast; public: virtual void addRange(int64_t min, int64_t max, int32_t shift) { BOOST_CHECK(min >= lower && min <= upper && max >= lower && max <= upper); if (bits) { for (int64_t l = min; l <= max; ++l) { if (bits->getAndSet((int32_t)(l - lower))) BOOST_FAIL("getAndSet failure"); // extra exit condition to prevent overflow on MAX_VALUE if (l == max) break; } } if (neededBoundsFirst == neededBoundsLast || neededShiftsFirst == neededShiftsLast) return; // make unsigned longs for easier display and understanding min ^= 0x8000000000000000LL; max ^= 0x8000000000000000LL; BOOST_CHECK_EQUAL(*neededShiftsFirst++, shift); BOOST_CHECK_EQUAL(*neededBoundsFirst++, MiscUtils::unsignedShift(min, (int64_t)shift)); // inner min bound BOOST_CHECK_EQUAL(*neededBoundsFirst++, MiscUtils::unsignedShift(max, (int64_t)shift)); // inner max bound } }; static void checkLongRangeSplit(int64_t lower, int64_t upper, 
int32_t precisionStep, bool useBitSet, Collection neededBounds, Collection neededShifts) { OpenBitSetPtr bits = useBitSet ? newLucene((int32_t)(upper - lower + 1)) : OpenBitSetPtr(); NumericUtils::splitLongRange(newLucene(lower, upper, bits, neededBounds.begin(), neededBounds.end(), neededShifts.begin(), neededShifts.end()), precisionStep, lower, upper); if (useBitSet) { // after flipping all bits in the range, the cardinality should be zero bits->flip(0, (int32_t)(upper - lower + 1)); BOOST_CHECK(bits->isEmpty()); } } class CheckIntRangeBuilder : public IntRangeBuilder { public: CheckIntRangeBuilder(int32_t lower, int32_t upper, OpenBitSetPtr bits, Collection::iterator neededBoundsFirst, Collection::iterator neededBoundsLast, Collection::iterator neededShiftsFirst, Collection::iterator neededShiftsLast) { this->lower = lower; this->upper = upper; this->bits = bits; this->neededBoundsFirst = neededBoundsFirst; this->neededBoundsLast = neededBoundsLast; this->neededShiftsFirst = neededShiftsFirst; this->neededShiftsLast = neededShiftsLast; } virtual ~CheckIntRangeBuilder() { } protected: int32_t lower; int32_t upper; OpenBitSetPtr bits; Collection::iterator neededBoundsFirst; Collection::iterator neededBoundsLast; Collection::iterator neededShiftsFirst; Collection::iterator neededShiftsLast; public: virtual void addRange(int32_t min, int32_t max, int32_t shift) { BOOST_CHECK(min >= lower && min <= upper && max >= lower && max <= upper); if (bits) { for (int32_t l = min; l <= max; ++l) { if (bits->getAndSet((int32_t)(l - lower))) BOOST_FAIL("getAndSet failure"); // extra exit condition to prevent overflow on MAX_VALUE if (l == max) break; } } if (neededBoundsFirst == neededBoundsLast || neededShiftsFirst == neededShiftsLast) return; // make unsigned longs for easier display and understanding min ^= 0x80000000; max ^= 0x80000000; BOOST_CHECK_EQUAL(*neededShiftsFirst++, shift); BOOST_CHECK_EQUAL(*neededBoundsFirst++, MiscUtils::unsignedShift(min, shift)); // inner min 
bound BOOST_CHECK_EQUAL(*neededBoundsFirst++, MiscUtils::unsignedShift(max, shift)); // inner max bound } }; static void checkIntRangeSplit(int32_t lower, int32_t upper, int32_t precisionStep, bool useBitSet, Collection neededBounds, Collection neededShifts) { OpenBitSetPtr bits = useBitSet ? newLucene((int32_t)(upper - lower + 1)) : OpenBitSetPtr(); NumericUtils::splitIntRange(newLucene(lower, upper, bits, neededBounds.begin(), neededBounds.end(), neededShifts.begin(), neededShifts.end()), precisionStep, lower, upper); if (useBitSet) { // after flipping all bits in the range, the cardinality should be zero bits->flip(0, (int32_t)(upper - lower + 1)); BOOST_CHECK(bits->isEmpty()); } } BOOST_AUTO_TEST_CASE(testLongConversionAndOrdering) { // generate a series of encoded longs, each numerical one bigger than the one before String last; for (int64_t l = -100000; l < 100000; ++l) { String act = NumericUtils::longToPrefixCoded(l); if (!last.empty()) { // test if smaller if (last.compare(act) >= 0) BOOST_FAIL("compare failure"); } // test is back and forward conversion works BOOST_CHECK_EQUAL(l, NumericUtils::prefixCodedToLong(act)); // next step last = act; } } BOOST_AUTO_TEST_CASE(testIntConversionAndOrdering) { // generate a series of encoded ints, each numerical one bigger than the one before String last; for (int32_t l = -100000; l < 100000; ++l) { String act = NumericUtils::intToPrefixCoded(l); if (!last.empty()) { // test if smaller if (last.compare(act) >= 0) BOOST_FAIL("compare failure"); } // test is back and forward conversion works BOOST_CHECK_EQUAL(l, NumericUtils::prefixCodedToInt(act)); // next step last = act; } } BOOST_AUTO_TEST_CASE(testLongSpecialValues) { static const int64_t vals[] = {LLONG_MIN, LLONG_MIN + 1, LLONG_MIN + 2, -5003400000000LL, -4000LL, -3000LL, -2000LL, -1000LL, -1LL, 0LL, 1LL, 10LL, 300LL, 50006789999999999LL, LLONG_MAX - 2, LLONG_MAX - 1, LLONG_MAX}; int32_t length = SIZEOF_ARRAY(vals); Collection prefixVals = 
Collection::newInstance(length); for (int32_t i = 0; i < length; ++i) { prefixVals[i] = NumericUtils::longToPrefixCoded(vals[i]); // check forward and back conversion BOOST_CHECK_EQUAL(vals[i], NumericUtils::prefixCodedToLong(prefixVals[i])); // test if decoding values as long fails correctly BOOST_CHECK_EXCEPTION(NumericUtils::prefixCodedToInt(prefixVals[i]), NumberFormatException, check_exception(LuceneException::NumberFormat)); } // check sort order (prefixVals should be ascending) for (int32_t i = 1; i < prefixVals.size(); ++i) BOOST_CHECK(prefixVals[i - 1].compare(prefixVals[i]) < 0); // check the prefix encoding, lower precision should have the difference to original // value equal to the lower removed bits for (int32_t i = 0; i < length; ++i) { for (int32_t j = 0; j < 32; ++j) { int64_t prefixVal = NumericUtils::prefixCodedToLong(NumericUtils::longToPrefixCoded(vals[i], j)); int64_t mask = ((int64_t)1 << j) - 1; BOOST_CHECK_EQUAL(vals[i] & mask, vals[i] - prefixVal); } } } BOOST_AUTO_TEST_CASE(testIntSpecialValues) { static const int32_t vals[] = {INT_MIN, INT_MIN + 1, INT_MIN + 2, -64765767, -4000, -3000, -2000, -1000, -1, 0, 1, 10, 300, 765878989, INT_MAX - 2, INT_MAX- 1, INT_MAX}; int32_t length = SIZEOF_ARRAY(vals); Collection prefixVals = Collection::newInstance(length); for (int32_t i = 0; i < length; ++i) { prefixVals[i] = NumericUtils::intToPrefixCoded(vals[i]); // check forward and back conversion BOOST_CHECK_EQUAL(vals[i], NumericUtils::prefixCodedToInt(prefixVals[i])); // test if decoding values as long fails correctly BOOST_CHECK_EXCEPTION(NumericUtils::prefixCodedToLong(prefixVals[i]), NumberFormatException, check_exception(LuceneException::NumberFormat)); } // check sort order (prefixVals should be ascending) for (int32_t i = 1; i < prefixVals.size(); ++i) BOOST_CHECK(prefixVals[i - 1].compare(prefixVals[i]) < 0); // check the prefix encoding, lower precision should have the difference to original // value equal to the lower removed bits for 
(int32_t i = 0; i < length; ++i) { for (int32_t j = 0; j < 32; ++j) { int32_t prefixVal = NumericUtils::prefixCodedToInt(NumericUtils::intToPrefixCoded(vals[i], j)); int32_t mask = ((int32_t)1 << j) - 1; BOOST_CHECK_EQUAL(vals[i] & mask, vals[i] - prefixVal); } } } BOOST_AUTO_TEST_CASE(testDoubles) { static const double vals[] = {-std::numeric_limits::infinity(), -2.3E25, -1.0E15, -1.0, -1.0E-1, -1.0E-2, -0.0, +0.0, 1.0E-2, 1.0E-1, 1.0, 1.0E15, 2.3E25, std::numeric_limits::infinity()}; int32_t length = SIZEOF_ARRAY(vals); Collection longVals = Collection::newInstance(length); // check forward and back conversion for (int32_t i = 0; i < length; ++i) { longVals[i] = NumericUtils::doubleToSortableLong(vals[i]); BOOST_CHECK_EQUAL(vals[i], NumericUtils::sortableLongToDouble(longVals[i])); } // check sort order (longVals should be ascending) for (int32_t i = 1; i < longVals.size(); ++i) BOOST_CHECK(longVals[i - 1] < longVals[i]); } /// NumericRangeQuery errors with endpoints near long min and max values BOOST_AUTO_TEST_CASE(testLongExtremeValues) { // upper end extremes checkLongRangeSplit(LLONG_MAX, LLONG_MAX, 1, true, newCollection(0xffffffffffffffffLL, 0xffffffffffffffffLL), newCollection(0) ); checkLongRangeSplit(LLONG_MAX, LLONG_MAX, 2, true, newCollection(0xffffffffffffffffLL, 0xffffffffffffffffLL), newCollection(0) ); checkLongRangeSplit(LLONG_MAX, LLONG_MAX, 4, true, newCollection(0xffffffffffffffffLL, 0xffffffffffffffffLL), newCollection(0) ); checkLongRangeSplit(LLONG_MAX, LLONG_MAX, 6, true, newCollection(0xffffffffffffffffLL, 0xffffffffffffffffLL), newCollection(0) ); checkLongRangeSplit(LLONG_MAX, LLONG_MAX, 8, true, newCollection(0xffffffffffffffffLL ,0xffffffffffffffffLL), newCollection(0) ); checkLongRangeSplit(LLONG_MAX, LLONG_MAX, 64, true, newCollection(0xffffffffffffffffLL, 0xffffffffffffffffLL), newCollection(0) ); checkLongRangeSplit(LLONG_MAX - 0xfLL, LLONG_MAX, 4, true, newCollection(0xfffffffffffffffLL, 0xfffffffffffffffLL), newCollection(4) ); 
checkLongRangeSplit(LLONG_MAX - 0x10LL, LLONG_MAX, 4, true, newCollection(0xffffffffffffffefLL, 0xffffffffffffffefLL, 0xfffffffffffffffLL, 0xfffffffffffffffLL), newCollection(0, 4) ); // lower end extremes checkLongRangeSplit(LLONG_MIN, LLONG_MIN, 1, true, newCollection(0x0000000000000000LL,0x0000000000000000LL), newCollection(0) ); checkLongRangeSplit(LLONG_MIN, LLONG_MIN, 2, true, newCollection(0x0000000000000000LL, 0x0000000000000000LL), newCollection(0) ); checkLongRangeSplit(LLONG_MIN, LLONG_MIN, 4, true, newCollection(0x0000000000000000LL, 0x0000000000000000LL), newCollection(0) ); checkLongRangeSplit(LLONG_MIN, LLONG_MIN, 6, true, newCollection(0x0000000000000000LL, 0x0000000000000000LL), newCollection(0) ); checkLongRangeSplit(LLONG_MIN, LLONG_MIN, 8, true, newCollection(0x0000000000000000LL, 0x0000000000000000LL), newCollection(0) ); checkLongRangeSplit(LLONG_MIN, LLONG_MIN, 64, true, newCollection(0x0000000000000000LL, 0x0000000000000000LL), newCollection(0) ); checkLongRangeSplit(LLONG_MIN, LLONG_MIN + 0xfLL, 4, true, newCollection(0x000000000000000LL, 0x000000000000000LL), newCollection(4) ); checkLongRangeSplit(LLONG_MIN, LLONG_MIN + 0x10LL, 4, true, newCollection(0x0000000000000010LL, 0x0000000000000010LL, 0x000000000000000LL, 0x000000000000000LL), newCollection(0, 4) ); } static int64_t randomLong(RandomPtr random) { int64_t val; switch (random->nextInt(4)) { case 0: val = 1LL << (int64_t)random->nextInt(63); // patterns like 0x000000100000 (-1 yields patterns like 0x0000fff) break; case 1: val = -1LL << (int64_t)random->nextInt(63); // patterns like 0xfffff00000 break; default: val = (int64_t)random->nextInt(); } val += random->nextInt(5) - 2; if (random->nextInt() % 2 == 1) { if (random->nextInt() % 2 == 1) val += random->nextInt(100) - 50; if (random->nextInt() % 2 == 1) val = ~val; if (random->nextInt() % 2 == 1) val = val << 1; if (random->nextInt() % 2 == 1) val = MiscUtils::unsignedShift(val, (int64_t)1); } return val; } static void 
executeOneRandomSplit(RandomPtr random) { int64_t lower = randomLong(random); int64_t len = (int64_t)random->nextInt(16384 * 1024); // not too large bitsets, else OOME! while (lower + len < lower) // overflow lower >>= 1; checkLongRangeSplit(lower, lower + len, random->nextInt(64) + 1, true, Collection::newInstance(), Collection::newInstance()); } BOOST_AUTO_TEST_CASE(testRandomSplit) { RandomPtr random = newLucene(123); for (int32_t i = 0; i < 100; ++i) executeOneRandomSplit(random); } BOOST_AUTO_TEST_CASE(testSplitLongRange) { Collection neededBounds = Collection::newInstance(14); neededBounds[0] = 0x7fffffffffffec78LL; neededBounds[1] = 0x7fffffffffffec7fLL; neededBounds[2] = 0x8000000000002510LL; neededBounds[3] = 0x800000000000251cLL; neededBounds[4] = 0x7fffffffffffec8LL; neededBounds[5] = 0x7fffffffffffecfLL; neededBounds[6] = 0x800000000000250LL; neededBounds[7] = 0x800000000000250LL; neededBounds[8] = 0x7fffffffffffedLL; neededBounds[9] = 0x7fffffffffffefLL; neededBounds[10] = 0x80000000000020LL; neededBounds[11] = 0x80000000000024LL; neededBounds[12] = 0x7ffffffffffffLL; neededBounds[13] = 0x8000000000001LL; // a hard-coded "standard" range checkLongRangeSplit(-5000, 9500, 4, true, neededBounds, newCollection(0, 0, 4, 4, 8, 8, 12)); // the same with no range splitting checkLongRangeSplit(-5000, 9500, 64, true, newCollection(0x7fffffffffffec78LL, 0x800000000000251cLL), newCollection(0) ); // this tests optimized range splitting, if one of the inner bounds // is also the bound of the next lower precision, it should be used completely checkLongRangeSplit(0, 1024 + 63, 4, true, newCollection(0x800000000000040LL, 0x800000000000043LL, 0x80000000000000LL, 0x80000000000003LL), newCollection(4, 8) ); // the full long range should only consist of a lowest precision range; // no bitset testing here, as too much memory needed checkLongRangeSplit(LLONG_MIN, LLONG_MAX, 8, false, newCollection(0x00LL, 0xffLL), newCollection(56) ); // the same with precisionStep=4 
checkLongRangeSplit(LLONG_MIN, LLONG_MAX, 4, false, newCollection(0x00LL, 0xfLL), newCollection(60) ); // the same with precisionStep=2 checkLongRangeSplit(LLONG_MIN, LLONG_MAX, 2, false, newCollection(0x00LL, 0x3LL), newCollection(62) ); // the same with precisionStep=1 checkLongRangeSplit(LLONG_MIN, LLONG_MAX, 1, false, newCollection(0x00LL, 0x1LL), newCollection(63) ); // a inverse range should produce no sub-ranges checkLongRangeSplit(9500, -5000, 4, false, Collection::newInstance(), Collection::newInstance()); // a 0-length range should reproduce the range itself checkLongRangeSplit(9500, 9500, 4, false, newCollection(0x800000000000251cLL, 0x800000000000251cLL), newCollection(0) ); } BOOST_AUTO_TEST_CASE(testSplitIntRange) { Collection neededBounds = Collection::newInstance(14); neededBounds[0] = 0x7fffec78; neededBounds[1] = 0x7fffec7f; neededBounds[2] = 0x80002510; neededBounds[3] = 0x8000251c; neededBounds[4] = 0x7fffec8; neededBounds[5] = 0x7fffecf; neededBounds[6] = 0x8000250; neededBounds[7] = 0x8000250; neededBounds[8] = 0x7fffed; neededBounds[9] = 0x7fffef; neededBounds[10] = 0x800020; neededBounds[11] = 0x800024; neededBounds[12] = 0x7ffff; neededBounds[13] = 0x80001; // a hard-coded "standard" range checkIntRangeSplit(-5000, 9500, 4, true, neededBounds, newCollection(0, 0, 4, 4, 8, 8, 12)); // the same with no range splitting checkIntRangeSplit(-5000, 9500, 32, true, newCollection(0x7fffec78, 0x8000251c), newCollection(0) ); // this tests optimized range splitting, if one of the inner bounds // is also the bound of the next lower precision, it should be used completely checkIntRangeSplit(0, 1024 + 63, 4, true, newCollection(0x8000040, 0x8000043, 0x800000, 0x800003), newCollection(4, 8) ); // the full int range should only consist of a lowest precision range; // no bitset testing here, as too much memory needed checkIntRangeSplit(INT_MIN, INT_MAX, 8, false, newCollection(0x00, 0xff), newCollection(24) ); // the same with precisionStep=4 
checkIntRangeSplit(INT_MIN, INT_MAX, 4, false, newCollection(0x00, 0xf), newCollection(28) ); // the same with precisionStep=2 checkIntRangeSplit(INT_MIN, INT_MAX, 2, false, newCollection(0x00, 0x3), newCollection(30) ); // the same with precisionStep=1 checkIntRangeSplit(INT_MIN, INT_MAX, 1, false, newCollection(0x00, 0x1), newCollection(31) ); // a inverse range should produce no sub-ranges checkIntRangeSplit(9500, -5000, 4, false, Collection::newInstance(), Collection::newInstance()); // a 0-length range should reproduce the range itself checkIntRangeSplit(9500, 9500, 4, false, newCollection(0x8000251c, 0x8000251c), newCollection(0) ); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/OpenBitSetTest.cpp000066400000000000000000000206701217574114600234230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "OpenBitSet.h" #include "OpenBitSetIterator.h" #include "BitSet.h" #include "BitUtil.h" #include "Random.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(OpenBitSetTest, LuceneTestFixture) static RandomPtr randBitSet = newLucene(123); static void doGet(BitSetPtr a, OpenBitSetPtr b) { int32_t max = a->size(); for (int32_t i = 0; i < max; ++i) BOOST_CHECK_EQUAL(a->get(i), b->get(i)); } static void doNextSetBit(BitSetPtr a, OpenBitSetPtr b) { int32_t aa = -1; int32_t bb = -1; do { aa = a->nextSetBit(aa + 1); bb = b->nextSetBit(bb + 1); BOOST_CHECK_EQUAL(aa, bb); } while (aa >= 0); } static void doIterate1(BitSetPtr a, OpenBitSetPtr b) { int32_t aa = -1; int32_t bb = -1; OpenBitSetIteratorPtr iterator = newLucene(b); do { aa = a->nextSetBit(aa + 1); bb = (randBitSet->nextInt() % 2 == 0) ? iterator->nextDoc() : iterator->advance(bb + 1); BOOST_CHECK_EQUAL(aa == -1 ? DocIdSetIterator::NO_MORE_DOCS : aa, bb); } while (aa >= 0); } static void doIterate2(BitSetPtr a, OpenBitSetPtr b) { int32_t aa = -1; int32_t bb = -1; OpenBitSetIteratorPtr iterator = newLucene(b); do { aa = a->nextSetBit(aa + 1); bb = (randBitSet->nextInt() % 2 == 0) ? iterator->nextDoc() : iterator->advance(bb + 1); BOOST_CHECK_EQUAL(aa == -1 ? 
DocIdSetIterator::NO_MORE_DOCS : aa, bb); } while (aa >= 0); } static void doIterate(BitSetPtr a, OpenBitSetPtr b, int32_t mode) { if (mode == 1) doIterate1(a, b); else if (mode == 2) doIterate2(a, b); } static void doRandomSets(int32_t maxSize, int32_t iter, int32_t mode) { BitSetPtr a0; OpenBitSetPtr b0; for (int32_t i = 0; i < iter; ++i) { int32_t sz = randBitSet->nextInt(maxSize); BitSetPtr a = newLucene(sz); OpenBitSetPtr b = newLucene(sz); // test the various ways of setting bits if (sz > 0) { int32_t nOper = randBitSet->nextInt(sz); for (int32_t j = 0; j < nOper; ++j) { int32_t idx = randBitSet->nextInt(sz); a->set(idx); b->fastSet(idx); idx = randBitSet->nextInt(sz); a->clear(idx); b->fastClear(idx); idx = randBitSet->nextInt(sz); a->flip(idx); b->fastFlip(idx); bool val = b->flipAndGet(idx); bool val2 = b->flipAndGet(idx); BOOST_CHECK_NE(val, val2); val = b->getAndSet(idx); BOOST_CHECK_EQUAL(val2, val); BOOST_CHECK(b->get(idx)); if (!val) b->fastClear(idx); BOOST_CHECK_EQUAL(b->get(idx), val); } } // test that the various ways of accessing the bits are equivalent doGet(a, b); // test ranges, including possible extension int32_t fromIndex = randBitSet->nextInt(sz + 80); int32_t toIndex = fromIndex + randBitSet->nextInt((sz >> 1) + 1); BitSetPtr aa = boost::dynamic_pointer_cast(a->clone()); aa->flip(fromIndex, toIndex); OpenBitSetPtr bb = boost::dynamic_pointer_cast(b->clone()); bb->flip(fromIndex, toIndex); doIterate(aa, bb, mode); // a problem here is from flip or doIterate fromIndex = randBitSet->nextInt(sz + 80); toIndex = fromIndex + randBitSet->nextInt((sz >> 1) + 1); aa = boost::dynamic_pointer_cast(a->clone()); aa->clear(fromIndex, toIndex); bb = boost::dynamic_pointer_cast(b->clone()); bb->clear(fromIndex, toIndex); doNextSetBit(aa, bb); // a problem here is from clear() or nextSetBit fromIndex = randBitSet->nextInt(sz + 80); toIndex = fromIndex + randBitSet->nextInt((sz >> 1) + 1); aa = boost::dynamic_pointer_cast(a->clone()); 
aa->set((uint32_t)fromIndex, (uint32_t)toIndex); bb = boost::dynamic_pointer_cast(b->clone()); bb->set(fromIndex, toIndex); doNextSetBit(aa, bb); // a problem here is from set() or nextSetBit if (a0) { BOOST_CHECK_EQUAL(a->equals(a0), b->equals(b0)); BOOST_CHECK_EQUAL(a->cardinality(), b->cardinality()); BitSetPtr a_and = boost::dynamic_pointer_cast(a->clone()); a_and->_and(a0); BitSetPtr a_or = boost::dynamic_pointer_cast(a->clone()); a_or->_or(a0); BitSetPtr a_xor = boost::dynamic_pointer_cast(a->clone()); a_xor->_xor(a0); BitSetPtr a_andn = boost::dynamic_pointer_cast(a->clone()); a_andn->andNot(a0); OpenBitSetPtr b_and = boost::dynamic_pointer_cast(b->clone()); BOOST_CHECK(b->equals(b_and)); b_and->_and(b0); OpenBitSetPtr b_or = boost::dynamic_pointer_cast(b->clone()); b_or->_or(b0); OpenBitSetPtr b_xor = boost::dynamic_pointer_cast(b->clone()); b_xor->_xor(b0); OpenBitSetPtr b_andn = boost::dynamic_pointer_cast(b->clone()); b_andn->andNot(b0); doIterate(a_and, b_and, mode); doIterate(a_or, b_or, mode); doIterate(a_xor, b_xor, mode); doIterate(a_andn, b_andn, mode); BOOST_CHECK_EQUAL(a_and->cardinality(), b_and->cardinality()); BOOST_CHECK_EQUAL(a_or->cardinality(), b_or->cardinality()); BOOST_CHECK_EQUAL(a_xor->cardinality(), b_xor->cardinality()); BOOST_CHECK_EQUAL(a_andn->cardinality(), b_andn->cardinality()); // test non-mutating popcounts BOOST_CHECK_EQUAL(b_and->cardinality(), OpenBitSet::intersectionCount(b, b0)); BOOST_CHECK_EQUAL(b_or->cardinality(), OpenBitSet::unionCount(b, b0)); BOOST_CHECK_EQUAL(b_xor->cardinality(), OpenBitSet::xorCount(b, b0)); BOOST_CHECK_EQUAL(b_andn->cardinality(), OpenBitSet::andNotCount(b, b0)); } a0=a; b0=b; } } BOOST_AUTO_TEST_CASE(testSmall) { randBitSet->setSeed(17); doRandomSets(1200, 1000, 1); doRandomSets(1200, 1000, 2); } /* BOOST_AUTO_TEST_CASE(testBig) { randBitSet->setSeed(17); doRandomSets(2000, 200000, 1); doRandomSets(2000, 200000, 2); } */ BOOST_AUTO_TEST_CASE(testEquals) { randBitSet->setSeed(17); 
OpenBitSetPtr b1 = newLucene(1111); OpenBitSetPtr b2 = newLucene(2222); BOOST_CHECK(b1->equals(b2)); BOOST_CHECK(b2->equals(b1)); b1->set(10); BOOST_CHECK(!b1->equals(b2)); BOOST_CHECK(!b2->equals(b1)); b2->set(10); BOOST_CHECK(b1->equals(b2)); BOOST_CHECK(b2->equals(b1)); b2->set(2221); BOOST_CHECK(!b1->equals(b2)); BOOST_CHECK(!b2->equals(b1)); b1->set(2221); BOOST_CHECK(b1->equals(b2)); BOOST_CHECK(b2->equals(b1)); } BOOST_AUTO_TEST_CASE(testBitUtils) { randBitSet->setSeed(17); int64_t num = 100000; BOOST_CHECK_EQUAL(5, BitUtil::ntz(num)); BOOST_CHECK_EQUAL(5, BitUtil::ntz2(num)); BOOST_CHECK_EQUAL(5, BitUtil::ntz3(num)); num = 10; BOOST_CHECK_EQUAL(1, BitUtil::ntz(num)); BOOST_CHECK_EQUAL(1, BitUtil::ntz2(num)); BOOST_CHECK_EQUAL(1, BitUtil::ntz3(num)); for (int32_t i = 0; i < 64; ++i) { num = (int64_t)1 << i; BOOST_CHECK_EQUAL(i, BitUtil::ntz(num)); BOOST_CHECK_EQUAL(i, BitUtil::ntz2(num)); BOOST_CHECK_EQUAL(i, BitUtil::ntz3(num)); } } BOOST_AUTO_TEST_CASE(testHashCodeEquals) { OpenBitSetPtr bs1 = newLucene(200); OpenBitSetPtr bs2 = newLucene(64); bs1->set(3); bs2->set(3); BOOST_CHECK(bs1->equals(bs2)); BOOST_CHECK_EQUAL(bs1->hashCode(), bs2->hashCode()); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/PriorityQueueTest.cpp000066400000000000000000000061631217574114600242360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "PriorityQueue.h" #include "Random.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(PriorityQueueTest, LuceneTestFixture) DECLARE_SHARED_PTR(IntegerQueue) class IntegerQueue : public PriorityQueue { public: IntegerQueue(int32_t maxSize) : PriorityQueue(maxSize) { } virtual ~IntegerQueue() { } }; DECLARE_SHARED_PTR(IntegerPtrQueue) typedef boost::shared_ptr IntPtr; class IntegerPtrQueue : public PriorityQueue { public: IntegerPtrQueue(int32_t maxSize) : PriorityQueue(maxSize) { } virtual ~IntegerPtrQueue() { } protected: virtual bool lessThan(const IntPtr& first, const IntPtr& second) { return (*first < *second); } }; BOOST_AUTO_TEST_CASE(testPriorityQueue) { IntegerQueuePtr testQueue = newLucene(10000); int64_t sum = 0; RandomPtr random = newLucene(); for (int32_t i = 0; i < 10000; ++i) { int32_t next = random->nextInt(); sum += next; testQueue->add(next); } int32_t last = INT_MIN; int64_t sum2 = 0; for (int32_t i = 0; i < 10000; ++i) { int32_t next = testQueue->pop(); BOOST_CHECK(next >= last); last = next; sum2 += last; } BOOST_CHECK_EQUAL(sum, sum2); } BOOST_AUTO_TEST_CASE(testPriorityQueueOverflow) { IntegerQueuePtr testQueue = newLucene(3); testQueue->addOverflow(2); testQueue->addOverflow(3); testQueue->addOverflow(1); testQueue->addOverflow(5); testQueue->addOverflow(7); testQueue->addOverflow(1); BOOST_CHECK_EQUAL(testQueue->size(), 3); BOOST_CHECK_EQUAL(3, testQueue->top()); } BOOST_AUTO_TEST_CASE(testPriorityQueueClear) { IntegerQueuePtr testQueue = newLucene(3); testQueue->add(2); testQueue->add(3); testQueue->add(1); BOOST_CHECK_EQUAL(testQueue->size(), 3); testQueue->clear(); BOOST_CHECK(testQueue->empty()); } BOOST_AUTO_TEST_CASE(testPriorityQueueUpdate) { IntegerPtrQueuePtr testQueue = newLucene(1024); testQueue->add(newInstance(2)); testQueue->add(newInstance(3)); testQueue->add(newInstance(1)); 
testQueue->add(newInstance(4)); testQueue->add(newInstance(5)); BOOST_CHECK_EQUAL(testQueue->size(), 5); IntPtr top = testQueue->top(); BOOST_CHECK_EQUAL(*top, 1); *top = 6; testQueue->updateTop(); BOOST_CHECK_EQUAL(*testQueue->pop(), 2); BOOST_CHECK_EQUAL(*testQueue->pop(), 3); BOOST_CHECK_EQUAL(*testQueue->pop(), 4); BOOST_CHECK_EQUAL(*testQueue->pop(), 5); BOOST_CHECK_EQUAL(*testQueue->pop(), 6); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/SimpleLRUCacheTest.cpp000066400000000000000000000126541217574114600241520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "SimpleLRUCache.h" #include "Term.h" using namespace Lucene; typedef SimpleLRUCache TestLRUSimpleCache; typedef SimpleLRUCache< TermPtr, int32_t, luceneHash, luceneEquals > TestLRUTermCache; BOOST_FIXTURE_TEST_SUITE(SimpleLRUCacheTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testCachePut) { TestLRUSimpleCache testCache(5); testCache.put(1, L"test 1"); testCache.put(2, L"test 2"); testCache.put(3, L"test 3"); testCache.put(4, L"test 4"); testCache.put(5, L"test 5"); testCache.put(6, L"test 6"); // this should pop off "1" because size = 5 BOOST_CHECK_EQUAL(testCache.size(), 5); int32_t expectedKey = 6; // lru = 6, 5, 4, 3, 2 for (TestLRUSimpleCache::const_iterator cache = testCache.begin(); cache != testCache.end(); ++cache) BOOST_CHECK_EQUAL(cache->first, expectedKey--); } BOOST_AUTO_TEST_CASE(testCacheGet) { TestLRUSimpleCache testCache(5); testCache.put(1, L"test 1"); testCache.put(2, L"test 2"); testCache.put(3, L"test 3"); testCache.put(4, L"test 4"); testCache.put(5, L"test 5"); 
BOOST_CHECK_EQUAL(testCache.size(), 5); BOOST_CHECK_EQUAL(testCache.get(2), L"test 2"); BOOST_CHECK_EQUAL(testCache.get(3), L"test 3"); } BOOST_AUTO_TEST_CASE(testCacheExists) { TestLRUSimpleCache testCache(5); testCache.put(1, L"test 1"); testCache.put(2, L"test 2"); testCache.put(3, L"test 3"); testCache.put(4, L"test 4"); testCache.put(5, L"test 5"); BOOST_CHECK(testCache.contains(1)); BOOST_CHECK(!testCache.contains(7)); } BOOST_AUTO_TEST_CASE(testCachePutGet) { TestLRUSimpleCache testCache(5); testCache.put(1, L"test 1"); testCache.put(2, L"test 2"); testCache.put(3, L"test 3"); testCache.put(4, L"test 4"); testCache.put(5, L"test 5"); BOOST_CHECK_EQUAL(testCache.size(), 5); BOOST_CHECK_EQUAL(testCache.get(2), L"test 2"); BOOST_CHECK_EQUAL(testCache.get(3), L"test 3"); testCache.put(6, L"test 6"); testCache.put(7, L"test 7"); testCache.put(8, L"test 8"); Collection expectedLRU = Collection::newInstance(); for (TestLRUSimpleCache::const_iterator cache = testCache.begin(); cache != testCache.end(); ++cache) expectedLRU.add(cache->first); BOOST_CHECK_EQUAL(expectedLRU.size(), 5); // lru = 8, 7, 6, 3, 2 BOOST_CHECK_EQUAL(expectedLRU[0], 8); BOOST_CHECK_EQUAL(expectedLRU[1], 7); BOOST_CHECK_EQUAL(expectedLRU[2], 6); BOOST_CHECK_EQUAL(expectedLRU[3], 3); BOOST_CHECK_EQUAL(expectedLRU[4], 2); } BOOST_AUTO_TEST_CASE(testRandomAccess) { const int32_t n = 100; TestLRUSimpleCache cache(n); String value = L"test"; for (int32_t i = 0; i < n; ++i) cache.put(i, value); // access every 2nd item in cache for (int32_t i = 0; i < n; i += 2) BOOST_CHECK_NE(cache.get(i), L""); // add n/2 elements to cache, the ones that weren't touched in the previous loop should now be thrown away for (int32_t i = n; i < n + (n / 2); ++i) cache.put(i, value); // access every 4th item in cache for (int32_t i = 0; i < n; i += 4) BOOST_CHECK_NE(cache.get(i), L""); // add 3/4n elements to cache, the ones that weren't touched in the previous loops should now be thrown away for (int32_t i = n; i < n + 
(n * 3 / 4); ++i) cache.put(i, value); // access every 4th item in cache for (int32_t i = 0; i < n; i += 4) BOOST_CHECK_NE(cache.get(i), L""); } BOOST_AUTO_TEST_CASE(testTermCache) { TestLRUTermCache testCache(5); testCache.put(newLucene(L"field1", L"text1"), 1); testCache.put(newLucene(L"field2", L"text2"), 2); testCache.put(newLucene(L"field3", L"text3"), 3); testCache.put(newLucene(L"field4", L"text4"), 4); testCache.put(newLucene(L"field5", L"text5"), 5); BOOST_CHECK_EQUAL(testCache.size(), 5); BOOST_CHECK_EQUAL(testCache.get(newLucene(L"field2", L"text2")), 2); BOOST_CHECK_EQUAL(testCache.get(newLucene(L"field3", L"text3")), 3); testCache.put(newLucene(L"field6", L"text6"), 6); testCache.put(newLucene(L"field7", L"text7"), 7); testCache.put(newLucene(L"field8", L"text8"), 8); Collection expectedLRU = Collection::newInstance(); for (TestLRUTermCache::const_iterator cache = testCache.begin(); cache != testCache.end(); ++cache) expectedLRU.add(cache->first); BOOST_CHECK_EQUAL(expectedLRU.size(), 5); // lru = field8, field7, field6, field3, field2 BOOST_CHECK(expectedLRU[0]->equals(newLucene(L"field8", L"text8"))); BOOST_CHECK(expectedLRU[1]->equals(newLucene(L"field7", L"text7"))); BOOST_CHECK(expectedLRU[2]->equals(newLucene(L"field6", L"text6"))); BOOST_CHECK(expectedLRU[3]->equals(newLucene(L"field3", L"text3"))); BOOST_CHECK(expectedLRU[4]->equals(newLucene(L"field2", L"text2"))); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/SortedVIntListTest.cpp000066400000000000000000000124551217574114600243060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "SortedVIntList.h" #include "BitSet.h" #include "DocIdSetIterator.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(SortedVIntListTest, LuceneTestFixture) static const int32_t VB1 = 0x7F; static const int32_t BIT_SHIFT = 7; static const int32_t VB2 = (VB1 << BIT_SHIFT) | VB1; static const int32_t VB3 = (VB2 << BIT_SHIFT) | VB1; static const int32_t VB4 = (VB3 << BIT_SHIFT) | VB1; static int32_t vIntByteSize(int32_t i) { BOOST_CHECK(i >= 0); if (i <= VB1) return 1; if (i <= VB2) return 2; if (i <= VB3) return 3; if (i <= VB4) return 4; return 5; } static int32_t vIntListByteSize(Collection ints) { int32_t byteSize = 0; int32_t last = 0; for (int32_t i = 0; i < ints.size(); ++i) { byteSize += vIntByteSize(ints[i] - last); last = ints[i]; } return byteSize; } static void tstIterator(SortedVIntListPtr vintList, Collection ints) { for (int32_t i = 0; i < ints.size(); ++i) { if ((i > 0) && (ints[i - 1] == ints[i])) return; // DocNrSkipper should not skip to same document. } DocIdSetIteratorPtr m = vintList->iterator(); for (int32_t i = 0; i < ints.size(); ++i) { BOOST_CHECK(m->nextDoc() != DocIdSetIterator::NO_MORE_DOCS); BOOST_CHECK_EQUAL(ints[i], m->docID()); } BOOST_CHECK_EQUAL(m->nextDoc(), DocIdSetIterator::NO_MORE_DOCS); } static void tstVIntList(SortedVIntListPtr vintList, Collection ints, int32_t expectedByteSize) { BOOST_CHECK_EQUAL(ints.size(), vintList->size()); BOOST_CHECK_EQUAL(expectedByteSize, vintList->getByteSize()); tstIterator(vintList, ints); } static void tstViaBitSet(Collection ints, int32_t expectedByteSize) { int32_t MAX_INT_FOR_BITSET = 1024 * 1024; BitSetPtr bs = newLucene(); for (int32_t i = 0; i < ints.size(); ++i) { if (ints[i] > MAX_INT_FOR_BITSET) return; // BitSet takes too much memory if ((i > 0) && (ints[i - 1] == ints[i])) return; // BitSet cannot store duplicate. 
bs->set(ints[i]); } SortedVIntListPtr svil = newLucene(bs); tstVIntList(svil, ints, expectedByteSize); tstVIntList(newLucene(svil->iterator()), ints, expectedByteSize); } static void tstInts(Collection ints) { int32_t expectedByteSize = vIntListByteSize(ints); tstVIntList(newLucene(ints), ints, expectedByteSize); tstViaBitSet(ints, expectedByteSize); } static Collection fibArray(int32_t a, int32_t b, int32_t size) { Collection fib = Collection::newInstance(size); fib[0] = a; fib[1] = b; for (int32_t i = 2; i < size; ++i) fib[i] = fib[i - 1] + fib[i - 2]; return fib; } /// reverse the order of the successive differences static Collection reverseDiffs(Collection ints) { Collection res = Collection::newInstance(ints.size()); for (int32_t i = 0; i < ints.size(); ++i) res[i] = ints[ints.size() - 1] + (ints[0] - ints[ints.size() - 1 - i]); return res; } static void tstIllegalArgExc(Collection ints) { BOOST_CHECK_EXCEPTION(newLucene(ints), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); } BOOST_AUTO_TEST_CASE(test01) { tstInts(Collection::newInstance()); } BOOST_AUTO_TEST_CASE(test02) { tstInts(newCollection(0)); } BOOST_AUTO_TEST_CASE(test04a) { tstInts(newCollection(0, VB2 - 1)); } BOOST_AUTO_TEST_CASE(test04b) { tstInts(newCollection(0, VB2)); } BOOST_AUTO_TEST_CASE(test04c) { tstInts(newCollection(0, VB2 + 1)); } BOOST_AUTO_TEST_CASE(test05) { tstInts(fibArray(0, 1, 7)); // includes duplicate value 1 } BOOST_AUTO_TEST_CASE(test05b) { tstInts(reverseDiffs(fibArray(0, 1, 7))); // includes duplicate value 1 } BOOST_AUTO_TEST_CASE(test06) { tstInts(fibArray(1, 2, 45)); // no duplicates, size 46 exceeds max int. 
} BOOST_AUTO_TEST_CASE(test06b) { tstInts(reverseDiffs(fibArray(1, 2, 45))); // includes duplicate value 1 } BOOST_AUTO_TEST_CASE(test07a) { tstInts(newCollection(0, VB3)); } BOOST_AUTO_TEST_CASE(test07b) { tstInts(newCollection(1, VB3 + 2)); } BOOST_AUTO_TEST_CASE(test07c) { tstInts(newCollection(2, VB3 + 4)); } BOOST_AUTO_TEST_CASE(test08a) { tstInts(newCollection(0, VB4 + 1)); } BOOST_AUTO_TEST_CASE(test08b) { tstInts(newCollection(1, VB4 + 1)); } BOOST_AUTO_TEST_CASE(test08c) { tstInts(newCollection(2, VB4 + 1)); } BOOST_AUTO_TEST_CASE(test10) { tstIllegalArgExc(newCollection(-1)); } BOOST_AUTO_TEST_CASE(test11) { tstIllegalArgExc(newCollection(1, 0)); } BOOST_AUTO_TEST_CASE(test12) { tstIllegalArgExc(newCollection(0, 1, 1, 2, 3, 5, 8, 0)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/StringReaderTest.cpp000066400000000000000000000050711217574114600237760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "StringReader.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(StringReaderTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testStringReaderChar) { StringReader reader(L"Test string"); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L'T'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L'e'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L's'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L't'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L' '); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L's'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L't'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L'r'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L'i'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L'n'); BOOST_CHECK_EQUAL((wchar_t)reader.read(), L'g'); BOOST_CHECK_EQUAL(reader.read(), StringReader::READER_EOF); } BOOST_AUTO_TEST_CASE(testStringReaderBuffer) { StringReader reader(L"Longer test string"); wchar_t buffer[50]; BOOST_CHECK_EQUAL(reader.read(buffer, 0, 6), 6); BOOST_CHECK_EQUAL(String(buffer, 6), L"Longer"); BOOST_CHECK_EQUAL(reader.read(buffer, 0, 1), 1); BOOST_CHECK_EQUAL(String(buffer, 1), L" "); BOOST_CHECK_EQUAL(reader.read(buffer, 0, 4), 4); BOOST_CHECK_EQUAL(String(buffer, 4), L"test"); BOOST_CHECK_EQUAL(reader.read(buffer, 0, 1), 1); BOOST_CHECK_EQUAL(String(buffer, 1), L" "); BOOST_CHECK_EQUAL(reader.read(buffer, 0, 6), 6); BOOST_CHECK_EQUAL(String(buffer, 6), L"string"); BOOST_CHECK_EQUAL(reader.read(buffer, 0, 1), StringReader::READER_EOF); } BOOST_AUTO_TEST_CASE(testStringReaderReset) { StringReader reader(L"Longer test string"); wchar_t buffer[50]; BOOST_CHECK_EQUAL(reader.read(buffer, 0, 6), 6); BOOST_CHECK_EQUAL(String(buffer, 6), L"Longer"); reader.reset(); BOOST_CHECK_EQUAL(reader.read(buffer, 0, 6), 6); BOOST_CHECK_EQUAL(String(buffer, 6), L"Longer"); } BOOST_AUTO_TEST_CASE(testStringReaderPastEOF) { StringReader reader(L"Short string"); 
wchar_t buffer[50]; BOOST_CHECK_EQUAL(reader.read(buffer, 0, 20), 12); BOOST_CHECK_EQUAL(reader.read(buffer, 0, 1), StringReader::READER_EOF); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/StringUtilsTest.cpp000066400000000000000000000165321217574114600237000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "UTF8Stream.h" #include "MiscUtils.h" #include "UnicodeUtils.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(StringUtilsTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testToUtf8) { String testString(L"this is a ascii string"); BOOST_CHECK_EQUAL(StringUtils::toUTF8(testString), "this is a ascii string"); } BOOST_AUTO_TEST_CASE(testToUtf8CharArray) { String testString(L"this is a ascii string"); CharArray testArray(CharArray::newInstance(testString.length())); std::copy(testString.begin(), testString.end(), testArray.get()); ByteArray expectedUft8(ByteArray::newInstance(30)); BOOST_CHECK_EQUAL(StringUtils::toUTF8(testArray.get(), testArray.size(), expectedUft8), 22); BOOST_CHECK_EQUAL(SingleString((char*)expectedUft8.get(), 22), "this is a ascii string"); } BOOST_AUTO_TEST_CASE(testToUtf8ArrayWithOffset) { String testString(L"this is a ascii string"); CharArray testArray(CharArray::newInstance(testString.size())); std::copy(testString.begin(), testString.end(), testArray.get()); ByteArray expectedUft8(ByteArray::newInstance(30)); int32_t offset = 10; // "ascii string" BOOST_CHECK_EQUAL(StringUtils::toUTF8(testArray.get() + offset, testArray.size() - offset, expectedUft8), 12); BOOST_CHECK_EQUAL(SingleString((char*)expectedUft8.get(), 12), "ascii string"); } 
BOOST_AUTO_TEST_CASE(testToUtf8Result) { String testString(L"this is a ascii string"); CharArray testArray(CharArray::newInstance(testString.size())); std::copy(testString.begin(), testString.end(), testArray.get()); UTF8ResultPtr utf8Result(newLucene()); StringUtils::toUTF8(testArray.get(), testArray.size(), utf8Result); BOOST_CHECK_EQUAL(utf8Result->length, 22); BOOST_CHECK_EQUAL(SingleString((char*)utf8Result->result.get(), 22), "this is a ascii string"); } BOOST_AUTO_TEST_CASE(testToUtf8ArrayWithTerminator) { String testString(L"this is a ascii string"); CharArray testArray(CharArray::newInstance(50)); std::copy(testString.begin(), testString.end(), testArray.get()); testArray[testString.size()] = UTF8Base::UNICODE_TERMINATOR; // terminator ByteArray expectedUft8(ByteArray::newInstance(30)); BOOST_CHECK_EQUAL(StringUtils::toUTF8(testArray.get(), testArray.size(), expectedUft8), 22); BOOST_CHECK_EQUAL(SingleString((char*)expectedUft8.get(), 22), "this is a ascii string"); } BOOST_AUTO_TEST_CASE(testToUnicode) { SingleString testString("this is a unicode string"); BOOST_CHECK_EQUAL(StringUtils::toUnicode(testString), L"this is a unicode string"); } BOOST_AUTO_TEST_CASE(testToUnicodeResult) { SingleString testString("this is a unicode string"); ByteArray testArray(ByteArray::newInstance(testString.length())); std::copy(testString.begin(), testString.end(), testArray.get()); UnicodeResultPtr unicodeResult(newLucene()); StringUtils::toUnicode(testArray.get(), testArray.size(), unicodeResult); BOOST_CHECK_EQUAL(unicodeResult->length, 24); BOOST_CHECK_EQUAL(String(unicodeResult->result.get(), 24), L"this is a unicode string"); } BOOST_AUTO_TEST_CASE(testToStringInteger) { BOOST_CHECK_EQUAL(StringUtils::toString((int32_t)1234), L"1234"); } BOOST_AUTO_TEST_CASE(testToStringLong) { BOOST_CHECK_EQUAL(StringUtils::toString((int64_t)1234), L"1234"); } BOOST_AUTO_TEST_CASE(testToStringBase) { BOOST_CHECK_EQUAL(StringUtils::toString(1234, 4), L"103102"); 
BOOST_CHECK_EQUAL(StringUtils::toString(1234, 10), L"1234"); BOOST_CHECK_EQUAL(StringUtils::toString(1234, 16), L"4d2"); BOOST_CHECK_EQUAL(StringUtils::toString(1234, StringUtils::CHARACTER_MAX_RADIX), L"ya"); } BOOST_AUTO_TEST_CASE(testToLongBase) { BOOST_CHECK_EQUAL(StringUtils::toLong(L"1234", 4), 112); BOOST_CHECK_EQUAL(StringUtils::toLong(L"1234", 10), 1234); BOOST_CHECK_EQUAL(StringUtils::toLong(L"1234", 16), 4660); BOOST_CHECK_EQUAL(StringUtils::toLong(L"1234", StringUtils::CHARACTER_MAX_RADIX), 49360); } BOOST_AUTO_TEST_CASE(testToStringLongBase) { BOOST_CHECK_EQUAL(StringUtils::toString(1234, 4), L"103102"); BOOST_CHECK_EQUAL(StringUtils::toLong(L"103102", 4), 1234); BOOST_CHECK_EQUAL(StringUtils::toString(1234, 10), L"1234"); BOOST_CHECK_EQUAL(StringUtils::toLong(L"1234", 10), 1234); BOOST_CHECK_EQUAL(StringUtils::toString(1234, 16), L"4d2"); BOOST_CHECK_EQUAL(StringUtils::toLong(L"4d2", 16), 1234); BOOST_CHECK_EQUAL(StringUtils::toString(1234, StringUtils::CHARACTER_MAX_RADIX), L"ya"); BOOST_CHECK_EQUAL(StringUtils::toLong(L"ya", StringUtils::CHARACTER_MAX_RADIX), 1234); } BOOST_AUTO_TEST_CASE(testToHash) { BOOST_CHECK_EQUAL(StringUtils::hashCode(L"test"), 3556498); BOOST_CHECK_EQUAL(StringUtils::hashCode(L"string"), -891985903); } BOOST_AUTO_TEST_CASE(testUTF8Performance) { uint64_t startTime = MiscUtils::currentTimeMillis(); static const int32_t maxIter = 1000000; String unicode = L"this is a unicode string"; for (int32_t i = 0; i < maxIter; ++i) StringUtils::toUTF8(unicode); BOOST_TEST_MESSAGE("Encode utf8 (string): " << (MiscUtils::currentTimeMillis() - startTime) << "ms"); const wchar_t* unicodeChar = unicode.c_str(); int32_t unicodeLength = (int32_t)unicode.length(); startTime = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < maxIter; ++i) StringUtils::toUTF8(unicodeChar, unicodeLength); BOOST_TEST_MESSAGE("Encode utf8 (pointer): " << (MiscUtils::currentTimeMillis() - startTime) << "ms"); ByteArray utf8 = 
ByteArray::newInstance(unicodeLength * StringUtils::MAX_ENCODING_UTF8_SIZE); startTime = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < maxIter; ++i) StringUtils::toUTF8(unicodeChar, unicodeLength, utf8); BOOST_TEST_MESSAGE("Encode utf8 (buffer): " << (MiscUtils::currentTimeMillis() - startTime) << "ms"); } BOOST_AUTO_TEST_CASE(testUnicodePerformance) { uint64_t startTime = MiscUtils::currentTimeMillis(); static const int32_t maxIter = 1000000; SingleString utf8 = "this is a utf8 string"; for (int32_t i = 0; i < maxIter; ++i) StringUtils::toUnicode(utf8); BOOST_TEST_MESSAGE("Decode utf8 (string): " << (MiscUtils::currentTimeMillis() - startTime) << "ms"); const uint8_t* utf8Char = (const uint8_t*)utf8.c_str(); int32_t utf8Length = (int32_t)utf8.length(); startTime = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < maxIter; ++i) StringUtils::toUnicode(utf8Char, utf8Length); BOOST_TEST_MESSAGE("Decode utf8 (pointer): " << (MiscUtils::currentTimeMillis() - startTime) << "ms"); CharArray unicode = CharArray::newInstance(utf8Length); startTime = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < maxIter; ++i) StringUtils::toUnicode(utf8Char, utf8Length, unicode); BOOST_TEST_MESSAGE("Decode utf8 (buffer): " << (MiscUtils::currentTimeMillis() - startTime) << "ms"); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/src/test/util/TestUtils.cpp000066400000000000000000000121521217574114600225030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "test_lucene.h" #include "TestUtils.h" #include "CheckIndex.h" #include "ConcurrentMergeScheduler.h" #include "IndexWriter.h" #include "Random.h" #include "MiscUtils.h" #include "FileUtils.h" namespace Lucene { static RandomPtr randomTest = newLucene(); static String testDir; void setTestDir(const String& dir) { testDir = dir; } String getTestDir() { if (testDir.empty()) boost::throw_exception(RuntimeException(L"test directory not set")); return testDir; } String getTempDir() { static String tempDir; if (tempDir.empty()) { tempDir = FileUtils::joinPath(getTestDir(), L"temp"); FileUtils::createDirectory(tempDir); } return tempDir; } String getTempDir(const String& desc) { return FileUtils::joinPath(getTempDir(), desc + L"." + StringUtils::toString(randomTest->nextInt())); } void syncConcurrentMerges(IndexWriterPtr writer) { syncConcurrentMerges(writer->getMergeScheduler()); } void syncConcurrentMerges(MergeSchedulerPtr ms) { if (MiscUtils::typeOf(ms)) boost::dynamic_pointer_cast(ms)->sync(); } String intToEnglish(int32_t i) { String english(_intToEnglish(i)); boost::trim(english); return english; } String _intToEnglish(int32_t i) { String english; if (i == 0) return L"zero"; if (i < 0) { english += L"minus "; i = -i; } if (i >= 1000000000) // billions { english += _intToEnglish(i / 1000000000); english += L"billion, "; i = i % 1000000000; } if (i >= 1000000) // millions { english += _intToEnglish(i / 1000000); english += L"million, "; i = i % 1000000; } if (i >= 1000) // thousands { english += _intToEnglish(i / 1000); english += L"thousand, "; i = i % 1000; } if (i >= 100) // hundreds { english += _intToEnglish(i / 100); english += L"hundred "; i = i % 100; } if (i >= 20) { switch (i/10) { case 9: english += L"ninety"; break; case 8: english += L"eighty"; break; case 7: english += L"seventy"; break; case 6: english += L"sixty"; break; case 5: english 
+= L"fifty"; break; case 4: english += L"forty"; break; case 3: english += L"thirty"; break; case 2: english += L"twenty"; break; } i = i % 10; english += i == 0 ? L" " : L"-"; } switch (i) { case 19: english += L"nineteen "; break; case 18: english += L"eighteen "; break; case 17: english += L"seventeen "; break; case 16: english += L"sixteen "; break; case 15: english += L"fifteen "; break; case 14: english += L"fourteen "; break; case 13: english += L"thirteen "; break; case 12: english += L"twelve "; break; case 11: english += L"eleven "; break; case 10: english += L"ten "; break; case 9: english += L"nine "; break; case 8: english += L"eight "; break; case 7: english += L"seven "; break; case 6: english += L"six "; break; case 5: english += L"five "; break; case 4: english += L"four "; break; case 3: english += L"three "; break; case 2: english += L"two "; break; case 1: english += L"one "; break; } return english; } bool checkIndex(DirectoryPtr dir) { CheckIndexPtr checker = newLucene(dir); IndexStatusPtr indexStatus = checker->checkIndex(); if (!indexStatus || !indexStatus->clean) { boost::throw_exception(RuntimeException(L"CheckIndex failed")); return false; } return true; } } LucenePlusPlus-rel_3.0.4/src/test/util/VersionTest.cpp000066400000000000000000000020471217574114600230320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2011 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" using namespace Lucene; BOOST_FIXTURE_TEST_SUITE(VersionTest, LuceneTestFixture) BOOST_AUTO_TEST_CASE(testVersion) { for (int32_t version = (int32_t)LuceneVersion::LUCENE_20; version <= (int32_t)LuceneVersion::LUCENE_CURRENT; ++version) BOOST_CHECK(LuceneVersion::onOrAfter(LuceneVersion::LUCENE_CURRENT, (LuceneVersion::Version)version)); BOOST_CHECK(LuceneVersion::onOrAfter(LuceneVersion::LUCENE_30, LuceneVersion::LUCENE_29)); BOOST_CHECK(LuceneVersion::onOrAfter(LuceneVersion::LUCENE_30, LuceneVersion::LUCENE_30)); BOOST_CHECK(!LuceneVersion::onOrAfter(LuceneVersion::LUCENE_29, LuceneVersion::LUCENE_30)); } BOOST_AUTO_TEST_SUITE_END() LucenePlusPlus-rel_3.0.4/waf000077500000000000000000002260141217574114600160230ustar00rootroot00000000000000#!/usr/bin/env python # encoding: ISO8859-1 # Thomas Nagy, 2005-2010 """ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ import os, sys VERSION="1.6.0" REVISION="626f7e8a01dd8a1c64c2eba0815b779a" INSTALL='' C1='#(' C2='#&' cwd = os.getcwd() join = os.path.join WAF='waf' def b(x): return x if sys.hexversion>0x300000f: WAF='waf3' def b(x): return x.encode() def err(m): print(('\033[91mError: %s\033[0m' % m)) sys.exit(1) def unpack_wafdir(dir): f = open(sys.argv[0],'rb') c = 'corrupt archive (%d)' while 1: line = f.readline() if not line: err('run waf-light from a folder containing waflib') if line == b('#==>\n'): txt = f.readline() if not txt: err(c % 1) if f.readline() != b('#<==\n'): err(c % 2) break if not txt: err(c % 3) txt = txt[1:-1].replace(b(C1), b('\n')).replace(b(C2), b('\r')) import shutil, tarfile try: shutil.rmtree(dir) except OSError: pass try: for x in ['Tools', 'extras']: os.makedirs(join(dir, 'waflib', x)) except OSError: err("Cannot unpack waf lib into %s\nMove waf into a writeable directory" % dir) os.chdir(dir) tmp = 't.bz2' t = open(tmp,'wb') t.write(txt) t.close() try: t = tarfile.open(tmp) except: try: os.system('bunzip2 t.bz2') t = tarfile.open('t') tmp = 't' except: os.chdir(cwd) try: shutil.rmtree(dir) except OSError: pass err("Waf cannot be unpacked, check that bzip2 support is present") for x in t: t.extract(x) t.close() for x in ['Tools', 'extras']: os.chmod(join('waflib',x), 493) if sys.hexversion<0x300000f: sys.path = [join(dir, 'waflib')] + sys.path import fixpy2 fixpy2.fixdir(dir) os.unlink(tmp) os.chdir(cwd) try: dir = unicode(dir, 'mbcs') except: pass try: from 
ctypes import windll windll.kernel32.SetFileAttributesW(dir, 2) except: pass def test(dir): try: os.stat(join(dir, 'waflib')) return os.path.abspath(dir) except OSError: pass def find_lib(): name = sys.argv[0] base = os.path.dirname(os.path.abspath(name)) #devs use $WAFDIR w=test(os.environ.get('WAFDIR', '')) if w: return w #waf-light if name.endswith('waf-light'): w = test(base) if w: return w err('waf-light requires waflib -> export WAFDIR=/folder') dirname = '%s-%s-%s' % (WAF, VERSION, REVISION) for i in [INSTALL,'/usr','/usr/local','/opt']: w = test(i + '/lib/' + dirname) if w: return w #waf-local dir = join(base, (sys.platform != 'win32' and '.' or '') + dirname) w = test(dir) if w: return w #unpack unpack_wafdir(dir) return dir wafdir = find_lib() sys.path.insert(0, wafdir) if __name__ == '__main__': import waflib.extras.compat15 from waflib import Scripting Scripting.waf_entry_point(cwd, VERSION, wafdir) #==> #BZh91AY&SYgP `9#&#B qa=u<<%Gr u]5R͈[]fpk[I*CGq.T:.E=Ǽ6ޜ}}Mo>w> I3g1mp#&)'Z;ۡl3懯Ww0vm{yv)RU钗@P!BR#(}z^jnu*퐥u<ϮU4v6ݾԧ{=yםӻWP)PD#(>޼ VuOMRޭ B Gt[mtww\@#(K[jrqgxq n|oK:ƨV1=ͺ=xΧ{T_fŞݶk}x]ܱ{y݇d#&Is7\6/cϊW,]9ݓnMf%!fY}`y׼/vEYP!AVDJ=tyeWkv"׷ç)lx{q\;Ot\`K31[`mZˮȍݬ>卺SۘǏE*={Fo.﷭3y87}گ^#&v>Z͕ǹw MQ\ 2C}G^7Ǡ=׽w9|lhvo$z|wj㻭5޺45ZهwOMltTխAMrw/Q۳]lXvЊOg8Rݹ7v\ͩY5N;}GwmzĽTkEЂd{ϧ{}ʻW]ֆ_Mhh@h&`F@dSM'2m@#&#&4F4 h#@))a#&#&&L膁H i45'=5?PzAh=C5#&z$O@Ҟ=#M=LF@"@ d&M#M53F*~S4Lz6@ hSѦ <jz@d(LaC ժ^/8b,UU 0բ% QT Qnvc]VzVD7߯rgT^5O#(&#('揘3W&BHp}**f#(iE9s|C#(˹w+x#&@*"p@Dd('""A@ #(EA#(P#&BCmUUZ@hA0Qh24FI JR&ٍ,d@*b1h4YFfF6VUIe D6Ѵ,(Mђ&Q,-kTVeL٘JƢ$j[IL&QIԭ26i#FfZ116šl!a$Ț!ԄI(dFL-CAX @AXj&(JL dDS 2I6",آAA, jHZF3-"% acF3&lX fEE$e3͓M"LBS#(Mfɑ@J#&4d͓Q2XJ&4l&$IhL)&YE,4&Lf$Me#(@"2QJɬldd4$bHL[$%4-3cF&l0R2L2R2EHƆV$҉b5&* I#(i@5&)B@d"#dDAR#iɢ$%%!+I3Q%L0%Ji2ěS)Ri0%I*m[3FX6l16K(fVI1aRR5B}[[1 %T&c%E$jMAiDee3M%"Җ210IJ"iRl`ڋ+2ef$Z$FMfc2Bѱ-MV,[bRQT`4[Z*ԘA%-MIRjQYmml,&5@YiP%2T٦"mbXɊV2J̳J4TIR"TFji-`h؆F(!"Qe(RU,j6BLJi)MBLa(j$iͦP Ih$X*Ml"͙FKh4j$4FE))`RQYjI63fl"hdKdk!)Meh+I6ibRfH,̴QE,̔RmhdJEe2ffR 
DdeP&TcE`QU)&C(H hA)QJ$L͊lhm"c$ԛIfbEL%hm%DlL6#(m#cd42dI!h-3dŘjLL2-4mbY-aD3#%,U0jQbhDclZ+"0b(#TZi,B3-K2ڃdlP5Jd`b̡a#`4[cS*66fLbѨVLL5lHkaʅ(E3dbbB6bՋ-$LkmfXh)*(P"d%&ڊkM61`FP`Y͙Q$5i4cZcC] ں4C$V,#&-䐇{O2* T#&=6.r#(]FD?oX|9raW ['^iO&WV܃g;r/.I{wUXRx57JWJM0T-%H34F CTtP!wFc%DѨU9W}z@g-쿸Rc}Ʈ4ԯ$9E-I9 ÝloG4#&i"IL>mDɻ25>W׮} np XX3H4x$(Wo7JXbMb>]]|l!uhr1HXiHF#&{xi&u*d@n1=zn$jɁiQֱilZ_iz0 d-N7 UF9z*V1F#(ٟoH{#&ǥ֚RkMG4.̧^Ĕ0jN6qx%Man@*d))YQՐX'{j4N{;(+e6Ydy?qT"Q$I`)Dt&^Pz{y㠄DL_+p}.߹^oO]#EQ*XotAkΌ}.;0-"$޽MvH#J. L)'&J#"1#&:Klmt֩L -5TvU3< }lъRbHy4$`渊(Q%>!#&8F}j*@PI*%MTȅ?#(*ֵkqsc#&Ys@NCPhZߌ^ܮw>*X۝"Gf!JUS>޷;%o^ޞLXg8irI\"+8fe5n׷\EWxR~r,YQ!1hB=l.(M.i޲fYԙx^94(OCoʼnR#+ im9m~-㮵~naq׫^tMA>Z_NQs/vyG0]#()H:#&a:ٚaB2A)ckzNӣ1jDctmFhY1wʩmW$}0ֻ3SQ;t))Y m>'7q5R,K^2!eӛ.S:!{_Kט,Fr;{v2KhYB٤cjƯ#уOOXXɡ7LHXFN#&ZPR1 !3W+7]hҪjƐ)} .EdEBSjZu}OL>W՟GŒ/"sޅ_XENÛ}=.=;|#('5T;{~L24jAm Hz蹈y>YT{~̃>ek3*Chhم>7sXfN  Of\Q\DS"rάԼj&hx_ud;p[qRjvK>n _#&}#09N@I .zd=]ꊣynT|f^hI$4U{@!$glhfm8sM VT_ +MyN蕊[ҹ_:T@h_|#g_b)GXu>*e:=hOkm)֯?.fzӂ(tőK\Uٱա*vЌϞ0n"3(XMO]a#(DQZ#(ڻwT)Gf:OiGlfҋ!J՜,~胂#(W#(T_p6F-2T8ng0z(Ñf"alvwS3h3!crW1f@|\?#W!d;:g42iU@C,m`Ps6{c.FҧbbWupK?g֤B`~=Hvs{9Dxmt ќ]-f%g\O\?UQvjiH;O)Laڿf?)i#,Y5NV=Gɔ])UG^3|^ޭ1zԄ`E`(XEIŚZa ݟ_"3qb*2)K3M1Y3v%oR seHKA_;Wѕa+ _W`cwt< Q?.u2Onf?'vYgR<&mR5|Z׍BdJ6t7H6Pw>r]8B&wu.=&[gSR.2#~yz$qsIB*a:lj^.\AyLYZWA68g>CKߤK2tBR% FslEh awR܃-&`3̴ډv|5 [qH\AOKٚ~ l!d@hep9R;` >V*o>'m~ɭr8PƄL f**m+*bKha̾f'#&/YuH7*WZ^n&xSd#`vnQ+3kEKL܍M{G5r6OGdLtbbp`(/GqՖH0|ѹvh!< %|p _S>q`CA\= 8|El #0˗:ٍ#&vWIwF  %o!/՛-(DOw9#(At~~ :f@aD=0kӆ2bq0AOcc9$,m'FGȁ4d&{(u>%V,F4QH#(j,ϯ;wnej8 (E":-QV#(+v|Sn׶$|攔[6mۚ!q*7Ƀ#(B0/Sv!|`0@V#(-nZ|wiKbd3ωǶGI.l|?"A>}1KD`}^Vcps}Hf; ߣw.|zZM]1u>oua$ V '[,ʐ#(Sq@boQ09{8\8 Ddb9ۮwGw};lNֵ=7TɮtK`GKⶽBhfI$Zӓ. 
m-Apx }`gvaU>)b2x+=UDH)X801#&m/{(Ue#t$86H""UeDWFDI30Nr%,aΌc#D"( h*K\~kAbn(k(q7ǭ#(s nY~3o |W] !E%5 P^t\(DK-"A;\ڐQ7gv~ P"圄|÷7jqʐg;X us*NFtScÝ/~O2蔺%l>!#(+mMT"`1!=ҝQ0[2u+ שO :$(BRi<8 BUj ljOtk*LSW'k haCN;oR\˖K τ2'Q*mCS22JRҥb:# "#124yў/,:dgf#AMrs熬i v D$\b3ua4JfZ[d"b':V h\ WN9CFaP014>#(ȳwJ(^<;ibWRX܃܎;`ºFz9P맫[s2"|hhqyNEVE0Sa@¹e;/3}RRKCymG=lB v4m#8mԤieT{4կK5OA]C=<$Xq⍄?Mt WOLg.IH辝ӿ#2c0 !X@qЩ`4pݲ4C洷G8 I3]y$hpU*yyq}i70kT4F1 u͏ۏ$yvRnNJ^8KaiȥXJ`A1(~u#& =Sn6ڔ(KKs+*6+d0׉Mbh{yS6-FQBŐ/܊!né,ﭸvS1y"e#qxC+qSOuQ^G0> 6ƛ߫D'UmVjBw;G/# pMڋ!2i(vU~kHޢHEq7č<uAs;e>BFI[gf_90 8I<5)ɐ+\񻽢>C[t{?. 'Lk#(ו<黠tX9#&ٖ 6ɚu^yQkO.65sJ!(L R6#&ƈ kOѕ*ņ" gMo_f!$==*?f{G<>[単Q#`bP?Z(s_C8:\'^Fĭ4vr`Ltt@$lC3 (DMMXy0vrHT{JC[28 o9c丗LA.#&J;tak1i>q#~X 3W~[L]T^QfIQ2fȬM+X%Bw?P2NAk0@7 OhE}~eAMH#&E8MpUܹmۖHH(%VWj6LExя=k^M3BT9@MIB|EgD7 Ѿ{z0^Zk#(@ ڪ:V+׫fin0?嶍_(#Wl{ok~%4IثIwo-޺=w%w]eD.qC#&kc2|;Ό0'r k?##&wѼ!l}fL$,Q'#&m{Kcxc_g =ܩO $cw@o=MB{;#1ǥ8GoڲQwq`D$Gdt[SW\dBX-)!Z+Y9۫GXhٞN6`8+R{hOO[~\Ȍ<^8xǣh琖yHfGZQܺ6OqBqVapG ]C2zM@TswxuL(GqAQHZgPW]~||gbjέ5LtGwYL22xϱz~ iMAZI5gk@H#( ѷ6ϓY`9En'T>Ϫ…Dg"corB)RafdS[ԴVb?wىDqTPYp6\ph1GւhPR/`BrEx}oKB֙|}>kalL:u;(gYB~SQ³w b+nWʶWy5oEd F@ZD2b^edn47R#(bTmoz5gNOߘպXu!*H6KC'۽i<[.sI-U !ء\`,~tPCXe$P3(1Fx#(x_QI$]x,tl_<w'%(/ּ$%$}yr2(b%GO~߾tg<q@5.v}~ &~ ZA z}\U|DHa"Xcm|H[A[ArߍGʻK0"7t8{9M:ⰱ*%PJH]JCǗ?_@"/GX"zzsF md8d4.BAr `AtvK~j/ݻ-)O^^~:C?Mx_v2]O㍙pL. Q:F ve>fm}+wdžLdcW\|׷чޣ 4#l>M]xH꺏2Qo4]xz;-YgѨ^llOž=X#f X]Ϳ-R_|n$)Y|.iVGw7|k4[D|bۼAp,g"p5[#(ן 3׳<Y?1Qkrs # ʬ9jEE}y>wm3bo, W7%wW]f?0ag-CF56Pi#&=0\'V-_1xYV#&;Fk aJ|#(I+ŇOkIa洿D\.M Cž, _\:cװ=?{p w /@ ivϏu~~h#(‘G/#yu|wWVWVZl,m)N /r?0ԭ~y*m̊07#(@0NYw{;ջ߹~1=Έw={:2x~ ڣrQ*1Q]; #(q³vv3vpy%Zơ,c%C/Qq4IzzlZ6޻uRFz^9s]0ɻyyĀ2KI*Qq#@F#I[RZ:VZҝ-lh K Mxj8lѰ%W# "1V~U3()|v>xhHͩ>[\4xO\&cE.V9ϫT8̱w,P@ ;1/mF#_6k:+S@%IBI$ofz9p;]#Gl"wznYWu5YMM9[v@I-ɖ\u_p4D*i@xH,&K*e0#( 'Vz 89C?nې2ȇr*T7~>g'%ʅ 5Y!2! 
G^1(z7efX>.pZ#&CUKT|_ ݃ٻKw.$pN]ʹ;@#*..#s[ ǏaSzl ]ˊUec0E#&8@l<}|N~ߊ/u|](BeȘ0#(1`"0Pԭ +G0SLGխ8ukGMB h|N58B#ΎFR$ޱxPh;-6f(dQs?tkHFQ(G<9pV4=s#&iB&r;fA8f3\ǻNZʘD)p90!5Sih0@qOM( С}79qm0ʪ*͌?-ǙdU#Aˑx; #&teN0n U"%뎾~؜__-W֡I@HX#&~>9!`I3z%b: LIvΜsI6#(1S3J#&T:Qr#&iHa7Tꊻl|Af'8ZVǻ(qEJZ,@rq.pU83N8vEskX*pg>on{{כֻ:EӉ0 (r4"P:Of"QvE>Qs纻/rvHvk#&hH§&cdm8RUފVA"s.Hv29QH%鱧 @XTc:ӭ1K 1 ƋExՎy\:l]x4Kc!XĢE<q zd4Bhayː$33 xƣ0Iݧ^rU3Tu䇂^#W)V$!.ȍzfR1'LY#(#(DQJ{E4877v[  (Gjim,F8|Mq/ucRgvv8?BI >/^Ô\H6N:#&\Pۙ(#(FɐygbK*5>A/s3Ca7u͜QMcz@:#&iMgwrc;bĒMizf֏I(XmPݭ+0%Hv6s/v3\/ ?fIu(DkjvߍN=j$;1ɶ ŕgJm6H{cEiۤ^( uT`8:Mf\>в V>*AF#&;cliiAԇX{YK9kSv= L NTɴ(fBxC7m7%㽻SRS~2O?=tSTkØ#iꉂTvfWmOJN]R6]#&S"c|ֱOg-f#&EБRלR͚#&j#&燗I,Yw, YT!c6PVHfN27FP*k5fei>>rcIl TJ[&5ˍF xv-l[V7–i{g~=Jm|z"h@4-w;g\13Ƥ^M19rيC$).>" ~xq|*9@櫴^q*SoyA)@{AyKZJ 4w09NYy[U#&Ga}fVB#(طP걏hOO ~Zw.GIޣ/FUwL^iAв`|+hPR';vYJT\'^x6M f4P~Sܬ90~M,~ %^Kp Y X5ƴ(qT9S!:n]$>DKL_fm%هiJclzßGV=M\|\|r&gvi,Zb:&(/, aq|#(E-Mvhcrk/zi0LK"XU]errQg7cdwBR#(ݸwZ=Ϊ@ lU2Fev8?iRxTuϔt![ p=Ӊ}8KX %A>1"@it"@sp5uh਷voIgXVʆ,MmvxCp5 QᓵrtA s5H1$[:&ϋ漡03m_o[_q8n%_#(<'O1%¹lf3dvXΓ=xQcH.IxqC6*LT:U](|4Wb1&F&]89ᥞlH" 'JCv*-tL4]+6UE Rx%Y i.v2T)Mz˻AwHD[E#&Q]pq('C@rNp v:wiƺXV MHggQUUTՃ—4R i"T9;^9޳؉<,e&K.5[gCkyK#)Q:?i^DSXBRP>ar&87WN6҇`tcQ} k:uc @%K\6M▸Q1DC=A^#&\_6ۣS[c6{vq6,: L!#&bD@ދI,#&~tsַQ7mIzC|%r;a/]HB#_US:MYlt+3%ݬ;3B t-(Mەp"V3;%~]~6(~ɰaWkb\niӸZDYpA2՛gf6O%DLtdH`;1M6悚M;܅Eϵk-$SRT:Vesl kPg m<$D Mx$/%:7"h/2*%:v8+plan.2mI\L"̑6_7f% -FS%Asη)Hi]Z[V,gR7jo_ݫ=bs.jqH~>ڑZT=1I\pQH|6vmkjQwp߶a}+:{-=gXѣ>"V#Ui85m`N~rS"x+O{N\Zخfv8gjok٨w߾z0/Uc0/e#&.Wsk)4YJ6kD ˭ɮScE:A6#&$ۛ &fɪisAɪ|#K(-h1Ҥ=I*eǢGtGL'a$f:9,Ɋ6h5uctoARh}_HlS;k2Z".#&\tej3i~u`\LْZT9YJRu!Q;z=}qU$,YE jpa#(n_2aMeӐOx,B-g&VxU{r?\6;,X֏t㡮6x*}>[gtBp{7[n[,=ZuC'[0'^#Yp{o\k޼aE#+\+^~gm;%1.AYwڥ!Ka]o2gG1dL~e9/[8:ongQs!F1¾$|/Œލ>lf NOų[Z66^Ͻ+uaR>>0F,0[܏,Lwl 'm6R-%MvѲ+spwP/cŊ}jVyF^k͓DQ.ddfkG<.׌s[9N[ s NoG(N'˾C#O+phtCmj\4Aѹqq~U9u@D#( 3#ݞTch .خw?ӝm@NyZm Xy[rjw5Ѷ.$Sԡ\6qhk~Ee#(ig6kZė)9*aA:pllt,4NF18*qlu8⥄m~gouŭN8p2:Ž"#({'lSIan0HNB01-`S2Q) Q{jbL9Yq~/6#M4_G6b#.|8#tO*x3ێǞݲ))h|zzXt 
#f[udVa3eѦq窱RALƥ\NܼmfQbE#|jՀ.jpH** [;y@!9o6ˉ}lA5T$ ;l͠(nc*e,.j+q*s;PNZq&":feØ+"evmydBت(6/'_#(3[迫w۩=#&'s(!>igպeҝYh0UP(svZ ;nljdcJ 2|m"L>;\U#(DHɓh|܊Pww>~~Ck(N"gی=W*do>bApu|#&;Z??_]0JEYcdgtnsMv85EpG>7IH : /NtNW#&_0uAD%w!*fCu(l7E`y(q'u]9Nxy6=-nq ? zT|e~T@aYX'^J( /Ny$˚?P ~khk60iMws6.0f>OHoyY$SŠvXi|ؿ}$6ǫ^wijĈ.RY77I ;rmg={#( PYgm3Ľ琚V<(oPaz$bFDT{uEF9@}l.Rl{RHxC 4|d:`v Oğ̇RX(WZzꁺy!Q0~طxQ]5\!hcMUz#&kpȠ0ohZXpC}} n1͍-IEKw:e{2ѓs0{._;_V]7(I$VFيE;Gl=iyA xuP(5oab̮ƚJZnx>yz!OdI&{,gMxS'qDO/ܢ9$VV|``l+nٶ.Y~{kb8ߑ|y<a5Ϯ)iwxfض#([I,AEJHsuɒzWyCpPL9Q U}{vռ8uaA1\HijįuD7?ivmHiywL%hv19s7z>'[uG8)-2Xs٢+HB#&u~. ofF9ƪкØ0}XFnTNZy傥@R%"~sZf i/wuՍֿ=\y#(uhH`ji &+pS;3rރﲌypijd'I'ʒ1 LgJ!>KkBohv:1S5"wsj]0Z/RZXVP#&3Nx!&钊a4D;/In{䂾Efs[=>4#()L%{@5XȐ$ TY&njtJSť&lr4a:ֲ6gd^'#&a)|Ai5۬(aNLH^RRBp9RDK_bC~NV*UbR,g#5(O0 Ñ@2Z#(Ҝr} H #&oP࢚k L³2]熱ѶSHCױդ0rmt#tkRDIP>E&Jzdd҅,H=p}h(%憷G9s۵5%r/#mem2cw/qHā(8-BE.!"/ ;7 װyfb{{05y3ЕT|4=^^ O+:H(Al'L)DZ}#Gg=%[aPW2UqI57qn1I5`D(ۈ|a _nb~ʺF2e-uRE?#&?Ӊ|ڳw*/ꯕc9 {Izaxxއ,EC`8ءO#(X~"'hmCxad|`u|,yWy]B3~#&i*Ua0 )R-ޝyBR lHrMP.fS;QABgO̿:tb?/V' Xp+C,RPaN,@Þb;:LWu"94(Ntd\MslZf̈. 2:{oAdUa7x5`,gtl`ڕ96ITe5Q.zt"F蚣SKN]K#& 8 FE遌ctez,~VCtoU~ܤ;`)\)D>K}r_ =97=; xFF0L`yFOdj(*9n6Vayzzv=w]$ /Ѷ.!wq.g>t]D%de4*'硖 (y.sL#(2_5ss7l6Wߟ=%mHImށ6Z3!ml8`ӪʩAhIWAQ3/.trNB|.2Fsm qͲ8Q pc;G$}ï|$sWwg]QfX[yb$LkP=O'[K<&gU3vRƔj:A}W A("$D@Kݑk!m}a*|sQBb#(ϴ7=NުX1-lGCA&,D*o< R̅9ugшhx+P߽_#&J&Cng~n}қLJ1M@`Ւ+͋pL$C#(1v :-"Uy1Tyr vldBR,RhNP2W]K .,Cρ +6$ y (t.pmV!T5#&1'Zj%,4B?Y^ݫ&zD>u~kIuIzGAk#j<;w"K7m1rv#(d]fښ4r{˞mmm3;d%3foI8yzM}s"/xrʠy{r#*V+9o[ooff6L.A`u10ةUK`lŝLP0˦/#&W~*_ gZI|u:H:wMIc"'SSWt\s۞Ub3 T`p@R6ۓF.6p>5lg>ۄ%1$U"*7~Sb\%%t"bb|gCF[_e\/<@@ @=O<+Ulhv3,MY'>TKXU>?gd*aW<)ZF8C|esZЃU#EHSCE-ZO~x|ށP(l|&]tR[@D><<}'J8f1<`-RS'!ZmͼnWᅝ%:?#&Ԧ -Be!|Tn-G%Pn?b-D"t}|Y{ϥ#(P^)&ڹB^a^06HPݐe!/ٲD1_`#(:W#(#&~X"r绘{΂梗0$[mw0Ty5y~ w BM;"߮s;7Ƿ<`{2XqRLX0fa'Fаu32qv< 螱miGm;j~~'Σ!%{߳nxÔ)|4C<:nm7#&WiWAhfH'̜P?&sn++N~O'[xOy|050e0{xpvy="0>98<4WӏWݧsTcݳ˱X9[SWq)>UE@= d18O_t⪃Xָ|i;a0.}Ri". 
B H#(Qh)uӫG$=?1 L7i0.3*Nh\ufYp#(}Ui|BeVt \*6]-/_Л#((ķʴՏvi3}'QXS*G!''z-NdwBI:xN#&46N}・@1b6:3+fd嘂ϘF튺HGpa!7?tƯ8#(MuȐwnmFsVbSd &e֞t΅8mOqƮR^?vj+M$ɾryyh)X3syU bAHY#(܊TP⥅^zzVF 0uHcQҫ{%j%9ʥ pv)]#98+nR!eLoC2%n:5l>sx-:K¾>?"6yF4o Wͨ(2sL!ЌFa;s,pFH Jsd ,*AΘ-/7볆>'iFMdoV.>,a-L6mUQOjC j&Ë6_[vAzZƼOw~Tq}Vio~\;}.X~ p=C#wSp]:s(߁#ֱ_lf@R9 #&A6Q8Rⷧǻ$H$/`<>!/FAꩤ7(KOE#v^Ay?H#NyA#nJ5BhQDLO.F(Y\6?pSb*@xQS~_0 #&?O/~\D:0Bp\7(9K.#&0Aw/ZLe%$"2nAg#Gl~0`o+h\$ -7r=s5C~KaTn{t?NK,0kl~=o$hd {|>EeZ~{_Q xTB.:\%OT9ÿBG~~luJ< n־ՎH}Wd.~;읩>2܌1%k*_b׶Q0IЌ _?vt) 1pV}q4T(r#&bMzώWPh-)Fo#&[@B1.eb_=X}l!ޅ#..(UD4:ΎlzYϿYl?oޡwW=]f,ܯ>*H"jY ӣّ8H#(_vŗ˗5{/fTcyo_Ry0l @igv<7ܝR:)ei}ӜDQ %|{(evA|tpxu|@~:ܸB(l"*K6ґ65^&Oߠ`}AKٵ_T㹆l]rqvEB)}+V C(;yf_ڦŮft>ޓO*TO{|3/LNb$eb7ƽ7Vh\ t;nk]QCTQۮގVb3W 񈘬WBF6Q.J=]-3Aٷ+])xMA|~M`I q4TwGu_)"dā`^ ̡%ۮNۄ Q3#!ӳt`:nPEuaj#M_H_cP99xvigjT1)fh$nmy] '?>)"V-$`j_^-Hʸoܝ en]F1mНcy RM2~پ:6d϶%+#&fs:#&ӮoVq#&M~صa)\KՅE7fVmLijQnoTpX 0v("R=0t4nL]CھOQ4nBh#&c ʍb|ԠnO!hJٍ] _8NFW8RD E#(^('?yQmfdVb'B%s윁~/ͦ2gG3a5DA#(?k9Z*z)E^ }#B#&\'pԎӿDj䭴eopqIp;5;C2|jL2[s?[cD筵q,k>Vt: (%i[7f;|9nʩݙe] _.uHc׼U!JJ2*k`UU8Q-f95ٔ}ePSxuuuP,3"Rv7Q=p6 &,1]83uf1u\'u ?fլZ^N#&v0f~_ZojGFp\@섁mĈ)O) ȟMk Y n?Sm,tg+0 0jߨ5@u'&D p:BL\k:YģIQ[0܏S@xv=(k4;(}eZpWE?߰99KgCۺ:ibhG~!5XqP̨_&z@Ci_-fٌXُ ̆)˦WAXs@6p$GeS2H悝*Ϩ(?#(kb`~D]ΐo+㆗K;"03!j#(`91vPT|b\L#P=b#&fo0 uKw͠ != wF@Y(R#(7@')] n͛*i#(.,Nsoү#({,w#&FѵI`| ^ QC#*f% UKg@OgptM۟gaH{y]w6|&u߾.|\[2q?ӝ3#&e c;gűMS2{r±r4ÿ/~^X~o*կ*_L?Q4<OSjq?z} d8@O#(;/TK1s#(5q qO֘x٩UM*.Ҵ*D#(TV1l8V8h4^߬"~K~Mnu 4 'wJTPE,/`{i>8|W#(=فo3,LϫӠY#w\FNR(`n3MGU&@=`Sd3RcTW6g7̇ 7|@" )#(K1]l5qHi_`];" g8?$?iWOέbO߈I+jP!VR0Eߒ0z}gxR 06, $c!p~;OP' }b@}s:tu]J9Q_G5{۴ӧh|W>'CPE HC&E#&;r:10Iunk׸#(Y0#('( /U'<:M\/}4\7k/M@! 
$ d ĊPD{>$@;a!:uqHVZD3x(%(|xm*݊ )%ˉ1aET5y_ Չx3;!X ?@,EaIIgث򼢍}H3XsONuк I-`}}#&Ϧ:#(MVD'pPYDXTIB7 %)OwȮ}8oC|4rϯ'gטqȏV푠f+M 6'SE-#(¤\ H1DRaȳ2htl`#(#(K!ѩq!#&%HPP 0sT O!?f &a?& $5D2NS&' bQ#&=N<}az^%H<R9$I#ԘĂ=|^-뻺̒diUX(+'g}c}g!N) y;raf+ }}<^ M[bNġ#&#&hUb % fp,!q/j\#(n8%) #&'Xp*yQA\ypy35cC#&X'@F῟cb< ǂ^&B=rzNa%b(GkKI7vu"#(fy?#LY"iOϐ{{F QHQ, C(%B@*`~:h}S: 䑘y넁iT!وB'Hã\%ukjS;22#(g#s\'YMw8vC@؛ 2qF5ԗթS|F߾X`ީvIJ_OVN=Q'xPSY S~~s[RX*l3M֫V '"z[9#(@)`2#LμrC8j fP,JlI\ @15@)z.SIbf߉y\U_Qs?<.3Pjā_ 6c%}[y%}#&H2w|5ˡ#&#(YX :j^fK,{uwk^Pcg|wi[VK7-)&5^6AMlnD uj zn$8M]wJ 2e;D=Ǻ>J:@qh9&ǁK`Y8b|zWcId8Ѹʉ ZV0&O9:+2owl؀Kgn{j2YIC ;7;P1䟍T;DAvUBXN͟gO8utcHx!ttVyӗ\z|XEmy x܍Ď#'{5+~jH^9!Sq BZ1^졛+ώDlxĖq#&#(%@ph} HM6LhALQlzC#(n]u4A`xu8t$|iam;!q7ORkP8 ( rI>g\0*1Y$r EFY`=45F3CX45 W#S{t1&" SšH~FCqS:3-Ay{!K GW?9߀;Y0f0m[c/v a8xf+}Ϸ|a0SգGhD)`U.~L$Ε5.EMaII[u:#4ˁ>(qI'HAprDxUmaj<Ͼl$S_/Z7Nvʞ/ M>Zx#ğt$aztKjk$FL*Ryϴ.G-~DI#&0XH5AQ䫗T3C"!q|seO}dn 8 @χ >ܟCˠ |#&iC_??GqgQ=?("P2(Ր00αJK@(h"6%b*4UV6 V$6Wo%"/f[ץ Y#(H47o _ΐm#&61#&mЍd3n58#&6clklѥ6;6H\o`*Vq 󾳉҇13l`DgQ4%U#(y?dC6zݙC*Գ}6\<8U 8 K€xATwjQ>%@ Zd)㿷nhknoD$tpݺVfǷXWĴ7T4й99AadʩQ;fOd˳E0A#&Q-9AGׯFF$ RD#(HAɅCۧ<'5Vlޯ&^#(5dE[d rC}%< ?u ۖE+xv/ ;2A[F#&Ȼ@HitECGR.Gُc>o<_y?7==}g ?b?tj UDE@ }O~JN>tAAћv׸Pm۷^.VeP=́ӂ{hU`^^0:U/Wy'.!*|9ר[i,>hA/`"^qzl((TXҁJf<90(/ŕ#(–ӊ¸:tY-md$A͆/A%?ْIjRvưDZ[.޾%ƼD{9뎱KYslmxˀaf +_R?&dPJ_#&𺧥ژKT]D9lٛ%HHpwK/Ɗ xt)5]\ZT#()̪sNw!>Ꜵ?mp^=R3ZOVV:kM@:n/z;LN"tuC79#&ֺxu$?zo(ҟuOV+jҭ`C9u9ol-2.{;%$AG 0\*ԯմp7ѩoN^m=SLNvBh\R #& kFǗﭹ#(B|PY{^_9Gi:A@ #&5?P9}G4rۂ2ur[8TiOȤSazr%pi'0q/((#&jHVk}p!3Lif!Lд(4X$VLzճ.tL. 
4t.GdـP/3 b&#(\)8i#& ж$ha%XVڀe146H#& _$Sߟ=p"AߧBBl5 ,Cej׻OaeCiRM~eUP}K8s#{>+Ob#/8J3j CY9x9?`bz0Y k2}m8J@fxbwzޑ,~@#(, n 'YBͷ4vf#&,:am 8(NYFؔ0gY<7+͂8NKst[LI&ݶ8#&^'6,dlfGg./a q9NEɵNfFJ$!XtϏX2عfo_yo94Pƫ!w:A ;Yg'f.86ӖP]ı`@YlfTl'GeةH,*]⫁Cf{rص}I0r)EPs}j:SwWcu׾]la%.BdEwKrP5`xLٴfQA1<;AN\p|'Mʝw2I]gBٙyqٍ3sXʤyR٫lŜis-#&^Sݸ9pQD`ZM꓉Y|Be㴃"o1=tCmmYYRk1peP]ul!Ԥ*NhA$E͌y?+D8YL۬[9MΥX*3tUr/ Knu;O$@ os gT!:]qF@;Ҽպ(A;GeȲKI|6(#&Ѷ#&'A>ſ)p^XpOobU&I Ђ 110ʓ_%l`A@D%=2g\a:tM#gDacY BXX8-M#(ij~1QbC7!gHN("Wjކ#lf?7IQKn#&5 Ä U Ķ<0L9!MO4eGB nxAp=1#(A>0 ( o#&P`1amM<(YNE5?չ0pY.U:gO-KaǴy}0r>?IOUBBBњ:ҵ?^&Jha7#(ZJ=\/*daA]KeuA^qB`Ϛit@x(g'H%N>1Q@ ^( W1"$(Cs!;CMSr>àBaa#XH0 >iEj*4ݶۈl<¡#( QQc7^cGDǒȢ:h ~$`1hj( jQuNIӝ[8+0Gw*I ܧĤI8Md"Ċ "$>+<G) @0tu<|ek!¨pű;XАo#̅U:1M6jA #/CqiLF4필;XVMG^:u$L8")T"[@@cRka$+,74"Wui9Lnvf{kYdM[}qݡ١ BQEQ~^r>k$*w_gwtDwtDF/gć#&Afö1yVJOAXLD-A(&^4@'aܾ"D5Ϳ=߇CʡŲ1\\l  6n6МPzрmlpNq{]n=aw]}U{:'dBJ{zەlNePs"GvQTaC,H" O2 &HH#w 7,A;NuzEݺHp"e Y#()M1#s=R*: йzT-c[Em)TZZZmk&ՖZd`!H]lΞEFNv$$H*}Hrtn \5jj,K)jB+(!1@~:G}هn1z&ԍlQt1JfeDSI ]LM "ԅ*xkjR <:иaBE]tBSnlhξ{jtQ_L2Ta,4vfsW sY~Y)ZC9!lpt;#N55v:>tD4RA1Hn6[[`b_]5ǩ&:)2w;a8[ïm{&{"_I\m%ճ~;/&*AMA#&a3D1HKFŷ.#&20XaZqKU,sT6)\ 3CiZjBdDX@H@{ñ3Є֐7Y*6ӧ^!;EH>MY(> p)qb7wUIg.Lm }:1=ooQ-ao`Bc\hQb?6#(}$(EOn $W! bz9r=gCstB*-T+xmʂ#&&/n缝bVQ6my> dRwɃnjslnם^LI|#&W(S6Z<;m :-0RBă9֢Ct#|e*&!\,,;Skb9Sji2ɅRѫ e%~}ETN;!zI*n !W[)NNq c#1a$"HsNfZ*(pp^E2?"(bj!#&}`Mξh;bI+TwpzBJ8X4 >sĹ9PU:SmG@Z~ZGsyϊ9pƣ"UܺcksnݷmwQ༸NyVm]m,1JLIm;: `,bby#&$ꆘͲ;X]kkf>seY80[0WZ%k0(EҪ3{##&!ɿ%;<)=цN@]rʷe}}i3 '}1yv=Ooxܗ.3*DKoV"Q(J^}ܞocڮY "SfCX84-#(A*or<;MJ7]%9@7۲YSe|wea傦ͻ?/pK/#(̉z06{xFȘ4*#("C&%dq1nQ-̳jqr!8M#č-jk1cFc8W ^7Eؠ@q %',#&҆_C#(`ɅyUިYAm&!|EDB#(F P1Lf.'?ä۞/;ʈ}s-v<=O(GFsN46#edB6P_K['ȟHUu ~?'IU4U5#(E}\mY!)I8r;bC!D25#&cDI{&6ӰO@pCuÅu$Hm-#&L۔w)$oN#(@p~Nl8e'VaL#,d:h…PwIOTCJI\^G벍:EƌH$P!¹J0Pկ! 
ߡu8TˈLm yB ce=#(/,$2f[ *œP5ai13ܝOPa{NY@F#8g˃#&MlunB[toMv!ؗZt!qf!dg׍*t񒉏^xx=jcB+;1~$jڰ,A"Ahm|vԚڎ#(Ta; iT !zړ~_^N!H{~#&r`w-UɾluUjG8FZVMA3p ڂZVl ހՊ#(i%@cQWJ:}SEĖ<@&4/ V" 7/v"F_\T+auzo2V8HonWRF&؆UI{,Dft]W.)M@}}K]S %&á 5^m2ی=M2q>]BA|fjp٥dˬ@*tzY 6?ϋO[1 xCV\ ۙAY Av]UM}M{t6R$hW2d;d].Ѩf>L@ -$#(IEo۫jsM&Iƪ-QkdQkkR ?DA#( 0(6cYa^Ɂlx hB'a}Ԅ]@$T32fũ&JEf`SI %_#bHI)1$c܌#(YH 2mE#(#&F4%16[)L!,ٱ$b#(AOk&t .zwwǬ4c?dxػn63E!z,`Was_DBҷb}RL$gZΝĢwSٱ/scinbt VWضי#(lYJX4Gr3Qzvߌ&,),d2i$'_,`ed(Rc1N X@J+3ܒ/"/+*6,6D}lL5kw8 xxqnv<#(JuUO!Aկ#&%b1ٸѯ9783!C|J[a7ђ$q׮d0A(SG׊+bCS[l|>"0&N-8M[ޥIge#&ǔhم$HE+PyCaX/oXAs`/+pqۇC̀QݍxgvuSwPl/:J^ZMٕlM֚K0~D&6_e]'#b[1g w]$&0/!)8#&g&TvW#U\"Nn(Q! ~гF>:$\2rt_#.(Ԙͅ0Tkwu#&iјix:ƞ޽k4]瘺sKy\zdiаxk|Jc.Hmr?[Kʤq)NT"tpQW‡bkW ȝV+hv܏y퓍;?ja^_gܽb/_ZPoܘw`0KKdIi֍?SW6{'20̓mrFj"9#&SoV7Գ .8skB7%#t;n1W)aƘ`+ni4Ѱ;QΧsƁӢD\Q#&2<4D نM Ί Y’z>qӎqoH<Y[TMfe9֣#((4FsuM#QU5qmm!,bvz].:pOWﱹnҘX;DeAJ%vEi.T1t%hMm5/Xv#&^\2M[#( e6!y{J34gF=#&9|375iL]G,rW9Vؘ$01ygE]Bf9؉SaU#(q0Ki=IСVkq)VFا2&k[#(&Lޜi_`,Nʮ!hj7rst)U-n,C[7i6 'A0ɉe%/4ڛ$*J4\`1{Mr#EIMn`Lm0n柋̷Xl.0Hm{E>H;. gٖl2~Q<>qF7/RM8fZRX8a#xLzD#&[M ynaǸAjAf9tB[+#&xCḧoQ5Dp(߮Z&5iec#dV1q,rF!Fe޵PfٌcCO$#3dfd3#({Ȇ*:46d{z2ѱS9Bf1fploW;-h5Yxị lSmBlsI+LF 3.Vձ #&4#jaiYXiVYu3fV\fzmxI4Z9UUJdkV@)"7ua;'Sݽ]#&B4ښ8/x5S Tk |ʖ#(Ȼf;U!:#(#(µβkѓKZ|d[`wc`ݐ4֭T8uΓ\d[k+~xJ|R`lx&+DdQe3fkCvi4@م<k]7q'Ԅ|ە8 ͔a>!@\#((hkaEm6&'ent߫8XN`vh#`|^vCiDDC"O&4xDA8¦\դyh"FLf ꆅ.B 8v#&bDb6F*1wa:2HqiSɔ`s W{Zo+쨔i1`?JmTw Fm(CQTCq!,VgY #( n#&QA,&F#(u]MqŁHVlnvͥ&QR #&*6 4ϙ@Y#ay*"d6,[c&#&r*8T&h.xn-uEt@hj*dq؍uVEq$󸜊64q"kEӝ (#(އǍȻ1*Pnko!T4l#v#(H8dbqNaZ[apۚԏfFAĎR/H6YA6s0Vp%LPt:ePA#p jDC#(&#( @"ELA,OϯwZFְ'J}+- ZCYd>FLkz587½)bM8-tM#JjHJdBe2CY2rNmfKH@xɆ*X^W/`Tc#&-54Z6r9$_(5`P){ XVE0`m>Y[#&W ;)^EXVkI"#(#&&\-l6`AATB q%kPa 6ߴ*`b U!kb^1+;(4oDsȇto#j9,uhMlN6Ͽ4,{jz м?!"HypA#&]eZ PWrD0>g}(;$I&IdI$&;TH@]X"2dI&J a#(߳`-iٯbHN?Pl=YP:~, wn;e25<3W#&K(̭AhBk\|*ݻiBLRaŚ`{$q-bi)PRPjXQniM JĔ^L$l?syCTD#(iڮ QaLZ2hVLmQL)\%ٌPnQުޡrZpD1" 8+[ukqZbgM.z@p#(( H#(/Ta}_ȁJT,$45? 
|QCKɈJV磮UCaPҙ*6*ɪ7i@c{wKWtW;žȷVUqpFD:z@ pG6$XD=Ƈ6 u43 khM^ml]*H?K0¿YD)&QXhV,QtdJ.B ҆*IFAj Ԡ˔!ِ !-&0#((;m+f6^$bMEïzzjX^MFH#&jQ"#&1ŖwPIG4mi+p\\ Q*2٫}$F٦:Db4L 0ՃJ%AD5&QA0oMv FSUs82~îDAI ʨP0ID1ܙ o3SAAmT.(El)7r`R$9RL齺1m. X4tR̨$I)&{/ Ua MEp=7h,(x&K\.","@!1pP0h+GAX*o0O~M"79bI K:.hb\eQxL`x)PGtP#(%oȳM'hK@99 qo~z?Ômscn#&hcbӴSQ'hԻQR0hfMTYEdXHE1mAly #(W9oՒ=Jd~b6f2ƥ\ċPpR+UEo/K;x4H٠]Q$#(B2zyU(7!Qs!=i<'h8X! ȨyA3ag=#&iWm1BDz\#(HnEKG"lt:#(CM1M$ZӿAQ`Tew@'v=P[2α+2BNQ}AYB>6G%c5fMHdRmLԈRTTVf2ȪFIZ[iRU4+ebjk%Kf {Pul4 $L0{5[mZ) `t0#&l1"$M`i"DO3: }>VЩ%1,ʢ\h!|j{ e9y"^ .1=meNGĚCS5#(iha=i#(E@#(ZK GW^qjL8ơ@|VP4C=e^e}'P;l^U/ t$YmGۖoڛx&ӹ]UCɣ=WPJE#&#-`图tlRg!֔'sYmr<RU#(8^!+@-&ީ@Qbb$zXPlI񱧲c#(7(ďb#&6@ĀbUHz`i[sr,7EuxX,H|ST@MDbEҠBx`n,h% '?^8P8|d^wiX9@it;06qXɔ[xm@RY$BCXȦ$|wd(h}$6Ӻ}TR)#&HdPVqO2'T4H8nDߦY* /nM$2{;4˝|/WWdm_3Jλ,duxSx\viTYTj0!XPuZ҃Mo5+j̺7N#[#&%,|L F`%#O \fMǃ]\S#5e p?Q˩ZHTh7v٘HEqfWy?ȃ:}[ I>A_n$bwO-8EIR:AgM 1k1 #&Nh.D48at͝ 7GvcTGq zJX9Ӱ#&[]lD9Sэ݆5QK⢙>[3s\#&vL]*18aZPBB„(%)Y[ uiI ȅ#t};m;ͦ5yk*Yqy#(m9KBHD@p(d1*P6=Bri#&^Y~tMժOn`]j:]+CA bB')FbjPP#(|N1""A,)B*JR+w Z#(R9 &YDb@:lLaQr%FR7P-* iڒ#ycdZҴ"P!'x4f55BaUrFp&7R%F<#(ZrHe0FU]c0A< 9 L@B1ǔ|ZV'&YRaꝷ3!aGI5#&65ᑴ8((Mȡ "آS-#;uv[˲5t5Q#&4OBmSH(zWœ';crk(RDPKJIp޷W?ӡhi5YV&q#&Lh1$l$a3о߹,#:bFX-05]c.`Q!z(NLP?GB^rmDeWӀre\\#yk"kkGmZd遲0-ĉ&o;IQ>ChF;#&>l~s3:h]#u# QjS{w ઇ4J'\=}ˆJ\F҂L-:RFp-`LO0Qty1y~Lbs[<<2oyoc3*Oo (C {v&nE16IؼmmjR:ǖ҈E[Dav3_nW'Q5WÒO"ƞؗB2+9 @jFiZug95m}JRD!@ạؗ<9ȆbHѱ#G*e}D;v:^( Y%_zxZ4*zsVȐ -!uR:ehՁ_/ad"+t@&(vOFDR^fIQ#H/{^z$wkŻmWnfjڔ+Z:2*g",`H 7'k%iZO]J&i?Y}ɲP1@`IN|uU{O"}=HzFw1dYW*#(@Td3ub$-LU]MC<8-ƛ5BO`vEkefƌM7{mCWJr걧Uܧ=o7OMIm%(׋PTWWtAn3K^QVd#([N`u}zÅ~ZX?aA:(U n9bR])@pz C&>3gu5Ӎc*2V$[iSCKJ};K;XmpG-1{pKVFtudO}Xt{ =8ilaZkԦ0tlAƋSVn`d|#"HIoOeLn Z#fd}FϐȌhEwu.Tgg.ys>f#&1~cIIߡK^} a#&Xf@gw˪kN |u㙜cʹk[!/tRr#&S ~dT#&nÁ.HeGd:" Ԁ;tjMV5ThQlXD* H!QT/1^?5".ޮx*^#&2EIԬ[ۇق"GdkR Y/$RGPБeќBٲ#& l;Ÿ1QgA'$iZv"wgNܞtq"rXL8w*@x`b7}(K1o[B~Ļ/#&0SԑSHYڶ!M5\xywR0ԗ#()4L@Vh@v/n(!DHDY0ٍfbDڅm2Ĝq՟w]{?8 (]nwn㬭lc4N֟X>fPPV\6{={z#Pb(\{v~yޖܼz%l| }VI鶫H8oT! 
P$fn#&jŸ&$/79Db`V!mJ4mi`$z042=2F Ґ|`$I+Fݱ|/jy4;nRU= hҤ`f,ѓ#(c"5iPT0b""T51!fu]+!0kr̺X0H"=.۟PdCEgɓ9{{ $83XjXE# ia{M- xRz/)C|x|ϩק5L B!*S>bca^OΛ+Gaӷ?fYGt#&;Dɢ LUhi`׸9TJ:YۼyBj\c7[$BF"#&P}@<=|UϕZ=vK`grE;j8t T)) ^JeQ* XQ; Np 1#(Ĕ6@~0̛RGj@QEuUbmnm*-st+S㴮QIx-kjM'mrYe;]Qѵw]5uwv 25dj QFDWh#e@O(J`#(>z6lvQ{#( M=!tEE >;@G^"#&I @^F.r]нZDžt/]j=о :$zyW}ټ#&fiXޱQ:b(mE.X?kLSQKVkj|ꋖ*ΙeEr%~ryF_+֋U"#& 䐓d$%b(G+]yDccADŽiH]uL몊(c5fpa?{ԃ(AȪJP#&BIAPnaTB$K1S9 @mq#&6&6 b&UFT"n15A",N.W#x*HB1Јe@1UboA%b A.XaTǗݙ|i}aX1JQLlrV[#&Oʉf6$hƱ+:ӯi81p-_-1(R#(V9C@{Nw*ݧ7x#({;"P#(lXHJmh]mt'X\XV ]?G![WnǪdH7#(BҨ5D*"@Nr]L椓LWD0"omXDd㪋"2O!iOo#KoD㮏T#&> }kaUh9EKАـHqpPP˹}\tڤ=}gYB+ p y7:9v>G8NA#&DJ4;jL#SzKhbh0}qP#(0O!SK/4nQMEPLw|B])J!#&iˉNl'RQ*EJЀ'\ ĀȤ&[\n9cE!e"B驮Yɫ)sv];u1Qw74-T!F#(ل#(HodF4.EM_6j*7Uvu$%3yxOvB 0dsxkHQҐ잢\ܢpyc:4>K=E;ɪʔi٫nZf';rጝ"MHQR4 ajPZMdጌ&]865fX&1RK&W)iJ#(1Ҥ$72)Y#(bDrD)F7hqE-PTf"K}~l;s#(m Ν*#(W&:ǎaҙv Mfg-,,d*PS)1HA6Yf*"t)H1QoM`U鑚A7#(ƯW4z$a/DDك74lѻeӋ1:ReZ&R;R0l,$#(ugBD]l:<\k)ZR}bX7)3 w#(15R#T)D(4be#&5]V:_:!+kRX*شQB$LT#(C(Kd0CD--$ d)+s\HU;hصw;^yyw_B #(5"Ƃ4{aiA8<_3m4^(By1!qvuk6ꫢD /ʩ! 
$RJBTPs0φdt٭*ZT%$TP2 !P66h]sY[-!0"$"t%@kWt_p f8o!{0CxSIj<"JC.(w}@t!e/=Fuݮy+,Z#Hu&B,_AfʗKMYI"!r|*¥z^d(#(5h-`*c?.f)n"DEhKNJ,d^UE<#|PHN#(Ү-RTR0=;^8Ta*عNbq#&?JO;u٧{08W12ɼv6}c#(Jf*уOsS]7WoH+wDžǒ;^V̞'ۭn#K4zNWK#(pD7AS-66Uǖ&#ޝG?CWiHX^6CRZmĜ#(p,ٳz7}|qXK/r[ZI0V'BK s XN Ep4ɯ99ܙ؞H;N[w4m 3?5\e͞O.$eCQ SEbtu#(+mpk91lZ}]o9W̘[1Γf#&E"M&3KĶ\Vz#(lҟز7T,+HɳF[KdԹ3DW='eVh.9C& .f=W|畾5/#&oC_1Σ^~N˺u;k#Pl.}!l૓v:Wb[42ۤ3GEbMYcLeҨ(k4vXhYVC+\qr媑I:@B /n#{> W}1K1ĽDz<9 Dc:za<DC:@#(3A\@* i#&RuƗ,tv)ݘPyaAsΆ$WBz9;q.R|]FE@#(I6o#&1K{{k\q`nYlƅ@?$E&\yh7H*‚ k9hy(t<#(#&LZt#&^.w};L umr)YQW;aIA[.ME q ~3{'JM&쫿H]X<3JkB2d (.ue6Ab&)g97M7/]eZ\*񧆇Ysc=0̺<ˑҢT{|@q!Z4d!OnD/d4yaSnhYknċsw^+/Z:k]Ss:I>ԲDE]88Gw~9v<'|V節TʜVg1"&^:)2m(y6;\\ݪ5XN/խjiq}|p>15M38*t% PWugLz?#l].4@( $Y,%-0YhP`yfwt[8"!\~:r0hPUִY60M#& 8MQiR"縅 uԃAȄA0uy|8)="p`1Gp0~^.Tz.޽:5$`BBER|eU c&h];yȮ;u/]^WcN#(v:ޕ_y HHޝp1,ݱK`;N, [7,AHHw䰃^(S(A|`2#(2*P .^iTs~;8R/GCH@(Dq1hTE 6@>c#(דAZ)$ dDq6Hԁ$"O~­u#& }DKU7vpLL{B(a:&Q98q[j*:դmz9~]A/d5D 5>3_D{GçԼ\OTՋ1OWIe]uy%xvRUU* șyk9}I"{Ǽۯ]TvvZ((Ɲrr*U$}V\6wOd*!wAL a*HRƷ6[^Miܞ&|oE%mA1S#VJTIdM"jF4SL+(L̪JdԚj1 6"HNR/`O`[@T)qMZ<-6UO#&`5ˆBɰ9CW;O(!]/{]#&~(ұEcI@` ZBB6|#&#(!YJIt4:D4`EُA5 :#(kCNT 4j5\E4El9hEhJ3I4pmVK`Olh)"64Dj2"C8X뒮Jz#(#( MN#pkOݟ!HMG{&7e $St&eL ˓Ucڻwv{ab"`cVHd^QAxYWH|K2q)(x;.v>}@cA=P(s(.dO@0$#(:&,=BC:;(ȤXi|>;ѩju`#nT,nϡx=z8Th!1cx? z5%eӶ-po%bu=))RB8Fq{zgY,W0lb1ʚ?\.ka)G( `% qD*"Abfg&d1p\R@#6tD6#8`,#&Eh .g4U!#(b&0@ۧ3<5oN'GT<ڀ00aG3~mt!2c4ƒ!t4!HdQ{n#pǍ/R0"C#Z6Hމzݙꥌ\Ryz{8N [`rp[X jBH:kU#&P& :*Lts\6X񷩜O=9)iB/,;k$T@(lm 2UKlKC-:^x&𽔻J\f6NoM=c5ҭȐYB#(p^#&#,̻nS. 
H6`ZnO$!tE261Hj4pv!)4]rsO[mfpNZʔlwv-pS-sM(4 ̪|BȆ!@·Xn)L[˙t #d uH9#IȻ謰-1!~C8gLP{FV[wV+`BM8*:ӝ*s:adW2BzqhvWVz!&pOJ␭:٠$lrYcp!S˷\\Zh Ȅ@wԬW&LQI4),F m2&zQd= %Q *#&60*8ewi0Jp$Ց-kfjL#(ddX*n,VgdbޯӓD"VS6ES~Xg14ߟߕ|=-xwax*cy)5FZRT%2RCnƂmVL\7V-h|Nٕ#&,E#&24&Q@&G"u5YFMo7]bkŚ8oJ+$ɋk& EJ16AkaͽW/0Xᢗ=zdd8-[GWk ̅gfDõ.l}1+2$o0!/ 4%b]8 #& 2% &bEXTpt-#&rf<#dkFŌ#&c<5Pəbl3T: Ćbt"iaAF6BP 9PlF$mB鉫֑̆C^eW322mRSM(U4ȚB`h<`DaTDJXmTSg^Ôμd8h;NCgd7"}G5KEDV OM\MwPnKm閫M!Ǩ9Zo>=r(sG@kK`Ϯ[T=.wZ rSKwY}/UIm1[ pcf|lvPNoq!^3lYF]r]4ȼ Ƙlw!w7*,1wAd ٜgmo`M ~dL)tǠv>o0a dcoxK?y'b|8`e#(C \m:xNLuX$/D%6@6gxU,J#(_B+PpS#$;$q-&Q@9X[ QeH]ZʨL`a c!F*BQ jQaf-#FT >(W ߔԎjI!G#&-FLJ"0n<:ξk؅P9]ub&(N)G*-"E5oLh$j4akڇAT#(B7f>KOZ#(bM-2in[Kd-TEC#(B hPmA6|RpI #(B("$WUT]ٷÖF$UJp6_3lVX mʯwETsVP1P5|Nۄw;x>9| 7 r*^\F(ԔIjU܄#O<ҝtXnRכj[ĩe)j`^{;m5 +E̡tf)TS{ l^t&j 0\S}[adEAFɑІenmgQfHfPV#(HT0D`٘ä4?3S/ dBhI e#_,a*MY7 KvJwl1>ِؓ6'*YQ83vqO8 Q%഑K5DZ"L6 z x#&OL /%N=QM]r6gU;^x+0A",ٸvMo|F7nF%`A@aU5CAGA&j1#(S#&fzi3nf2E<9#( x8v3JH HlmN x=y$ zh['){КX$uaK:aZς4m8zWPŲ d{h)9Ǘp;:79'e`dSp=T`@a@B"2='d=ԢW=xl*X; 6ʅu@ȚEE )ƛ6 rے^HH6$nQb5o\ c@c(mr+jJVZ!#(Z(?"&G "+Yl߯@;Pub8Pq @ BBA1sTQY5-6Ij>֋F%ų4"D",[8!޸Lj8qQd +A#(Hѭ26~7tX;)n]fg,"~;<#&vC[U>XB7|Lrxz_DVl=p?5q#&,cqW$>W|ko7"jB0l8QRbJ FCfjjɷ3mYZ, Aϰ?CuѨY]FԒl}=2ok׿,TH16a]M f^jz #(:h$a)Fo9ХWAEU{E#&QQ`;cDgóKBk6lQ(ihn#Id-!66[0y5߶ZrcOZ| rSp҃l\aFRO]R )TDUYc#(shXnV^#a$^1`aRɳ0vN^5UiKKؾ-Q4*d!!U~.]JI-^^wpI5˔ҁPuxZ3*yWK$])bmyݷ*&4REkż<+֯Jk-"FͲ3iM]umbPNaWt0AUcf?6-ț:a%I߅0+C &Q{Ϩ%P"B_)~妄;L$#oUG.FUUW1_ OT`YȧiaPVKN?|It:|R]_w=woSnnKd!uJ.5{n'< )4su-\N#&s3h0a :gejxZ}84xGhBV܋GaWcg&ďCaka!`"B&eb0!.!iR ľY=>FpTcbKƝ&Xp1(uc*H懧|PYłptࡇ&/"@Ӫ:Զ$<f\HmQɑ5ӶJ4Mi﫽J+T;ϣ2`:#&LS~n ѿR yU%ښ׹qva`}{vq2#&~ Y>y̓Dκ,#&v)u73%1/GWs|MV'sHsI#&YG}_??_{߯/_/wO?3;$ $v`&5'?"ƿt*0 n6j~t )$q)#& &#%v4Ky_v#(`o6O]l 3$#N vi>mښtzuC?ܨ)Cmv~Z01pΏZbO1t_SZ.MΎEJĺvfR]7%f%lxHOV#&2Y#&Hr#&mAm#cZج.fP.hjk}ciǝ`QԦ4Yg^xAɘj-xsDv1{C;N4qkՀ\tzqp@e#(TPܡ&GVCb#&`.G^hs#(_H1NH:q]K^.xMngx\(X,V#&hKDvn6傁d z5UV}.2F0ICt#^I$ DbBQBbLH Hc;訆*CnLX=LlZ*@×=~'}R{ITv7Y}P<mBJ&a"#&3~ieZ#(} \6#{&^ęL3J|QJbymDfMϻp!#& sqqS 
gۭ#(^q/&Zs[G[rmnle,auZ"2+xT@ȃ*QiƳN;pj,HFp*^i!`MdqiO-!b2KaE1XvMcץ! hw&dn;`6w ؎EXBN.ɕt~Q`HT=?{J>+1#&ž~H#&]aPOZahm@A4J?;aO{ey 2j((*D:5a̿w)f}//(1`O&}tmA?xn<(JhO(b2E1x?-YFL5YAAԎpouX氶$6tfeӲ9'Z'U?"96߲q3? @ܑN$+:3 #<== LucenePlusPlus-rel_3.0.4/wscript000066400000000000000000000153171217574114600167400ustar00rootroot00000000000000############################################################################# ## Copyright (c) 2009-2011 Alan Wright. All rights reserved. ## Distributable under the terms of either the Apache License (Version 2.0) ## or the GNU Lesser General Public License. ############################################################################# import sys import os from copy import copy import Options from Configure import conf from TaskGen import feature, after import Task APPNAME='Lucene++' VERSION='3.0.4.0' top = '.' out = 'bin' source_patterns = [ '**/*.c', '**/*.cpp' ] lucene_source_dirs = [ 'src/core/analysis', 'src/core/document', 'src/core/index', 'src/core/queryparser', 'src/core/search', 'src/core/store', 'src/core/util' ] lucene_contrib_source_dirs = [ 'src/contrib/analyzers', 'src/contrib/highlighter', 'src/contrib/memory', 'src/contrib/snowball' ] lucene_include_dirs = [ 'include', 'src/core/include', 'src/contrib/include', 'src/contrib/snowball/libstemmer_c/include', 'src/core/util/md5', 'src/core/util/nedmalloc', 'src/core/util/unicode' ] tester_source_dirs = [ 'src/test/analysis', 'src/test/contrib', 'src/test/document', 'src/test/index', 'src/test/queryparser', 'src/test/search', 'src/test/store', 'src/test/util', 'src/test/main' ] tester_include_dirs = [ 'include', 'src/core/include', 'src/contrib/include', 'src/test/include' ] def options(opt): opt.tool_options("boost") opt.tool_options('compiler_cxx') #opt.tool_options('clang', tooldir = 'build') #opt.tool_options('gch', tooldir = 'build') opt.add_option( '--debug', default = False, action = "store_true", help ='debug build no optimization, etc.', dest = 'debug') opt.add_option( 
'--static', default = False, action = "store_true", help ='fully static build', dest = 'static') def configure(conf): conf.check_tool('g++') conf.check_tool('gcc') conf.check_cc(lib = 'z', mandatory = True) conf.check_cc(lib = 'pthread', mandatory = True) conf.check_tool('boost') #conf.check_tool('clang', 'build') #conf.check_tool('gch', 'build') conf.check_boost( static = 'onlystatic', lib = ['filesystem', 'thread', 'regex', 'system', 'date_time', 'iostreams', 'unit_test_framework'] ) def build(bld): target_type = 'cstlib' if Options.options.static else 'cshlib' debug_define = '_DEBUG' if Options.options.debug else 'NDEBUG' if Options.options.debug: compile_flags = ['-O0', '-g'] else: compile_flags = ['-O2'] lucene_sources = [] for source_dir in lucene_source_dirs: source_dir = bld.path.find_dir(source_dir) lucene_sources.extend(source_dir.ant_glob(source_patterns)) bld( name = 'lucene++', features = ['cxx', 'c'] + [target_type], source = [source.relpath_gen(bld.path) for source in lucene_sources], target = 'lucene++', pch = 'src/core/include/LuceneInc.h', includes = lucene_include_dirs + bld.env["CPPPATH_BOOST"], cflags = compile_flags, cxxflags = compile_flags, defines = ['LPP_BUILDING_LIB', 'LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD Z' ) lucene_contrib_sources = [] for source_dir in lucene_contrib_source_dirs: source_dir = bld.path.find_dir(source_dir) lucene_contrib_sources.extend(source_dir.ant_glob(source_patterns)) bld( name = 'lucene_contrib', features = ['cxx', 'c'] + [target_type], source = [source.relpath_gen(bld.path) for source in lucene_contrib_sources], target = 'lucene_contrib', pch = 'src/contrib/include/ContribInc.h', includes = lucene_include_dirs + bld.env["CPPPATH_BOOST"], cflags = compile_flags, cxxflags = compile_flags, defines = ['LPP_BUILDING_LIB', 'LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM 
BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD Z', use = 'lucene++' ) tester_sources = [] for source_dir in tester_source_dirs: source_dir = bld.path.find_dir(source_dir) tester_sources.extend(source_dir.ant_glob(source_patterns)) bld( name = 'lucene_tester', features = ['cxx', 'c', 'cprogram'], source = [source.relpath_gen(bld.path) for source in tester_sources], target = 'lucene_tester', pch = 'src/test/include/TestInc.h', includes = tester_include_dirs + bld.env["CPPPATH_BOOST"], cflags = compile_flags, cxxflags = compile_flags, defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + ['LPP_EXPOSE_INTERNAL'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS BOOST_UNIT_TEST_FRAMEWORK PTHREAD Z', use = 'lucene++ lucene_contrib' ) bld( name = 'deletefiles', features = ['cxx', 'c', 'cprogram'], source = bld.path.find_resource('src/demo/deletefiles/main.cpp').relpath_gen(bld.path), target = 'deletefiles', includes = ['include'] + bld.env["CPPPATH_BOOST"], cflags = compile_flags, cxxflags = compile_flags, defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD Z', use = 'lucene++' ) bld( name = 'indexfiles', features = ['cxx', 'c', 'cprogram'], source = bld.path.find_resource('src/demo/indexfiles/main.cpp').relpath_gen(bld.path), target = 'indexfiles', includes = ['include'] + bld.env["CPPPATH_BOOST"], cflags = compile_flags, cxxflags = compile_flags, defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD Z', use = 'lucene++' ) bld( name = 'searchfiles', features = ['cxx', 'c', 'cprogram'], source = bld.path.find_resource('src/demo/searchfiles/main.cpp').relpath_gen(bld.path), target = 'searchfiles', includes = ['include'] + bld.env["CPPPATH_BOOST"], cflags = 
compile_flags, cxxflags = compile_flags, defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD Z', use = 'lucene++' )